diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 64c85c1101e6e6..fb11c82bb96d9c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,13 +1,10 @@ { - "image": "ghcr.io/python/devcontainer:2024.09.25.11038928730", + "image": "ghcr.io/python/devcontainer:latest", "onCreateCommand": [ // Install common tooling. "dnf", "install", "-y", - "which", - "zsh", - "fish", // For umask fix below. "/usr/bin/setfacl" ], diff --git a/.editorconfig b/.editorconfig index 5b04b32a89e3d2..25bc5935258bd1 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,11 +1,11 @@ root = true -[*.{py,c,cpp,h,js,rst,md,yml,yaml}] +[*.{py,c,cpp,h,js,rst,md,yml,yaml,gram}] trim_trailing_whitespace = true insert_final_newline = true indent_style = space -[*.{py,c,cpp,h}] +[*.{py,c,cpp,h,gram}] indent_size = 4 [*.rst] diff --git a/.gitattributes b/.gitattributes index 2f5a030981fb94..16264cacb0e34e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,6 +10,7 @@ *.ico binary *.jpg binary *.pck binary +*.pdf binary *.png binary *.psd binary *.tar binary @@ -67,6 +68,7 @@ PCbuild/readme.txt dos **/clinic/*.cpp.h generated **/clinic/*.h.h generated *_db.h generated +Doc/c-api/lifecycle.dot.svg generated Doc/data/stable_abi.dat generated Doc/library/token-list.inc generated Include/internal/pycore_ast.h generated @@ -81,6 +83,7 @@ Include/opcode_ids.h generated Include/token.h generated Lib/_opcode_metadata.py generated Lib/keyword.py generated +Lib/idlelib/help.html generated Lib/test/certdata/*.pem generated Lib/test/certdata/*.0 generated Lib/test/levenshtein_examples.json generated diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 16331b65e52979..9c262f47a404e2 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,7 +8,7 @@ .github/** @ezio-melotti @hugovk @AA-Turner # pre-commit -.pre-commit-config.yaml @hugovk @AlexWaygood +.pre-commit-config.yaml @hugovk .ruff.toml @hugovk @AlexWaygood @AA-Turner # Build system @@ -16,6 +16,10 @@ configure* @erlend-aasland @corona10 Makefile.pre.in @erlend-aasland Modules/Setup* @erlend-aasland +# generate-build-details +Tools/build/generate-build-details.py @FFY00 +Lib/test/test_build_details.py @FFY00 + # argparse **/*argparse* @savannahostrowski @@ -27,6 +31,7 @@ Modules/Setup* @erlend-aasland **/*genobject* @markshannon **/*hamt* @1st1 **/*jit* @brandtbucher @savannahostrowski +Python/perf_jit_trampoline.c # Exclude the owners of "**/*jit*", above. Objects/set* @rhettinger Objects/dict* @methane @markshannon Objects/typevarobject.c @JelleZijlstra @@ -53,6 +58,7 @@ Lib/test/test_type_*.py @JelleZijlstra Lib/test/test_capi/test_misc.py @markshannon Lib/test/test_pyrepl/* @pablogsal @lysnikolaou @ambv Tools/c-analyzer/ @ericsnowcurrently +Tools/check-c-api-docs/ @ZeroIntensity # dbm **/*dbm* @corona10 @erlend-aasland @serhiy-storchaka @@ -281,14 +287,21 @@ Doc/howto/clinic.rst @erlend-aasland # Subinterpreters **/*interpreteridobject.* @ericsnowcurrently **/*crossinterp* @ericsnowcurrently -Lib/test/support/interpreters/ @ericsnowcurrently Modules/_interp*module.c @ericsnowcurrently +Lib/test/test__interp*.py @ericsnowcurrently +Lib/concurrent/interpreters/ @ericsnowcurrently +Lib/test/support/channels.py @ericsnowcurrently +Doc/library/concurrent.interpreters.rst @ericsnowcurrently Lib/test/test_interpreters/ @ericsnowcurrently +Lib/concurrent/futures/interpreter.py @ericsnowcurrently # Android **/*Android* @mhsmith @freakboy3742 **/*android* @mhsmith @freakboy3742 +# Apple +/Apple @freakboy3742 + # iOS (but not termios) **/iOS* @freakboy3742 **/ios* @freakboy3742 @@ -298,7 +311,12 @@ Lib/test/test_interpreters/ @ericsnowcurrently **/*-ios* @freakboy3742 # WebAssembly -/Tools/wasm/ @brettcannon @freakboy3742 +Tools/wasm/config.site-wasm32-emscripten @freakboy3742 +/Tools/wasm/README.md @brettcannon @freakboy3742 +/Tools/wasm/wasi-env @brettcannon +/Tools/wasm/wasi_build.py @brettcannon +/Tools/wasm/emscripten @freakboy3742 +/Tools/wasm/wasi @brettcannon # SBOM /Misc/externals.spdx.json @sethmlarson @@ -326,3 +344,8 @@ Modules/_xxtestfuzz/ @ammaraskar **/*templateobject* @lysnikolaou **/*templatelib* @lysnikolaou **/*tstring* @lysnikolaou + +# Remote debugging +Python/remote_debug.h @pablogsal +Python/remote_debugging.c @pablogsal +Modules/_remote_debugging_module.c @pablogsal @ambv @1st1 diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index 68aae196357414..267ff6b42a8655 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -1,6 +1,7 @@ self-hosted-runner: # Pending https://github.com/rhysd/actionlint/issues/533 - labels: ["windows-11-arm"] + # and https://github.com/rhysd/actionlint/issues/571 + labels: ["windows-11-arm", "macos-15-intel"] config-variables: null diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b192508c78685c..b3b98ba3ba62f1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,13 @@ permissions: contents: read concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}-reusable + # https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#concurrency + # 'group' must be a key uniquely representing a PR or push event. + # github.workflow is the workflow name + # github.actor is the user invoking the workflow + # github.head_ref is the source branch of the PR or otherwise blank + # github.run_id is a unique number for the current run + group: ${{ github.workflow }}-${{ github.actor }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: @@ -43,6 +49,53 @@ jobs: if: fromJSON(needs.build-context.outputs.run-docs) uses: ./.github/workflows/reusable-docs.yml + check-abi: + name: 'Check if the ABI has changed' + runs-on: ubuntu-22.04 # 24.04 causes spurious errors + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + sudo ./.github/workflows/posix-deps-apt.sh + sudo apt-get install -yq abigail-tools + - name: Build CPython + env: + CFLAGS: -g3 -O0 + run: | + # Build Python with the libpython dynamic library + ./configure --enable-shared + make -j4 + - name: Check for changes in the ABI + id: check + run: | + if ! make check-abidump; then + echo "Generated ABI file is not up to date." + echo "Please add the release manager of this branch as a reviewer of this PR." + echo "" + echo "The up to date ABI file should be attached to this build as an artifact." + echo "" + echo "To learn more about this check: https://devguide.python.org/getting-started/setup-building/index.html#regenerate-the-abi-dump" + echo "" + exit 1 + fi + - name: Generate updated ABI files + if: ${{ failure() && steps.check.conclusion == 'failure' }} + run: | + make regen-abidump + - uses: actions/upload-artifact@v4 + name: Publish updated ABI files + if: ${{ failure() && steps.check.conclusion == 'failure' }} + with: + name: abi-data + path: ./Doc/data/*.abi + check-autoconf-regen: name: 'Check if Autoconf files are up to date' # Don't use ubuntu-latest but a specific version to make the job @@ -103,20 +156,10 @@ jobs: python-version: '3.x' - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - # Include env.pythonLocation in key to avoid changes in environment when setup-python updates Python - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }}-${{ env.pythonLocation }} - name: Install dependencies run: sudo ./.github/workflows/posix-deps-apt.sh - name: Add ccache to PATH run: echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - name: Configure CPython run: | # Build Python with the libpython dynamic library @@ -124,7 +167,7 @@ jobs: - name: Build CPython run: | make -j4 regen-all - make regen-stdlib-module-names regen-sbom regen-unicodedata + make regen-stdlib-module-names regen-sbom - name: Check for changes run: | git add -u @@ -146,6 +189,9 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals + - name: Check for undocumented C APIs + run: make check-c-api-docs + build-windows: name: >- @@ -172,8 +218,8 @@ jobs: free-threading: ${{ matrix.free-threading }} build-windows-msi: - name: >- # ${{ '' } is a hack to nest jobs under the same sidebar category - Windows MSI${{ '' }} + # ${{ '' } is a hack to nest jobs under the same sidebar category. + name: Windows MSI${{ '' }} # zizmor: ignore[obfuscation] needs: build-context if: fromJSON(needs.build-context.outputs.run-windows-msi) strategy: @@ -192,32 +238,23 @@ jobs: macOS ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-macos == 'true' strategy: fail-fast: false matrix: - # Cirrus and macos-14 are M1, macos-13 is default GHA Intel. - # macOS 13 only runs tests against the GIL-enabled CPython. - # Cirrus used for upstream, macos-14 for forks. + # macos-14 is M1, macos-15-intel is Intel. + # macos-15-intel only runs tests against the GIL-enabled CPython. os: - - ghcr.io/cirruslabs/macos-runner:sonoma - macos-14 - - macos-13 - is-fork: # only used for the exclusion trick - - ${{ github.repository_owner != 'python' }} + - macos-15-intel free-threading: - false - true exclude: - - os: ghcr.io/cirruslabs/macos-runner:sonoma - is-fork: true - - os: macos-14 - is-fork: false - - os: macos-13 + - os: macos-15-intel free-threading: true uses: ./.github/workflows/reusable-macos.yml with: - config_hash: ${{ needs.build-context.outputs.config-hash }} free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} @@ -227,7 +264,7 @@ jobs: ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} ${{ fromJSON(matrix.bolt) && '(bolt)' || '' }} needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: @@ -249,7 +286,6 @@ jobs: bolt: true uses: ./.github/workflows/reusable-ubuntu.yml with: - config_hash: ${{ needs.build-context.outputs.config-hash }} bolt-optimizations: ${{ matrix.bolt }} free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} @@ -259,12 +295,15 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: os: [ubuntu-24.04] - openssl_ver: [3.0.16, 3.1.8, 3.2.4, 3.3.3, 3.4.1] + # Keep 1.1.1w in our list despite it being upstream EOL and otherwise + # unsupported as it most resembles other 1.1.1-work-a-like ssl APIs + # supported by important vendors such as AWS-LC. + openssl_ver: [1.1.1w, 3.0.18, 3.2.6, 3.3.5, 3.4.3, 3.5.4] # See Tools/ssl/make_ssl_data.py for notes on adding a new version env: OPENSSL_VER: ${{ matrix.openssl_ver }} @@ -277,11 +316,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install dependencies @@ -303,10 +337,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - name: Configure CPython run: ./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$OPENSSL_DIR" - name: Build CPython @@ -316,22 +346,65 @@ jobs: - name: SSL tests run: ./python Lib/test/ssltests.py + build-android: + name: Android (${{ matrix.arch }}) + needs: build-context + if: needs.build-context.outputs.run-android == 'true' + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - arch: aarch64 + runs-on: macos-14 + - arch: x86_64 + runs-on: ubuntu-24.04 + + runs-on: ${{ matrix.runs-on }} + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Build and test + run: ./Android/android.py ci --fast-ci ${{ matrix.arch }}-linux-android + + build-ios: + name: iOS + needs: build-context + if: needs.build-context.outputs.run-ios == 'true' + timeout-minutes: 60 + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + # GitHub recommends explicitly selecting the desired Xcode version: + # https://github.com/actions/runner-images/issues/12541#issuecomment-3083850140 + # This became a necessity as a result of + # https://github.com/actions/runner-images/issues/12541 and + # https://github.com/actions/runner-images/issues/12751. + - name: Select Xcode version + run: | + sudo xcode-select --switch /Applications/Xcode_15.4.app + + - name: Build and test + run: python3 Apple ci iOS --fast-ci --simulator 'iPhone SE (3rd generation),OS=17.5' + build-wasi: name: 'WASI' needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-wasi == 'true' uses: ./.github/workflows/reusable-wasi.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} test-hypothesis: name: "Hypothesis tests on Ubuntu" runs-on: ubuntu-24.04 timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' env: - OPENSSL_VER: 3.0.16 + OPENSSL_VER: 3.0.18 PYTHONSTRICTEXTENSIONBUILD: 1 steps: - uses: actions/checkout@v4 @@ -358,10 +431,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - name: Setup directory envs for out-of-tree builds run: | echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" @@ -372,11 +441,6 @@ jobs: run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Configure CPython out-of-tree working-directory: ${{ env.CPYTHON_BUILDDIR }} run: | @@ -445,13 +509,13 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: os: [ubuntu-24.04] env: - OPENSSL_VER: 3.0.16 + OPENSSL_VER: 3.0.18 PYTHONSTRICTEXTENSIONBUILD: 1 ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 steps: @@ -460,11 +524,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install dependencies @@ -490,11 +549,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - name: Configure CPython run: ./configure --config-cache --with-address-sanitizer --without-pymalloc - name: Build CPython @@ -504,21 +558,28 @@ jobs: - name: Tests run: xvfb-run make ci - build-tsan: - name: >- - Thread sanitizer - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + build-san: + # ${{ '' } is a hack to nest jobs under the same sidebar category. + name: Sanitizers${{ '' }} # zizmor: ignore[obfuscation] needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' strategy: fail-fast: false matrix: + check-name: + - Thread free-threading: - false - true - uses: ./.github/workflows/reusable-tsan.yml + sanitizer: + - TSan + include: + - check-name: Undefined behavior + sanitizer: UBSan + free-threading: false + uses: ./.github/workflows/reusable-san.yml with: - config_hash: ${{ needs.build-context.outputs.config-hash }} + sanitizer: ${{ matrix.sanitizer }} free-threading: ${{ matrix.free-threading }} cross-build-linux: @@ -526,18 +587,13 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 60 needs: build-context - if: needs.build-context.outputs.run-tests == 'true' + if: needs.build-context.outputs.run-ubuntu == 'true' steps: - uses: actions/checkout@v4 with: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Set build dir @@ -615,10 +671,12 @@ jobs: - build-macos - build-ubuntu - build-ubuntu-ssltests + - build-android + - build-ios - build-wasi - test-hypothesis - build-asan - - build-tsan + - build-san - cross-build-linux - cifuzz if: always() @@ -633,41 +691,31 @@ jobs: test-hypothesis, cifuzz, allowed-skips: >- + ${{ !fromJSON(needs.build-context.outputs.run-docs) && 'check-docs,' || '' }} ${{ - !fromJSON(needs.build-context.outputs.run-docs) + needs.build-context.outputs.run-tests != 'true' && ' - check-docs, + check-autoconf-regen, + check-generated-files, ' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-windows-tests) && 'build-windows,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-ci-fuzz) && 'cifuzz,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-macos) && 'build-macos,' || '' }} ${{ - needs.build-context.outputs.run-tests != 'true' + !fromJSON(needs.build-context.outputs.run-ubuntu) && ' - check-autoconf-regen, - check-generated-files, - build-macos, build-ubuntu, build-ubuntu-ssltests, - build-wasi, test-hypothesis, build-asan, - build-tsan, + build-san, cross-build-linux, ' || '' }} - ${{ - !fromJSON(needs.build-context.outputs.run-windows-tests) - && ' - build-windows, - ' - || '' - }} - ${{ - !fromJSON(needs.build-context.outputs.run-ci-fuzz) - && ' - cifuzz, - ' - || '' - }} + ${{ !fromJSON(needs.build-context.outputs.run-android) && 'build-android,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-ios) && 'build-ios,' || '' }} + ${{ !fromJSON(needs.build-context.outputs.run-wasi) && 'build-wasi,' || '' }} jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 116e0c1e945e38..122a07d4bae29a 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -68,16 +68,16 @@ jobs: include: - target: i686-pc-windows-msvc/msvc architecture: Win32 - runner: windows-latest + runner: windows-2022 - target: x86_64-pc-windows-msvc/msvc architecture: x64 - runner: windows-latest + runner: windows-2022 - target: aarch64-pc-windows-msvc/msvc architecture: ARM64 runner: windows-11-arm - target: x86_64-apple-darwin/clang architecture: x86_64 - runner: macos-13 + runner: macos-15-intel - target: aarch64-apple-darwin/clang architecture: aarch64 runner: macos-14 @@ -102,17 +102,16 @@ jobs: ./PCbuild/build.bat --experimental-jit ${{ matrix.debug && '-d' || '' }} -p ${{ matrix.architecture }} ./PCbuild/rt.bat ${{ matrix.debug && '-d' || '' }} -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - # The `find` line is required as a result of https://github.com/actions/runner-images/issues/9966. - # This is a bug in the macOS runner image where the pre-installed Python is installed in the same - # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes - # the symlink to the pre-installed Python so that the Homebrew Python is used instead. - name: macOS if: runner.os == 'macOS' run: | brew update - find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete brew install llvm@${{ matrix.llvm }} export SDKROOT="$(xcrun --show-sdk-path)" + # Set MACOSX_DEPLOYMENT_TARGET and -Werror=unguarded-availability to + # make sure we don't break downstream distributors (like uv): + export CFLAGS_JIT='-Werror=unguarded-availability' + export MACOSX_DEPLOYMENT_TARGET=10.15 ./configure --enable-experimental-jit --enable-universalsdk --with-universal-archs=universal2 ${{ matrix.debug && '--with-pydebug' || '' }} make all --jobs 4 ./python.exe -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 @@ -153,3 +152,26 @@ jobs: # - name: Run tests # run: | # ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 + tail-call-jit: + name: JIT with tail calling interpreter + needs: interpreter + runs-on: ubuntu-24.04 + timeout-minutes: 90 + strategy: + fail-fast: false + matrix: + llvm: + - 19 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Build with JIT and tailcall + run: | + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + CC=clang-${{ matrix.llvm }} ./configure --enable-experimental-jit --with-tail-call-interp --with-pydebug + make all --jobs 4 diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 908daaf3a6019a..5d5d77f29f6eb1 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -13,13 +13,22 @@ on: - "Lib/test/libregrtest/**" - "Lib/tomllib/**" - "Misc/mypy/**" + - "Tools/build/check_extension_modules.py" + - "Tools/build/check_warnings.py" + - "Tools/build/compute-changes.py" + - "Tools/build/deepfreeze.py" + - "Tools/build/generate-build-details.py" - "Tools/build/generate_sbom.py" + - "Tools/build/generate_stdlib_module_names.py" + - "Tools/build/mypy.ini" + - "Tools/build/umarshal.py" + - "Tools/build/update_file.py" + - "Tools/build/verify_ensurepip_wheels.py" - "Tools/cases_generator/**" - "Tools/clinic/**" - "Tools/jit/**" - "Tools/peg_generator/**" - "Tools/requirements-dev.txt" - - "Tools/wasm/**" workflow_dispatch: permissions: @@ -51,7 +60,6 @@ jobs: "Tools/clinic", "Tools/jit", "Tools/peg_generator", - "Tools/wasm", ] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index d5538cd9367ec6..7773222af5d26f 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -17,6 +17,7 @@ apt-get -yq install \ libreadline6-dev \ libsqlite3-dev \ libssl-dev \ + libzstd-dev \ lzma \ lzma-dev \ strace \ diff --git a/.github/workflows/reusable-context.yml b/.github/workflows/reusable-context.yml index 73dc254edc5fbc..ce5562f2d51fbb 100644 --- a/.github/workflows/reusable-context.yml +++ b/.github/workflows/reusable-context.yml @@ -17,24 +17,36 @@ on: # yamllint disable-line rule:truthy # || 'falsy-branch' # }} # - config-hash: - description: Config hash value for use in cache keys - value: ${{ jobs.compute-changes.outputs.config-hash }} # str + run-android: + description: Whether to run the Android tests + value: ${{ jobs.compute-changes.outputs.run-android }} # bool + run-ci-fuzz: + description: Whether to run the CIFuzz job + value: ${{ jobs.compute-changes.outputs.run-ci-fuzz }} # bool run-docs: description: Whether to build the docs value: ${{ jobs.compute-changes.outputs.run-docs }} # bool + run-ios: + description: Whether to run the iOS tests + value: ${{ jobs.compute-changes.outputs.run-ios }} # bool + run-macos: + description: Whether to run the macOS tests + value: ${{ jobs.compute-changes.outputs.run-macos }} # bool run-tests: description: Whether to run the regular tests value: ${{ jobs.compute-changes.outputs.run-tests }} # bool - run-windows-tests: - description: Whether to run the Windows tests - value: ${{ jobs.compute-changes.outputs.run-windows-tests }} # bool + run-ubuntu: + description: Whether to run the Ubuntu tests + value: ${{ jobs.compute-changes.outputs.run-ubuntu }} # bool + run-wasi: + description: Whether to run the WASI tests + value: ${{ jobs.compute-changes.outputs.run-wasi }} # bool run-windows-msi: description: Whether to run the MSI installer smoke tests value: ${{ jobs.compute-changes.outputs.run-windows-msi }} # bool - run-ci-fuzz: - description: Whether to run the CIFuzz job - value: ${{ jobs.compute-changes.outputs.run-ci-fuzz }} # bool + run-windows-tests: + description: Whether to run the Windows tests + value: ${{ jobs.compute-changes.outputs.run-windows-tests }} # bool jobs: compute-changes: @@ -42,10 +54,14 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 10 outputs: - config-hash: ${{ steps.config-hash.outputs.hash }} + run-android: ${{ steps.changes.outputs.run-android }} run-ci-fuzz: ${{ steps.changes.outputs.run-ci-fuzz }} run-docs: ${{ steps.changes.outputs.run-docs }} + run-ios: ${{ steps.changes.outputs.run-ios }} + run-macos: ${{ steps.changes.outputs.run-macos }} run-tests: ${{ steps.changes.outputs.run-tests }} + run-ubuntu: ${{ steps.changes.outputs.run-ubuntu }} + run-wasi: ${{ steps.changes.outputs.run-wasi }} run-windows-msi: ${{ steps.changes.outputs.run-windows-msi }} run-windows-tests: ${{ steps.changes.outputs.run-windows-tests }} steps: @@ -97,10 +113,6 @@ jobs: run: python Tools/build/compute-changes.py env: GITHUB_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GITHUB_EVENT_NAME: ${{ github.event_name }} CCF_TARGET_REF: ${{ github.base_ref || github.event.repository.default_branch }} CCF_HEAD_REF: ${{ github.event.pull_request.head.sha || github.sha }} - - - name: Compute hash for config cache key - id: config-hash - run: | - echo "hash=${{ hashFiles('configure', 'configure.ac', '.github/workflows/build.yml') }}" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 657e0a6bf662f7..65154aae4c41d5 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -66,7 +66,7 @@ jobs: run: | set -Eeuo pipefail # Build docs with the nit-picky option; write warnings to file - make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --fail-on-warning --warning-file sphinx-warnings.txt" html + make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --warning-file sphinx-warnings.txt" html - name: 'Check warnings' if: github.event_name == 'pull_request' run: | @@ -102,3 +102,30 @@ jobs: # Use "xvfb-run" since some doctest tests open GUI windows - name: 'Run documentation doctest' run: xvfb-run make -C Doc/ PYTHON=../python SPHINXERRORHANDLING="--fail-on-warning" doctest + + check-epub: + name: 'Check EPUB' + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: 'Set up Python' + uses: actions/setup-python@v5 + with: + python-version: '3' + cache: 'pip' + cache-dependency-path: 'Doc/requirements.txt' + - name: 'Install build dependencies' + run: | + make -C Doc/ venv + python -m pip install epubcheck + - name: 'Build EPUB documentation' + run: make -C Doc/ PYTHON=../python epub + - name: 'Run epubcheck' + continue-on-error: true + run: epubcheck Doc/build/epub/Python.epub &> Doc/epubcheck.txt + - run: cat Doc/epubcheck.txt + - name: 'Check for fatal errors in EPUB' + run: python Doc/tools/check-epub.py diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index de0c40221364ad..4f2207c5548e4e 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -3,9 +3,6 @@ name: Reusable macOS on: workflow_call: inputs: - config_hash: - required: true - type: string free-threading: required: false type: boolean @@ -36,11 +33,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.config_hash }} - name: Install Homebrew dependencies run: | brew install pkg-config openssl@3.0 xz gdbm tcl-tk@8 make @@ -60,15 +52,15 @@ jobs: --prefix=/opt/python-dev \ --with-openssl="$(brew --prefix openssl@3.0)" - name: Build CPython - if : ${{ inputs.free-threading || inputs.os != 'macos-13' }} + if : ${{ inputs.free-threading || inputs.os != 'macos-15-intel' }} run: gmake -j8 - name: Build CPython for compiler warning check - if : ${{ !inputs.free-threading && inputs.os == 'macos-13' }} + if : ${{ !inputs.free-threading && inputs.os == 'macos-15-intel' }} run: set -o pipefail; gmake -j8 --output-sync 2>&1 | tee compiler_output_macos.txt - name: Display build info run: make pythoninfo - name: Check compiler warnings - if : ${{ !inputs.free-threading && inputs.os == 'macos-13' }} + if : ${{ !inputs.free-threading && inputs.os == 'macos-15-intel' }} run: >- python3 Tools/build/check_warnings.py --compiler-output-file-path=compiler_output_macos.txt diff --git a/.github/workflows/reusable-san.yml b/.github/workflows/reusable-san.yml new file mode 100644 index 00000000000000..c601d0b73380d4 --- /dev/null +++ b/.github/workflows/reusable-san.yml @@ -0,0 +1,111 @@ +name: Reusable Sanitizer + +on: + workflow_call: + inputs: + sanitizer: + required: true + type: string + free-threading: + description: Whether to use free-threaded mode + required: false + type: boolean + default: false + +env: + FORCE_COLOR: 1 + +jobs: + build-san-reusable: + name: >- + ${{ inputs.sanitizer }}${{ + inputs.free-threading + && ' (free-threading)' + || '' + }} + runs-on: ubuntu-24.04 + timeout-minutes: 60 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Install dependencies + run: | + sudo ./.github/workflows/posix-deps-apt.sh + # Install clang + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + + if [ "${SANITIZER}" = "TSan" ]; then + sudo ./llvm.sh 17 # gh-121946: llvm-18 package is temporarily broken + sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-17 100 + sudo update-alternatives --set clang /usr/bin/clang-17 + sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-17 100 + sudo update-alternatives --set clang++ /usr/bin/clang++-17 + # Reduce ASLR to avoid TSan crashing + sudo sysctl -w vm.mmap_rnd_bits=28 + else + sudo ./llvm.sh 20 + fi + + - name: Sanitizer option setup + run: | + if [ "${SANITIZER}" = "TSan" ]; then + echo "TSAN_OPTIONS=${SAN_LOG_OPTION} suppressions=${GITHUB_WORKSPACE}/Tools/tsan/suppressions${{ + fromJSON(inputs.free-threading) + && '_free_threading' + || '' + }}.txt handle_segv=0" >> "$GITHUB_ENV" + else + echo "UBSAN_OPTIONS=${SAN_LOG_OPTION}" >> "$GITHUB_ENV" + fi + echo "CC=clang" >> "$GITHUB_ENV" + echo "CXX=clang++" >> "$GITHUB_ENV" + env: + SANITIZER: ${{ inputs.sanitizer }} + SAN_LOG_OPTION: log_path=${{ github.workspace }}/san_log + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure CPython + run: >- + ./configure + --config-cache + ${{ + inputs.sanitizer == 'TSan' + && '--with-thread-sanitizer' + || '--with-undefined-behavior-sanitizer' + }} + --with-pydebug + ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} + - name: Build CPython + run: make -j4 + - name: Display build info + run: make pythoninfo + - name: Tests + run: >- + ./python -m test + ${{ inputs.sanitizer == 'TSan' && '--tsan' || '' }} + -j4 + - name: Parallel tests + if: >- + inputs.sanitizer == 'TSan' + && fromJSON(inputs.free-threading) + run: ./python -m test --tsan-parallel --parallel-threads=4 -j4 + - name: Display logs + if: always() + run: find "${GITHUB_WORKSPACE}" -name 'san_log.*' | xargs head -n 1000 + - name: Archive logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: >- + ${{ inputs.sanitizer }}-logs-${{ + fromJSON(inputs.free-threading) + && 'free-threading' + || 'default' + }} + path: san_log.* + if-no-files-found: ignore diff --git a/.github/workflows/reusable-tsan.yml b/.github/workflows/reusable-tsan.yml deleted file mode 100644 index 6a58e5305f8e09..00000000000000 --- a/.github/workflows/reusable-tsan.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: Reusable Thread Sanitizer - -on: - workflow_call: - inputs: - config_hash: - required: true - type: string - free-threading: - description: Whether to use free-threaded mode - required: false - type: boolean - default: false - -env: - FORCE_COLOR: 1 - -jobs: - build-tsan-reusable: - name: 'Thread sanitizer' - runs-on: ubuntu-24.04 - timeout-minutes: 60 - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.config_hash }} - - name: Install dependencies - run: | - sudo ./.github/workflows/posix-deps-apt.sh - # Install clang-18 - wget https://apt.llvm.org/llvm.sh - chmod +x llvm.sh - sudo ./llvm.sh 17 # gh-121946: llvm-18 package is temporarily broken - sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-17 100 - sudo update-alternatives --set clang /usr/bin/clang-17 - sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-17 100 - sudo update-alternatives --set clang++ /usr/bin/clang++-17 - # Reduce ASLR to avoid TSAN crashing - sudo sysctl -w vm.mmap_rnd_bits=28 - - name: TSAN option setup - run: | - echo "TSAN_OPTIONS=log_path=${GITHUB_WORKSPACE}/tsan_log suppressions=${GITHUB_WORKSPACE}/Tools/tsan/suppressions${{ - fromJSON(inputs.free-threading) - && '_free_threading' - || '' - }}.txt handle_segv=0" >> "$GITHUB_ENV" - echo "CC=clang" >> "$GITHUB_ENV" - echo "CXX=clang++" >> "$GITHUB_ENV" - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - - name: Configure CPython - run: >- - ./configure - --config-cache - --with-thread-sanitizer - --with-pydebug - ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} - - name: Build CPython - run: make -j4 - - name: Display build info - run: make pythoninfo - - name: Tests - run: ./python -m test --tsan -j4 - - name: Parallel tests - if: fromJSON(inputs.free-threading) - run: ./python -m test --tsan-parallel --parallel-threads=4 -j4 - - name: Display TSAN logs - if: always() - run: find "${GITHUB_WORKSPACE}" -name 'tsan_log.*' | xargs head -n 1000 - - name: Archive TSAN logs - if: always() - uses: actions/upload-artifact@v4 - with: - name: >- - tsan-logs-${{ - fromJSON(inputs.free-threading) - && 'free-threading' - || 'default' - }} - path: tsan_log.* - if-no-files-found: ignore diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 76b19fd5d1a72e..0c1ebe29ae322f 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -3,9 +3,6 @@ name: Reusable Ubuntu on: workflow_call: inputs: - config_hash: - required: true - type: string bolt-optimizations: description: Whether to enable BOLT optimizations required: false @@ -30,7 +27,7 @@ jobs: runs-on: ${{ inputs.os }} timeout-minutes: 60 env: - OPENSSL_VER: 3.0.15 + OPENSSL_VER: 3.0.18 PYTHONSTRICTEXTENSIONBUILD: 1 TERM: linux steps: @@ -64,11 +61,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - name: Setup directory envs for out-of-tree builds run: | echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" @@ -79,11 +71,6 @@ jobs: run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.config_hash }} - name: Configure CPython out-of-tree working-directory: ${{ env.CPYTHON_BUILDDIR }} # `test_unpickle_module_race` writes to the source directory, which is diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 6beb91e66d4027..0f425ca269e1ba 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -2,10 +2,6 @@ name: Reusable WASI on: workflow_call: - inputs: - config_hash: - required: true - type: string env: FORCE_COLOR: 1 @@ -42,11 +38,6 @@ jobs: mkdir "${WASI_SDK_PATH}" && \ curl -s -S --location "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION}/wasi-sdk-${WASI_SDK_VERSION}.0-x86_64-linux.tar.gz" | \ tar --strip-components 1 --directory "${WASI_SDK_PATH}" --extract --gunzip - - name: "Configure ccache action" - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - name: "Add ccache to PATH" run: echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - name: "Install Python" @@ -55,29 +46,15 @@ jobs: python-version: '3.x' - name: "Runner image version" run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: "Restore Python build config.cache" - uses: actions/cache@v4 - with: - path: ${{ env.CROSS_BUILD_PYTHON }}/config.cache - # Include env.pythonLocation in key to avoid changes in environment when setup-python updates Python. - # Include the hash of `Tools/wasm/wasi.py` as it may change the environment variables. - # (Make sure to keep the key in sync with the other config.cache step below.) - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ env.WASI_SDK_VERSION }}-${{ env.WASMTIME_VERSION }}-${{ inputs.config_hash }}-${{ hashFiles('Tools/wasm/wasi.py') }}-${{ env.pythonLocation }} - name: "Configure build Python" - run: python3 Tools/wasm/wasi.py configure-build-python -- --config-cache --with-pydebug + run: python3 Tools/wasm/wasi configure-build-python -- --config-cache --with-pydebug - name: "Make build Python" - run: python3 Tools/wasm/wasi.py make-build-python - - name: "Restore host config.cache" - uses: actions/cache@v4 - with: - path: ${{ env.CROSS_BUILD_WASI }}/config.cache - # Should be kept in sync with the other config.cache step above. - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ env.WASI_SDK_VERSION }}-${{ env.WASMTIME_VERSION }}-${{ inputs.config_hash }}-${{ hashFiles('Tools/wasm/wasi.py') }}-${{ env.pythonLocation }} + run: python3 Tools/wasm/wasi make-build-python - name: "Configure host" # `--with-pydebug` inferred from configure-build-python - run: python3 Tools/wasm/wasi.py configure-host -- --config-cache + run: python3 Tools/wasm/wasi configure-host -- --config-cache - name: "Make host" - run: python3 Tools/wasm/wasi.py make-host + run: python3 Tools/wasm/wasi make-host - name: "Display build info" run: make --directory "${CROSS_BUILD_WASI}" pythoninfo - name: "Test" diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index a50de344bba4da..c95e40a38095f9 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -17,7 +17,7 @@ env: jobs: build: name: installer for ${{ inputs.arch }} - runs-on: ${{ inputs.arch == 'arm64' && 'windows-11-arm' || 'windows-latest' }} + runs-on: ${{ inputs.arch == 'arm64' && 'windows-11-arm' || 'windows-2022' }} timeout-minutes: 60 env: ARCH: ${{ inputs.arch }} diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index 37c802095b0f2f..0648b770753255 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -21,7 +21,7 @@ env: jobs: build: name: Build and test (${{ inputs.arch }}) - runs-on: ${{ inputs.arch == 'arm64' && 'windows-11-arm' || 'windows-latest' }} + runs-on: ${{ inputs.arch == 'arm64' && 'windows-11-arm' || 'windows-2022' }} timeout-minutes: 60 env: ARCH: ${{ inputs.arch }} diff --git a/.github/workflows/tail-call.yml b/.github/workflows/tail-call.yml index 4636372e26c41b..e99e317182eaa6 100644 --- a/.github/workflows/tail-call.yml +++ b/.github/workflows/tail-call.yml @@ -49,16 +49,16 @@ jobs: include: # - target: i686-pc-windows-msvc/msvc # architecture: Win32 -# runner: windows-latest +# runner: windows-2022 - target: x86_64-pc-windows-msvc/msvc architecture: x64 - runner: windows-latest + runner: windows-2022 # - target: aarch64-pc-windows-msvc/msvc # architecture: ARM64 -# runner: windows-latest +# runner: windows-2022 - target: x86_64-apple-darwin/clang architecture: x86_64 - runner: macos-13 + runner: macos-15-intel - target: aarch64-apple-darwin/clang architecture: aarch64 runner: macos-14 @@ -101,21 +101,14 @@ jobs: set LLVMInstallDir=C:\Program Files\LLVM ./PCbuild/build.bat --tail-call-interp -p ${{ matrix.architecture }} - # The `find` line is required as a result of https://github.com/actions/runner-images/issues/9966. - # This is a bug in the macOS runner image where the pre-installed Python is installed in the same - # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes - # the symlink to the pre-installed Python so that the Homebrew Python is used instead. - # Note: when a new LLVM is released, the homebrew installation directory changes, so the builds will fail. - # We either need to upgrade LLVM or change the directory being pointed to. - name: Native macOS (release) if: runner.os == 'macOS' run: | brew update - find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete brew install llvm@${{ matrix.llvm }} export SDKROOT="$(xcrun --show-sdk-path)" - export PATH="/usr/local/opt/llvm/bin:$PATH" - export PATH="/opt/homebrew/opt/llvm/bin:$PATH" + export PATH="/usr/local/opt/llvm@${{ matrix.llvm }}/bin:$PATH" + export PATH="/opt/homebrew/opt/llvm@${{ matrix.llvm }}/bin:$PATH" CC=clang-20 ./configure --with-tail-call-interp make all --jobs 4 ./python.exe -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 @@ -137,4 +130,3 @@ jobs: CC=clang-20 ./configure --with-tail-call-interp --disable-gil make all --jobs 4 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - diff --git a/.gitignore b/.gitignore index 2a6f249275c32e..2ba4e7da62e327 100644 --- a/.gitignore +++ b/.gitignore @@ -71,16 +71,15 @@ Lib/test/data/* /Makefile /Makefile.pre /iOSTestbed.* -iOS/Frameworks/ -iOS/Resources/Info.plist -iOS/testbed/build -iOS/testbed/Python.xcframework/ios-*/bin -iOS/testbed/Python.xcframework/ios-*/include -iOS/testbed/Python.xcframework/ios-*/lib -iOS/testbed/Python.xcframework/ios-*/Python.framework -iOS/testbed/iOSTestbed.xcodeproj/project.xcworkspace -iOS/testbed/iOSTestbed.xcodeproj/xcuserdata -iOS/testbed/iOSTestbed.xcodeproj/xcshareddata +Apple/iOS/Frameworks/ +Apple/iOS/Resources/Info.plist +Apple/testbed/build +Apple/testbed/Python.xcframework/*/bin +Apple/testbed/Python.xcframework/*/include +Apple/testbed/Python.xcframework/*/lib +Apple/testbed/Python.xcframework/*/Python.framework +Apple/testbed/*Testbed.xcodeproj/project.xcworkspace +Apple/testbed/*Testbed.xcodeproj/xcuserdata Mac/Makefile Mac/PythonLauncher/Info.plist Mac/PythonLauncher/Makefile @@ -135,7 +134,6 @@ Tools/unicode/data/ /config.log /config.status /config.status.lineno -# hendrikmuhs/ccache-action@v1 /.ccache /cross-build/ /jit_stencils*.h @@ -171,5 +169,6 @@ Python/frozen_modules/MANIFEST /python !/Python/ -# main branch only: ABI files are not checked/maintained. -Doc/data/python*.abi +# People's custom https://docs.anthropic.com/en/docs/claude-code/memory configs. +/.claude/ +CLAUDE.local.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7ad829c94d50f3..c5767ee841eb0d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,23 +1,43 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.8 + rev: v0.13.2 hooks: - - id: ruff + - id: ruff-check + name: Run Ruff (lint) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple/ + - id: ruff-check name: Run Ruff (lint) on Doc/ args: [--exit-non-zero-on-fix] files: ^Doc/ - - id: ruff + - id: ruff-check name: Run Ruff (lint) on Lib/test/ args: [--exit-non-zero-on-fix] files: ^Lib/test/ - - id: ruff + - id: ruff-check name: Run Ruff (lint) on Tools/build/ args: [--exit-non-zero-on-fix, --config=Tools/build/.ruff.toml] files: ^Tools/build/ - - id: ruff + - id: ruff-check + name: Run Ruff (lint) on Tools/i18n/ + args: [--exit-non-zero-on-fix, --config=Tools/i18n/.ruff.toml] + files: ^Tools/i18n/ + - id: ruff-check name: Run Ruff (lint) on Argument Clinic args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml] files: ^Tools/clinic/|Lib/test/test_clinic.py + - id: ruff-check + name: Run Ruff (lint) on Tools/peg_generator/ + args: [--exit-non-zero-on-fix, --config=Tools/peg_generator/.ruff.toml] + files: ^Tools/peg_generator/ + - id: ruff-check + name: Run Ruff (lint) on Tools/wasm/ + args: [--exit-non-zero-on-fix, --config=Tools/wasm/.ruff.toml] + files: ^Tools/wasm/ + - id: ruff-format + name: Run Ruff (format) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple - id: ruff-format name: Run Ruff (format) on Doc/ args: [--check] @@ -26,16 +46,26 @@ repos: name: Run Ruff (format) on Tools/build/check_warnings.py args: [--check, --config=Tools/build/.ruff.toml] files: ^Tools/build/check_warnings.py + - id: ruff-format + name: Run Ruff (format) on Tools/wasm/ + args: [--check, --config=Tools/wasm/.ruff.toml] + files: ^Tools/wasm/ - repo: https://github.com/psf/black-pre-commit-mirror - rev: 25.1.0 + rev: 25.9.0 hooks: - id: black name: Run Black on Tools/jit/ files: ^Tools/jit/ + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.5 + hooks: + - id: remove-tabs + types: [python] + - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-case-conflict - id: check-merge-conflict @@ -43,13 +73,17 @@ repos: exclude: ^Lib/test/test_tomllib/ - id: check-yaml - id: end-of-file-fixer - types: [python] + types_or: [python, yaml] exclude: Lib/test/tokenizedata/coding20731.py + - id: end-of-file-fixer + files: '^\.github/CODEOWNERS$' + - id: trailing-whitespace + types_or: [c, inc, python, rst, yaml] - id: trailing-whitespace - types_or: [c, inc, python, rst] + files: '^\.github/CODEOWNERS|\.(gram)$' - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.33.0 + rev: 0.34.0 hooks: - id: check-dependabot - id: check-github-workflows @@ -61,7 +95,7 @@ repos: - id: actionlint - repo: https://github.com/woodruffw/zizmor-pre-commit - rev: v1.6.0 + rev: v1.14.1 hooks: - id: zizmor diff --git a/.readthedocs.yml b/.readthedocs.yml index a57de00544e4e3..0a2c3f8345367f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -32,4 +32,3 @@ build: - make -C Doc venv html - mkdir _readthedocs - mv Doc/build/html _readthedocs/html - diff --git a/Android/README.md b/Android/README.md index 6cabd6ba5d6844..9f71aeb934f386 100644 --- a/Android/README.md +++ b/Android/README.md @@ -96,10 +96,12 @@ similar to the `Android` directory of the CPython source tree. ## Testing -The Python test suite can be run on Linux, macOS, or Windows: +The Python test suite can be run on Linux, macOS, or Windows. -* On Linux, the emulator needs access to the KVM virtualization interface, and - a DISPLAY environment variable pointing at an X server. Xvfb is acceptable. +On Linux, the emulator needs access to the KVM virtualization interface. This may +require adding your user to a group, or changing your udev rules. On GitHub +Actions, the test script will do this automatically using the commands shown +[here](https://github.blog/changelog/2024-04-02-github-actions-hardware-accelerated-android-virtualization-now-available/). The test suite can usually be run on a device with 2 GB of RAM, but this is borderline, so you may need to increase it to 4 GB. As of Android @@ -156,6 +158,10 @@ repository's `Lib` directory will be picked up immediately. Changes in C files, and architecture-specific files such as sysconfigdata, will not take effect until you re-run `android.py make-host` or `build`. +The testbed app can also be used to test third-party packages. For more details, +run `android.py test --help`, paying attention to the options `--site-packages`, +`--cwd`, `-c` and `-m`. + ## Using in your own app diff --git a/Android/android-env.sh b/Android/android-env.sh index bab4130c9e92d0..5859c0eac4a88f 100644 --- a/Android/android-env.sh +++ b/Android/android-env.sh @@ -3,7 +3,7 @@ : "${HOST:?}" # GNU target triplet # You may also override the following: -: "${api_level:=24}" # Minimum Android API level the build will run on +: "${ANDROID_API_LEVEL:=24}" # Minimum Android API level the build will run on : "${PREFIX:-}" # Path in which to find required libraries @@ -24,7 +24,7 @@ fail() { # * https://android.googlesource.com/platform/ndk/+/ndk-rXX-release/docs/BuildSystemMaintainers.md # where XX is the NDK version. Do a diff against the version you're upgrading from, e.g.: # https://android.googlesource.com/platform/ndk/+/ndk-r25-release..ndk-r26-release/docs/BuildSystemMaintainers.md -ndk_version=27.1.12297006 +ndk_version=27.3.13750724 ndk=$ANDROID_HOME/ndk/$ndk_version if ! [ -e "$ndk" ]; then @@ -43,7 +43,7 @@ fi toolchain=$(echo "$ndk"/toolchains/llvm/prebuilt/*) export AR="$toolchain/bin/llvm-ar" export AS="$toolchain/bin/llvm-as" -export CC="$toolchain/bin/${clang_triplet}${api_level}-clang" +export CC="$toolchain/bin/${clang_triplet}${ANDROID_API_LEVEL}-clang" export CXX="${CC}++" export LD="$toolchain/bin/ld" export NM="$toolchain/bin/llvm-nm" diff --git a/Android/android.py b/Android/android.py index 3f48b42aa17571..d1a10be776ed16 100755 --- a/Android/android.py +++ b/Android/android.py @@ -2,7 +2,9 @@ import asyncio import argparse +import json import os +import platform import re import shlex import shutil @@ -14,7 +16,7 @@ from contextlib import asynccontextmanager from datetime import datetime, timezone from glob import glob -from os.path import basename, relpath +from os.path import abspath, basename, relpath from pathlib import Path from subprocess import CalledProcessError from tempfile import TemporaryDirectory @@ -22,9 +24,14 @@ SCRIPT_NAME = Path(__file__).name ANDROID_DIR = Path(__file__).resolve().parent -CHECKOUT = ANDROID_DIR.parent +PYTHON_DIR = ANDROID_DIR.parent +in_source_tree = ( + ANDROID_DIR.name == "Android" and (PYTHON_DIR / "pyconfig.h.in").exists() +) + +ENV_SCRIPT = ANDROID_DIR / "android-env.sh" TESTBED_DIR = ANDROID_DIR / "testbed" -CROSS_BUILD_DIR = CHECKOUT / "cross-build" +CROSS_BUILD_DIR = PYTHON_DIR / "cross-build" HOSTS = ["aarch64-linux-android", "x86_64-linux-android"] APP_ID = "org.python.testbed" @@ -46,7 +53,19 @@ + (".bat" if os.name == "nt" else "") ) -logcat_started = False +# Whether we've seen any output from Python yet. +python_started = False + +# Buffer for verbose output which will be displayed only if a test fails and +# there has been no output from Python. +hidden_output = [] + + +def log_verbose(context, line, stream=sys.stdout): + if context.verbose: + stream.write(line) + else: + hidden_output.append((stream, line)) def delete_glob(pattern): @@ -76,39 +95,67 @@ def run(command, *, host=None, env=None, log=True, **kwargs): kwargs.setdefault("check", True) if env is None: env = os.environ.copy() - original_env = env.copy() if host: - env_script = ANDROID_DIR / "android-env.sh" - env_output = subprocess.run( - f"set -eu; " - f"HOST={host}; " - f"PREFIX={subdir(host)}/prefix; " - f". {env_script}; " - f"export", - check=True, shell=True, text=True, stdout=subprocess.PIPE - ).stdout - - for line in env_output.splitlines(): - # We don't require every line to match, as there may be some other - # output from installing the NDK. - if match := re.search( - "^(declare -x |export )?(\\w+)=['\"]?(.*?)['\"]?$", line - ): - key, value = match[2], match[3] - if env.get(key) != value: - print(line) - env[key] = value - - if env == original_env: - raise ValueError(f"Found no variables in {env_script.name} output:\n" - + env_output) + host_env = android_env(host) + print_env(host_env) + env.update(host_env) if log: - print(">", " ".join(map(str, command))) + print(">", join_command(command)) return subprocess.run(command, env=env, **kwargs) +# Format a command so it can be copied into a shell. Like shlex.join, but also +# accepts arguments which are Paths, or a single string/Path outside of a list. +def join_command(args): + if isinstance(args, (str, Path)): + return str(args) + else: + return shlex.join(map(str, args)) + + +# Format the environment so it can be pasted into a shell. +def print_env(env): + for key, value in sorted(env.items()): + print(f"export {key}={shlex.quote(value)}") + + +def android_env(host): + if host: + prefix = subdir(host) / "prefix" + else: + prefix = ANDROID_DIR / "prefix" + sysconfig_files = prefix.glob("lib/python*/_sysconfigdata__android_*.py") + sysconfig_filename = next(sysconfig_files).name + host = re.fullmatch(r"_sysconfigdata__android_(.+).py", sysconfig_filename)[1] + + env_output = subprocess.run( + f"set -eu; " + f"HOST={host}; " + f"PREFIX={prefix}; " + f". {ENV_SCRIPT}; " + f"export", + check=True, shell=True, capture_output=True, encoding='utf-8', + ).stdout + + env = {} + for line in env_output.splitlines(): + # We don't require every line to match, as there may be some other + # output from installing the NDK. + if match := re.search( + "^(declare -x |export )?(\\w+)=['\"]?(.*?)['\"]?$", line + ): + key, value = match[2], match[3] + if os.environ.get(key) != value: + env[key] = value + + if not env: + raise ValueError(f"Found no variables in {ENV_SCRIPT.name} output:\n" + + env_output) + return env + + def build_python_path(): """The path to the build Python binary.""" build_dir = subdir("build") @@ -127,7 +174,7 @@ def configure_build_python(context): clean("build") os.chdir(subdir("build", create=True)) - command = [relpath(CHECKOUT / "configure")] + command = [relpath(PYTHON_DIR / "configure")] if context.args: command.extend(context.args) run(command) @@ -138,13 +185,20 @@ def make_build_python(context): run(["make", "-j", str(os.cpu_count())]) +# To create new builds of these dependencies, usually all that's necessary is to +# push a tag to the cpython-android-source-deps repository, and GitHub Actions +# will do the rest. +# +# If you're a member of the Python core team, and you'd like to be able to push +# these tags yourself, please contact Malcolm Smith or Russell Keith-Magee. def unpack_deps(host, prefix_dir): + os.chdir(prefix_dir) deps_url = "https://github.com/beeware/cpython-android-source-deps/releases/download" - for name_ver in ["bzip2-1.0.8-2", "libffi-3.4.4-3", "openssl-3.0.15-4", - "sqlite-3.49.1-0", "xz-5.4.6-1"]: + for name_ver in ["bzip2-1.0.8-3", "libffi-3.4.4-3", "openssl-3.0.18-0", + "sqlite-3.50.4-0", "xz-5.4.6-1", "zstd-1.5.7-1"]: filename = f"{name_ver}-{host}.tar.gz" download(f"{deps_url}/{name_ver}/{filename}") - shutil.unpack_archive(filename, prefix_dir) + shutil.unpack_archive(filename) os.remove(filename) @@ -167,7 +221,7 @@ def configure_host_python(context): os.chdir(host_dir) command = [ # Basic cross-compiling configuration - relpath(CHECKOUT / "configure"), + relpath(PYTHON_DIR / "configure"), f"--host={context.host}", f"--build={sysconfig.get_config_var('BUILD_GNU_TYPE')}", f"--with-build-python={build_python_path()}", @@ -196,9 +250,18 @@ def make_host_python(context): for pattern in ("include/python*", "lib/libpython*", "lib/python*"): delete_glob(f"{prefix_dir}/{pattern}") + # The Android environment variables were already captured in the Makefile by + # `configure`, and passing them again when running `make` may cause some + # flags to be duplicated. So we don't use the `host` argument here. os.chdir(host_dir) - run(["make", "-j", str(os.cpu_count())], host=context.host) - run(["make", "install", f"prefix={prefix_dir}"], host=context.host) + run(["make", "-j", str(os.cpu_count())]) + + # The `make install` output is very verbose and rarely useful, so + # suppress it by default. + run( + ["make", "install", f"prefix={prefix_dir}"], + capture_output=not context.verbose, + ) def build_all(context): @@ -217,6 +280,33 @@ def clean_all(context): clean(host) +def setup_ci(): + if "GITHUB_ACTIONS" in os.environ: + # Enable emulator hardware acceleration + # (https://github.blog/changelog/2024-04-02-github-actions-hardware-accelerated-android-virtualization-now-available/). + if platform.system() == "Linux": + run( + ["sudo", "tee", "/etc/udev/rules.d/99-kvm4all.rules"], + input='KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"\n', + text=True, + ) + run(["sudo", "udevadm", "control", "--reload-rules"]) + run(["sudo", "udevadm", "trigger", "--name-match=kvm"]) + + # Free up disk space by deleting unused versions of the NDK + # (https://github.com/freakboy3742/pyspamsum/pull/108). + for line in ENV_SCRIPT.read_text().splitlines(): + if match := re.fullmatch(r"ndk_version=(.+)", line): + ndk_version = match[1] + break + else: + raise ValueError(f"Failed to find NDK version in {ENV_SCRIPT.name}") + + for item in (android_home / "ndk").iterdir(): + if item.name[0].isdigit() and item.name != ndk_version: + delete_glob(item) + + def setup_sdk(): sdkmanager = android_home / ( "cmdline-tools/latest/bin/sdkmanager" @@ -228,7 +318,12 @@ def setup_sdk(): if not all((android_home / "licenses" / path).exists() for path in [ "android-sdk-arm-dbt-license", "android-sdk-license" ]): - run([sdkmanager, "--licenses"], text=True, input="y\n" * 100) + run( + [sdkmanager, "--licenses"], + text=True, + capture_output=True, + input="y\n" * 100, + ) # Gradle may install this automatically, but we can't rely on that because # we need to run adb within the logcat task. @@ -411,17 +506,19 @@ async def logcat_task(context, initial_devices): # `--pid` requires API level 24 or higher. args = [adb, "-s", serial, "logcat", "--pid", pid, "--format", "tag"] - hidden_output = [] + logcat_started = False async with async_process( *args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) as process: while line := (await process.stdout.readline()).decode(*DECODE_ARGS): if match := re.fullmatch(r"([A-Z])/(.*)", line, re.DOTALL): + logcat_started = True level, message = match.groups() else: - # If the regex doesn't match, this is probably the second or - # subsequent line of a multi-line message. Python won't produce - # such messages, but other components might. + # If the regex doesn't match, this is either a logcat startup + # error, or the second or subsequent line of a multi-line + # message. Python won't produce multi-line messages, but other + # components might. level, message = None, line # Exclude high-volume messages which are rarely useful. @@ -441,25 +538,22 @@ async def logcat_task(context, initial_devices): # tag indicators from Python's stdout and stderr. for prefix in ["python.stdout: ", "python.stderr: "]: if message.startswith(prefix): - global logcat_started - logcat_started = True + global python_started + python_started = True stream.write(message.removeprefix(prefix)) break else: - if context.verbose: - # Non-Python messages add a lot of noise, but they may - # sometimes help explain a failure. - stream.write(line) - else: - hidden_output.append(line) + # Non-Python messages add a lot of noise, but they may + # sometimes help explain a failure. + log_verbose(context, line, stream) # If the device disconnects while logcat is running, which always # happens in --managed mode, some versions of adb return non-zero. # Distinguish this from a logcat startup error by checking whether we've - # received a message from Python yet. + # received any logcat messages yet. status = await wait_for(process.wait(), timeout=1) if status != 0 and not logcat_started: - raise CalledProcessError(status, args, "".join(hidden_output)) + raise CalledProcessError(status, args) def stop_app(serial): @@ -474,12 +568,38 @@ async def gradle_task(context): task_prefix = "connected" env["ANDROID_SERIAL"] = context.connected + if context.ci_mode: + context.args[0:0] = [ + # See _add_ci_python_opts in libregrtest/main.py. + "-W", "error", "-bb", "-E", + + # Randomization is disabled because order-dependent failures are + # much less likely to pass on a rerun in single-process mode. + "-m", "test", + f"--{context.ci_mode}-ci", "--single-process", "--no-randomize" + ] + + if not any(arg in context.args for arg in ["-c", "-m"]): + context.args[0:0] = ["-m", "test"] + args = [ gradlew, "--console", "plain", f"{task_prefix}DebugAndroidTest", - "-Pandroid.testInstrumentationRunnerArguments.pythonArgs=" - + shlex.join(context.args), + ] + [ + f"-P{name}={value}" + for name, value in [ + ("python.sitePackages", context.site_packages), + ("python.cwd", context.cwd), + ( + "android.testInstrumentationRunnerArguments.pythonArgs", + json.dumps(context.args), + ), + ] + if value ] - hidden_output = [] + if context.verbose >= 2: + args.append("--info") + log_verbose(context, f"> {join_command(args)}\n") + try: async with async_process( *args, cwd=TESTBED_DIR, env=env, @@ -488,10 +608,10 @@ async def gradle_task(context): while line := (await process.stdout.readline()).decode(*DECODE_ARGS): # Gradle may take several minutes to install SDK packages, so # it's worth showing those messages even in non-verbose mode. - if context.verbose or line.startswith('Preparing "Install'): + if line.startswith('Preparing "Install'): sys.stdout.write(line) else: - hidden_output.append(line) + log_verbose(context, line) status = await wait_for(process.wait(), timeout=1) if status == 0: @@ -499,17 +619,13 @@ async def gradle_task(context): else: raise CalledProcessError(status, args) finally: - # If logcat never started, then something has gone badly wrong, so the - # user probably wants to see the Gradle output even in non-verbose mode. - if hidden_output and not logcat_started: - sys.stdout.write("".join(hidden_output)) - # Gradle does not stop the tests when interrupted. if context.connected: stop_app(context.connected) async def run_testbed(context): + setup_ci() setup_sdk() setup_testbed() @@ -533,6 +649,12 @@ async def run_testbed(context): except* MySystemExit as e: raise SystemExit(*e.exceptions[0].args) from None except* CalledProcessError as e: + # If Python produced no output, then the user probably wants to see the + # verbose output to explain why the test failed. + if not python_started: + for stream, line in hidden_output: + stream.write(line) + # Extract it from the ExceptionGroup so it can be handled by `main`. raise e.exceptions[0] @@ -597,11 +719,64 @@ def package(context): else: shutil.copy2(src, dst, follow_symlinks=False) + # Strip debug information. + if not context.debug: + so_files = glob(f"{temp_dir}/**/*.so", recursive=True) + run([android_env(context.host)["STRIP"], *so_files], log=False) + dist_dir = subdir(context.host, "dist", create=True) package_path = shutil.make_archive( f"{dist_dir}/python-{version}-{context.host}", "gztar", temp_dir ) print(f"Wrote {package_path}") + return package_path + + +def ci(context): + for step in [ + configure_build_python, + make_build_python, + configure_host_python, + make_host_python, + package, + ]: + caption = ( + step.__name__.replace("_", " ") + .capitalize() + .replace("python", "Python") + ) + print(f"::group::{caption}") + result = step(context) + if step is package: + package_path = result + print("::endgroup::") + + if ( + "GITHUB_ACTIONS" in os.environ + and (platform.system(), platform.machine()) != ("Linux", "x86_64") + ): + print( + "Skipping tests: GitHub Actions does not support the Android " + "emulator on this platform." + ) + else: + with TemporaryDirectory(prefix=SCRIPT_NAME) as temp_dir: + print("::group::Tests") + + # Prove the package is self-contained by using it to run the tests. + shutil.unpack_archive(package_path, temp_dir) + launcher_args = [ + "--managed", "maxVersion", "-v", f"--{context.ci_mode}-ci" + ] + run( + ["./android.py", "test", *launcher_args], + cwd=temp_dir + ) + print("::endgroup::") + + +def env(context): + print_env(android_env(getattr(context, "host", None))) # Handle SIGTERM the same way as SIGINT. This ensures that if we're terminated @@ -615,45 +790,54 @@ def signal_handler(*args): def parse_args(): parser = argparse.ArgumentParser() - subcommands = parser.add_subparsers(dest="subcommand") + subcommands = parser.add_subparsers(dest="subcommand", required=True) + + def add_parser(*args, **kwargs): + parser = subcommands.add_parser(*args, **kwargs) + parser.add_argument( + "-v", "--verbose", action="count", default=0, + help="Show verbose output. Use twice to be even more verbose.") + return parser # Subcommands - build = subcommands.add_parser("build", help="Build everything") - configure_build = subcommands.add_parser("configure-build", - help="Run `configure` for the " - "build Python") - make_build = subcommands.add_parser("make-build", - help="Run `make` for the build Python") - configure_host = subcommands.add_parser("configure-host", - help="Run `configure` for Android") - make_host = subcommands.add_parser("make-host", - help="Run `make` for Android") - subcommands.add_parser( - "clean", help="Delete all build and prefix directories") - subcommands.add_parser( - "build-testbed", help="Build the testbed app") - test = subcommands.add_parser( - "test", help="Run the test suite") - package = subcommands.add_parser("package", help="Make a release package") + build = add_parser( + "build", help="Run configure-build, make-build, configure-host and " + "make-host") + configure_build = add_parser( + "configure-build", help="Run `configure` for the build Python") + add_parser( + "make-build", help="Run `make` for the build Python") + configure_host = add_parser( + "configure-host", help="Run `configure` for Android") + make_host = add_parser( + "make-host", help="Run `make` for Android") + + add_parser("clean", help="Delete all build directories") + add_parser("build-testbed", help="Build the testbed app") + test = add_parser("test", help="Run the testbed app") + package = add_parser("package", help="Make a release package") + ci = add_parser("ci", help="Run build, package and test") + env = add_parser("env", help="Print environment variables") # Common arguments - for subcommand in build, configure_build, configure_host: + for subcommand in [build, configure_build, configure_host, ci]: subcommand.add_argument( "--clean", action="store_true", default=False, dest="clean", - help="Delete the relevant build and prefix directories first") - for subcommand in [build, configure_host, make_host, package]: + help="Delete the relevant build directories first") + + host_commands = [build, configure_host, make_host, package, ci] + if in_source_tree: + host_commands.append(env) + for subcommand in host_commands: subcommand.add_argument( "host", metavar="HOST", choices=HOSTS, help="Host triplet: choices=[%(choices)s]") - for subcommand in build, configure_build, configure_host: + + for subcommand in [build, configure_build, configure_host, ci]: subcommand.add_argument("args", nargs="*", help="Extra arguments to pass to `configure`") # Test arguments - test.add_argument( - "-v", "--verbose", action="count", default=0, - help="Show Gradle output, and non-Python logcat messages. " - "Use twice to include high-volume messages which are rarely useful.") device_group = test.add_mutually_exclusive_group(required=True) device_group.add_argument( "--connected", metavar="SERIAL", help="Run on a connected device. " @@ -661,9 +845,34 @@ def parse_args(): device_group.add_argument( "--managed", metavar="NAME", help="Run on a Gradle-managed device. " "These are defined in `managedDevices` in testbed/app/build.gradle.kts.") + + test.add_argument( + "--site-packages", metavar="DIR", type=abspath, + help="Directory to copy as the app's site-packages.") + test.add_argument( + "--cwd", metavar="DIR", type=abspath, + help="Directory to copy as the app's working directory.") test.add_argument( - "args", nargs="*", help=f"Arguments for `python -m test`. " - f"Separate them from {SCRIPT_NAME}'s own arguments with `--`.") + "args", nargs="*", help=f"Python command-line arguments. " + f"Separate them from {SCRIPT_NAME}'s own arguments with `--`. " + f"If neither -c nor -m are included, `-m test` will be prepended, " + f"which will run Python's own test suite.") + + # Package arguments. + for subcommand in [package, ci]: + subcommand.add_argument( + "-g", action="store_true", default=False, dest="debug", + help="Include debug information in package") + + # CI arguments + for subcommand in [test, ci]: + group = subcommand.add_mutually_exclusive_group(required=subcommand is ci) + group.add_argument( + "--fast-ci", action="store_const", dest="ci_mode", const="fast", + help="Add test arguments for GitHub Actions") + group.add_argument( + "--slow-ci", action="store_const", dest="ci_mode", const="slow", + help="Add test arguments for buildbots") return parser.parse_args() @@ -688,6 +897,8 @@ def main(): "build-testbed": build_testbed, "test": run_testbed, "package": package, + "ci": ci, + "env": env, } try: @@ -702,20 +913,17 @@ def main(): def print_called_process_error(e): for stream_name in ["stdout", "stderr"]: content = getattr(e, stream_name) + if isinstance(content, bytes): + content = content.decode(*DECODE_ARGS) stream = getattr(sys, stream_name) if content: stream.write(content) if not content.endswith("\n"): stream.write("\n") - # Format the command so it can be copied into a shell. shlex uses single - # quotes, so we surround the whole command with double quotes. - args_joined = ( - e.cmd if isinstance(e.cmd, str) - else " ".join(shlex.quote(str(arg)) for arg in e.cmd) - ) + # shlex uses single quotes, so we surround the command with double quotes. print( - f'Command "{args_joined}" returned exit status {e.returncode}' + f'Command "{join_command(e.cmd)}" returned exit status {e.returncode}' ) diff --git a/Android/testbed/app/build.gradle.kts b/Android/testbed/app/build.gradle.kts index c627cb1b0e0b22..14d43d8c4d5c42 100644 --- a/Android/testbed/app/build.gradle.kts +++ b/Android/testbed/app/build.gradle.kts @@ -47,7 +47,7 @@ for ((i, prefix) in prefixes.withIndex()) { val libDir = file("$prefix/lib") val version = run { for (filename in libDir.list()!!) { - """python(\d+\.\d+)""".toRegex().matchEntire(filename)?.let { + """python(\d+\.\d+[a-z]*)""".toRegex().matchEntire(filename)?.let { return@run it.groupValues[1] } } @@ -64,9 +64,10 @@ for ((i, prefix) in prefixes.withIndex()) { val libPythonDir = file("$libDir/python$pythonVersion") val triplet = run { for (filename in libPythonDir.list()!!) { - """_sysconfigdata__android_(.+).py""".toRegex().matchEntire(filename)?.let { - return@run it.groupValues[1] - } + """_sysconfigdata_[a-z]*_android_(.+).py""".toRegex() + .matchEntire(filename)?.let { + return@run it.groupValues[1] + } } throw GradleException("Failed to find Python triplet in $libPythonDir") } @@ -78,20 +79,20 @@ android { val androidEnvFile = file("../../android-env.sh").absoluteFile namespace = "org.python.testbed" - compileSdk = 34 + compileSdk = 35 defaultConfig { applicationId = "org.python.testbed" minSdk = androidEnvFile.useLines { for (line in it) { - """api_level:=(\d+)""".toRegex().find(line)?.let { + """ANDROID_API_LEVEL:=(\d+)""".toRegex().find(line)?.let { return@useLines it.groupValues[1].toInt() } } throw GradleException("Failed to find API level in $androidEnvFile") } - targetSdk = 34 + targetSdk = 35 versionCode = 1 versionName = "1.0" @@ -205,11 +206,29 @@ androidComponents.onVariants { variant -> into("site-packages") { from("$projectDir/src/main/python") + + val sitePackages = findProperty("python.sitePackages") as String? + if (!sitePackages.isNullOrEmpty()) { + if (!file(sitePackages).exists()) { + throw GradleException("$sitePackages does not exist") + } + from(sitePackages) + } } duplicatesStrategy = DuplicatesStrategy.EXCLUDE exclude("**/__pycache__") } + + into("cwd") { + val cwd = findProperty("python.cwd") as String? + if (!cwd.isNullOrEmpty()) { + if (!file(cwd).exists()) { + throw GradleException("$cwd does not exist") + } + from(cwd) + } + } } } diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt index 0e888ab71d87da..e57243566f91dc 100644 --- a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt +++ b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt @@ -17,11 +17,11 @@ class PythonSuite { fun testPython() { val start = System.currentTimeMillis() try { - val context = + val status = PythonTestRunner( InstrumentationRegistry.getInstrumentation().targetContext - val args = - InstrumentationRegistry.getArguments().getString("pythonArgs", "") - val status = PythonTestRunner(context).run(args) + ).run( + InstrumentationRegistry.getArguments().getString("pythonArgs")!!, + ) assertEquals(0, status) } finally { // Make sure the process lives long enough for the test script to diff --git a/Android/testbed/app/src/main/c/main_activity.c b/Android/testbed/app/src/main/c/main_activity.c index ec7f93a3e5ee13..7f024f0a348b61 100644 --- a/Android/testbed/app/src/main/c/main_activity.c +++ b/Android/testbed/app/src/main/c/main_activity.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,13 @@ static void throw_runtime_exception(JNIEnv *env, const char *message) { message); } +static void throw_errno(JNIEnv *env, const char *error_prefix) { + char error_message[1024]; + snprintf(error_message, sizeof(error_message), + "%s: %s", error_prefix, strerror(errno)); + throw_runtime_exception(env, error_message); +} + // --- Stdio redirection ------------------------------------------------------ @@ -95,10 +103,7 @@ JNIEXPORT void JNICALL Java_org_python_testbed_PythonTestRunner_redirectStdioToL for (StreamInfo *si = STREAMS; si->file; si++) { char *error_prefix; if ((error_prefix = redirect_stream(si))) { - char error_message[1024]; - snprintf(error_message, sizeof(error_message), - "%s: %s", error_prefix, strerror(errno)); - throw_runtime_exception(env, error_message); + throw_errno(env, error_prefix); return; } } @@ -107,13 +112,38 @@ JNIEXPORT void JNICALL Java_org_python_testbed_PythonTestRunner_redirectStdioToL // --- Python initialization --------------------------------------------------- -static PyStatus set_config_string( - JNIEnv *env, PyConfig *config, wchar_t **config_str, jstring value -) { - const char *value_utf8 = (*env)->GetStringUTFChars(env, value, NULL); - PyStatus status = PyConfig_SetBytesString(config, config_str, value_utf8); - (*env)->ReleaseStringUTFChars(env, value, value_utf8); - return status; +static char *init_signals() { + // Some tests use SIGUSR1, but that's blocked by default in an Android app in + // order to make it available to `sigwait` in the Signal Catcher thread. + // (https://cs.android.com/android/platform/superproject/+/android14-qpr3-release:art/runtime/signal_catcher.cc). + // That thread's functionality is only useful for debugging the JVM, so disabling + // it should not weaken the tests. + // + // There's no safe way of stopping the thread completely (#123982), but simply + // unblocking SIGUSR1 is enough to fix most tests. + // + // However, in tests that generate multiple different signals in quick + // succession, it's possible for SIGUSR1 to arrive while the main thread is busy + // running the C-level handler for a different signal. In that case, the SIGUSR1 + // may be sent to the Signal Catcher thread instead, which will generate a log + // message containing the text "reacting to signal". + // + // Such tests may need to be changed in one of the following ways: + // * Use a signal other than SIGUSR1 (e.g. test_stress_delivery_simultaneous in + // test_signal.py). + // * Send the signal to a specific thread rather than the whole process (e.g. + // test_signals in test_threadsignals.py. + sigset_t set; + if (sigemptyset(&set)) { + return "sigemptyset"; + } + if (sigaddset(&set, SIGUSR1)) { + return "sigaddset"; + } + if ((errno = pthread_sigmask(SIG_UNBLOCK, &set, NULL))) { + return "pthread_sigmask"; + } + return NULL; } static void throw_status(JNIEnv *env, PyStatus status) { @@ -121,27 +151,47 @@ static void throw_status(JNIEnv *env, PyStatus status) { } JNIEXPORT int JNICALL Java_org_python_testbed_PythonTestRunner_runPython( - JNIEnv *env, jobject obj, jstring home, jstring runModule + JNIEnv *env, jobject obj, jstring home, jarray args ) { + const char *home_utf8 = (*env)->GetStringUTFChars(env, home, NULL); + char cwd[PATH_MAX]; + snprintf(cwd, sizeof(cwd), "%s/%s", home_utf8, "cwd"); + if (chdir(cwd)) { + throw_errno(env, "chdir"); + return 1; + } + + char *error_prefix; + if ((error_prefix = init_signals())) { + throw_errno(env, error_prefix); + return 1; + } + PyConfig config; PyStatus status; - PyConfig_InitIsolatedConfig(&config); + PyConfig_InitPythonConfig(&config); - status = set_config_string(env, &config, &config.home, home); - if (PyStatus_Exception(status)) { + jsize argc = (*env)->GetArrayLength(env, args); + const char *argv[argc + 1]; + for (int i = 0; i < argc; i++) { + jobject arg = (*env)->GetObjectArrayElement(env, args, i); + argv[i] = (*env)->GetStringUTFChars(env, arg, NULL); + } + argv[argc] = NULL; + + // PyConfig_SetBytesArgv "must be called before other methods, since the + // preinitialization configuration depends on command line arguments" + if (PyStatus_Exception(status = PyConfig_SetBytesArgv(&config, argc, (char**)argv))) { throw_status(env, status); return 1; } - status = set_config_string(env, &config, &config.run_module, runModule); + status = PyConfig_SetBytesString(&config, &config.home, home_utf8); if (PyStatus_Exception(status)) { throw_status(env, status); return 1; } - // Some tests generate SIGPIPE and SIGXFSZ, which should be ignored. - config.install_signal_handlers = 1; - status = Py_InitializeFromConfig(&config); if (PyStatus_Exception(status)) { throw_status(env, status); diff --git a/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt b/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt index c4bf6cbe83d8cd..5727b0fe6c30c0 100644 --- a/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt +++ b/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt @@ -5,6 +5,7 @@ import android.os.* import android.system.Os import android.widget.TextView import androidx.appcompat.app.* +import org.json.JSONArray import java.io.* @@ -15,18 +16,25 @@ class MainActivity : AppCompatActivity() { override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) setContentView(R.layout.activity_main) - val status = PythonTestRunner(this).run("-W -uall") + val status = PythonTestRunner(this).run("""["-m", "test", "-W", "-uall"]""") findViewById(R.id.tvHello).text = "Exit status $status" } } class PythonTestRunner(val context: Context) { - /** @param args Extra arguments for `python -m test`. - * @return The Python exit status: zero if the tests passed, nonzero if - * they failed. */ - fun run(args: String = "") : Int { - Os.setenv("PYTHON_ARGS", args, true) + /** Run Python. + * + * @param args Python command-line, encoded as JSON. + * @return The Python exit status: zero on success, nonzero on failure. */ + fun run(args: String) : Int { + // We leave argument 0 as an empty string, which is a placeholder for the + // executable name in embedded mode. + val argsJsonArray = JSONArray(args) + val argsStringArray = Array(argsJsonArray.length() + 1) { it -> ""} + for (i in 0..) : Int } diff --git a/Android/testbed/app/src/main/python/main.py b/Android/testbed/app/src/main/python/main.py deleted file mode 100644 index d6941b14412fcc..00000000000000 --- a/Android/testbed/app/src/main/python/main.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import runpy -import shlex -import signal -import sys - -# Some tests use SIGUSR1, but that's blocked by default in an Android app in -# order to make it available to `sigwait` in the Signal Catcher thread. -# (https://cs.android.com/android/platform/superproject/+/android14-qpr3-release:art/runtime/signal_catcher.cc). -# That thread's functionality is only useful for debugging the JVM, so disabling -# it should not weaken the tests. -# -# There's no safe way of stopping the thread completely (#123982), but simply -# unblocking SIGUSR1 is enough to fix most tests. -# -# However, in tests that generate multiple different signals in quick -# succession, it's possible for SIGUSR1 to arrive while the main thread is busy -# running the C-level handler for a different signal. In that case, the SIGUSR1 -# may be sent to the Signal Catcher thread instead, which will generate a log -# message containing the text "reacting to signal". -# -# Such tests may need to be changed in one of the following ways: -# * Use a signal other than SIGUSR1 (e.g. test_stress_delivery_simultaneous in -# test_signal.py). -# * Send the signal to a specific thread rather than the whole process (e.g. -# test_signals in test_threadsignals.py. -signal.pthread_sigmask(signal.SIG_UNBLOCK, [signal.SIGUSR1]) - -sys.argv[1:] = shlex.split(os.environ["PYTHON_ARGS"]) - -# The test module will call sys.exit to indicate whether the tests passed. -runpy.run_module("test") diff --git a/Android/testbed/build.gradle.kts b/Android/testbed/build.gradle.kts index 4d1d6f87594da3..451517b3f1aeab 100644 --- a/Android/testbed/build.gradle.kts +++ b/Android/testbed/build.gradle.kts @@ -1,5 +1,5 @@ // Top-level build file where you can add configuration options common to all sub-projects/modules. plugins { - id("com.android.application") version "8.6.1" apply false + id("com.android.application") version "8.10.0" apply false id("org.jetbrains.kotlin.android") version "1.9.22" apply false } diff --git a/Android/testbed/gradle/wrapper/gradle-wrapper.properties b/Android/testbed/gradle/wrapper/gradle-wrapper.properties index 36529c896426b0..5d42fbae084da1 100644 --- a/Android/testbed/gradle/wrapper/gradle-wrapper.properties +++ b/Android/testbed/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ #Mon Feb 19 20:29:06 GMT 2024 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/Apple/.ruff.toml b/Apple/.ruff.toml new file mode 100644 index 00000000000000..4cdc39ebee4be9 --- /dev/null +++ b/Apple/.ruff.toml @@ -0,0 +1,22 @@ +extend = "../.ruff.toml" # Inherit the project-wide settings + +[format] +preview = true +docstring-code-format = true + +[lint] +select = [ + "C4", # flake8-comprehensions + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "PYI", # flake8-pyi + "RUF100", # Ban unused `# noqa` comments + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] diff --git a/Apple/__main__.py b/Apple/__main__.py new file mode 100644 index 00000000000000..256966e76c2c97 --- /dev/null +++ b/Apple/__main__.py @@ -0,0 +1,1065 @@ +#!/usr/bin/env python3 +########################################################################## +# Apple XCframework build script +# +# This script simplifies the process of configuring, compiling and packaging an +# XCframework for an Apple platform. +# +# At present, it only supports iOS, but it has been constructed so that it +# could be used on any Apple platform. +# +# The simplest entry point is: +# +# $ python Apple ci iOS +# +# which will: +# * Clean any pre-existing build artefacts +# * Configure and make a Python that can be used for the build +# * Configure and make a Python for each supported iOS architecture and ABI +# * Combine the outputs of the builds from the previous step into a single +# XCframework, merging binaries into a "fat" binary if necessary +# * Clone a copy of the testbed, configured to use the XCframework +# * Construct a tarball containing the release artefacts +# * Run the test suite using the generated XCframework. +# +# This is the complete sequence that would be needed in CI to build and test +# a candidate release artefact. +# +# Each individual step can be invoked individually - there are commands to +# clean, configure-build, make-build, configure-host, make-host, package, and +# test. +# +# There is also a build command that can be used to combine the configure and +# make steps for the build Python, an individual host, all hosts, or all +# builds. +########################################################################## +from __future__ import annotations + +import argparse +import os +import platform +import re +import shlex +import shutil +import signal +import subprocess +import sys +import sysconfig +import time +from collections.abc import Callable, Sequence +from contextlib import contextmanager +from datetime import datetime, timezone +from os.path import basename, relpath +from pathlib import Path +from subprocess import CalledProcessError + +EnvironmentT = dict[str, str] +ArgsT = Sequence[str | Path] + +SCRIPT_NAME = Path(__file__).name +PYTHON_DIR = Path(__file__).resolve().parent.parent + +CROSS_BUILD_DIR = PYTHON_DIR / "cross-build" + +HOSTS: dict[str, dict[str, dict[str, str]]] = { + # Structure of this data: + # * Platform identifier + # * an XCframework slice that must exist for that platform + # * a host triple: the multiarch spec for that host + "iOS": { + "ios-arm64": { + "arm64-apple-ios": "arm64-iphoneos", + }, + "ios-arm64_x86_64-simulator": { + "arm64-apple-ios-simulator": "arm64-iphonesimulator", + "x86_64-apple-ios-simulator": "x86_64-iphonesimulator", + }, + }, +} + + +def subdir(name: str, create: bool = False) -> Path: + """Ensure that a cross-build directory for the given name exists.""" + path = CROSS_BUILD_DIR / name + if not path.exists(): + if not create: + sys.exit( + f"{path} does not exist. Create it by running the appropriate " + f"`configure` subcommand of {SCRIPT_NAME}." + ) + else: + path.mkdir(parents=True) + return path + + +def run( + command: ArgsT, + *, + host: str | None = None, + env: EnvironmentT | None = None, + log: bool | None = True, + **kwargs, +) -> subprocess.CompletedProcess: + """Run a command in an Apple development environment. + + Optionally logs the executed command to the console. + """ + kwargs.setdefault("check", True) + if env is None: + env = os.environ.copy() + + if host: + host_env = apple_env(host) + print_env(host_env) + env.update(host_env) + + if log: + print(">", join_command(command)) + return subprocess.run(command, env=env, **kwargs) + + +def join_command(args: str | Path | ArgsT) -> str: + """Format a command so it can be copied into a shell. + + Similar to `shlex.join`, but also accepts arguments which are Paths, or a + single string/Path outside of a list. + """ + if isinstance(args, (str, Path)): + return str(args) + else: + return shlex.join(map(str, args)) + + +def print_env(env: EnvironmentT) -> None: + """Format the environment so it can be pasted into a shell.""" + for key, value in sorted(env.items()): + print(f"export {key}={shlex.quote(value)}") + + +def apple_env(host: str) -> EnvironmentT: + """Construct an Apple development environment for the given host.""" + env = { + "PATH": ":".join([ + str(PYTHON_DIR / "Apple/iOS/Resources/bin"), + str(subdir(host) / "prefix"), + "/usr/bin", + "/bin", + "/usr/sbin", + "/sbin", + "/Library/Apple/usr/bin", + ]), + } + + return env + + +def delete_path(name: str) -> None: + """Delete the named cross-build directory, if it exists.""" + path = CROSS_BUILD_DIR / name + if path.exists(): + print(f"Deleting {path} ...") + shutil.rmtree(path) + + +def all_host_triples(platform: str) -> list[str]: + """Return all host triples for the given platform. + + The host triples are the platform definitions used as input to configure + (e.g., "arm64-apple-ios-simulator"). + """ + triples = [] + for slice_name, slice_parts in HOSTS[platform].items(): + triples.extend(list(slice_parts)) + return triples + + +def clean(context: argparse.Namespace, target: str = "all") -> None: + """The implementation of the "clean" command.""" + # If we're explicitly targeting the build, there's no platform or + # distribution artefacts. If we're cleaning tests, we keep all built + # artefacts. Otherwise, the built artefacts must be dirty, so we remove + # them. + if target not in {"build", "test"}: + paths = ["dist", context.platform] + list(HOSTS[context.platform]) + else: + paths = [] + + if target in {"all", "build"}: + paths.append("build") + + if target in {"all", "hosts"}: + paths.extend(all_host_triples(context.platform)) + elif target not in {"build", "test", "package"}: + paths.append(target) + + if target in {"all", "hosts", "test"}: + paths.extend([ + path.name + for path in CROSS_BUILD_DIR.glob(f"{context.platform}-testbed.*") + ]) + + for path in paths: + delete_path(path) + + +def build_python_path() -> Path: + """The path to the build Python binary.""" + build_dir = subdir("build") + binary = build_dir / "python" + if not binary.is_file(): + binary = binary.with_suffix(".exe") + if not binary.is_file(): + raise FileNotFoundError( + f"Unable to find `python(.exe)` in {build_dir}" + ) + + return binary + + +@contextmanager +def group(text: str): + """A context manager that outputs a log marker around a section of a build. + + If running in a GitHub Actions environment, the GitHub syntax for + collapsible log sections is used. + """ + if "GITHUB_ACTIONS" in os.environ: + print(f"::group::{text}") + else: + print(f"===== {text} " + "=" * (70 - len(text))) + + yield + + if "GITHUB_ACTIONS" in os.environ: + print("::endgroup::") + else: + print() + + +@contextmanager +def cwd(subdir: Path): + """A context manager that sets the current working directory.""" + orig = os.getcwd() + os.chdir(subdir) + yield + os.chdir(orig) + + +def configure_build_python(context: argparse.Namespace) -> None: + """The implementation of the "configure-build" command.""" + if context.clean: + clean(context, "build") + + with ( + group("Configuring build Python"), + cwd(subdir("build", create=True)), + ): + command = [relpath(PYTHON_DIR / "configure")] + if context.args: + command.extend(context.args) + run(command) + + +def make_build_python(context: argparse.Namespace) -> None: + """The implementation of the "make-build" command.""" + with ( + group("Compiling build Python"), + cwd(subdir("build")), + ): + run(["make", "-j", str(os.cpu_count())]) + + +def apple_target(host: str) -> str: + """Return the Apple platform identifier for a given host triple.""" + for _, platform_slices in HOSTS.items(): + for slice_name, slice_parts in platform_slices.items(): + for host_triple, multiarch in slice_parts.items(): + if host == host_triple: + return ".".join(multiarch.split("-")[::-1]) + + raise KeyError(host) + + +def apple_multiarch(host: str) -> str: + """Return the multiarch descriptor for a given host triple.""" + for _, platform_slices in HOSTS.items(): + for slice_name, slice_parts in platform_slices.items(): + for host_triple, multiarch in slice_parts.items(): + if host == host_triple: + return multiarch + + raise KeyError(host) + + +def unpack_deps( + platform: str, + host: str, + prefix_dir: Path, + cache_dir: Path, +) -> None: + """Unpack binary dependencies into a provided directory. + + Downloads binaries if they aren't already present. Downloads will be stored + in provided cache directory. + + On iOS, as a safety mechanism, any dynamic libraries will be purged from + the unpacked dependencies. + """ + # To create new builds of these dependencies, usually all that's necessary + # is to push a tag to the cpython-apple-source-deps repository, and GitHub + # Actions will do the rest. + # + # If you're a member of the Python core team, and you'd like to be able to + # push these tags yourself, please contact Malcolm Smith or Russell + # Keith-Magee. + deps_url = "https://github.com/beeware/cpython-apple-source-deps/releases/download" + for name_ver in [ + "BZip2-1.0.8-2", + "libFFI-3.4.7-2", + "OpenSSL-3.0.18-1", + "XZ-5.6.4-2", + "mpdecimal-4.0.0-2", + "zstd-1.5.7-1", + ]: + filename = f"{name_ver.lower()}-{apple_target(host)}.tar.gz" + archive_path = download( + f"{deps_url}/{name_ver}/{filename}", + target_dir=cache_dir, + ) + shutil.unpack_archive(archive_path, prefix_dir) + + # Dynamic libraries will be preferentially linked over static; + # On iOS, ensure that no dylibs are available in the prefix folder. + if platform == "iOS": + for dylib in prefix_dir.glob("**/*.dylib"): + dylib.unlink() + + +def download(url: str, target_dir: Path) -> Path: + """Download the specified URL into the given directory. + + :return: The path to the downloaded archive. + """ + target_path = Path(target_dir).resolve() + target_path.mkdir(exist_ok=True, parents=True) + + out_path = target_path / basename(url) + if not Path(out_path).is_file(): + run([ + "curl", + "-Lf", + "--retry", + "5", + "--retry-all-errors", + "-o", + out_path, + url, + ]) + else: + print(f"Using cached version of {basename(url)}") + return out_path + + +def configure_host_python( + context: argparse.Namespace, + host: str | None = None, +) -> None: + """The implementation of the "configure-host" command.""" + if host is None: + host = context.host + + if context.clean: + clean(context, host) + + host_dir = subdir(host, create=True) + prefix_dir = host_dir / "prefix" + + with group(f"Downloading dependencies ({host})"): + if not prefix_dir.exists(): + prefix_dir.mkdir() + unpack_deps(context.platform, host, prefix_dir, context.cache_dir) + else: + print("Dependencies already installed") + + with ( + group(f"Configuring host Python ({host})"), + cwd(host_dir), + ): + command = [ + # Basic cross-compiling configuration + relpath(PYTHON_DIR / "configure"), + f"--host={host}", + f"--build={sysconfig.get_config_var('BUILD_GNU_TYPE')}", + f"--with-build-python={build_python_path()}", + "--with-system-libmpdec", + "--enable-framework", + # Dependent libraries. + f"--with-openssl={prefix_dir}", + f"LIBLZMA_CFLAGS=-I{prefix_dir}/include", + f"LIBLZMA_LIBS=-L{prefix_dir}/lib -llzma", + f"LIBFFI_CFLAGS=-I{prefix_dir}/include", + f"LIBFFI_LIBS=-L{prefix_dir}/lib -lffi", + f"LIBMPDEC_CFLAGS=-I{prefix_dir}/include", + f"LIBMPDEC_LIBS=-L{prefix_dir}/lib -lmpdec", + f"LIBZSTD_CFLAGS=-I{prefix_dir}/include", + f"LIBZSTD_LIBS=-L{prefix_dir}/lib -lzstd", + ] + + if context.args: + command.extend(context.args) + run(command, host=host) + + +def make_host_python( + context: argparse.Namespace, + host: str | None = None, +) -> None: + """The implementation of the "make-host" command.""" + if host is None: + host = context.host + + with ( + group(f"Compiling host Python ({host})"), + cwd(subdir(host)), + ): + run(["make", "-j", str(os.cpu_count())], host=host) + run(["make", "install"], host=host) + + +def framework_path(host_triple: str, multiarch: str) -> Path: + """The path to a built single-architecture framework product. + + :param host_triple: The host triple (e.g., arm64-apple-ios-simulator) + :param multiarch: The multiarch identifier (e.g., arm64-simulator) + """ + return CROSS_BUILD_DIR / f"{host_triple}/Apple/iOS/Frameworks/{multiarch}" + + +def package_version(prefix_path: Path) -> str: + """Extract the Python version being built from patchlevel.h.""" + for path in prefix_path.glob("**/patchlevel.h"): + text = path.read_text(encoding="utf-8") + if match := re.search( + r'\n\s*#define\s+PY_VERSION\s+"(.+)"\s*\n', text + ): + version = match[1] + # If not building against a tagged commit, add a timestamp to the + # version. Follow the PyPA version number rules, as this will make + # it easier to process with other tools. The version will have a + # `+` suffix once any official release has been made; a freshly + # forked main branch will have a version of 3.X.0a0. + if version.endswith("a0"): + version += "+" + if version.endswith("+"): + version += datetime.now(timezone.utc).strftime("%Y%m%d.%H%M%S") + + return version + + sys.exit("Unable to determine Python version being packaged.") + + +def lib_platform_files(dirname, names): + """A file filter that ignores platform-specific files in lib.""" + path = Path(dirname) + if ( + path.parts[-3] == "lib" + and path.parts[-2].startswith("python") + and path.parts[-1] == "lib-dynload" + ): + return names + elif path.parts[-2] == "lib" and path.parts[-1].startswith("python"): + ignored_names = { + name + for name in names + if ( + name.startswith("_sysconfigdata_") + or name.startswith("_sysconfig_vars_") + or name == "build-details.json" + ) + } + elif path.parts[-1] == "lib": + ignored_names = { + name + for name in names + if name.startswith("libpython") and name.endswith(".dylib") + } + else: + ignored_names = set() + + return ignored_names + + +def lib_non_platform_files(dirname, names): + """A file filter that ignores anything *except* platform-specific files + in the lib directory. + """ + path = Path(dirname) + if path.parts[-2] == "lib" and path.parts[-1].startswith("python"): + return ( + set(names) - lib_platform_files(dirname, names) - {"lib-dynload"} + ) + else: + return set() + + +def create_xcframework(platform: str) -> str: + """Build an XCframework from the component parts for the platform. + + :return: The version number of the Python version that was packaged. + """ + package_path = CROSS_BUILD_DIR / platform + try: + package_path.mkdir() + except FileExistsError: + raise RuntimeError( + f"{platform} XCframework already exists; do you need to run " + "with --clean?" + ) from None + + frameworks = [] + # Merge Frameworks for each component SDK. If there's only one architecture + # for the SDK, we can use the compiled Python.framework as-is. However, if + # there's more than architecture, we need to merge the individual built + # frameworks into a merged "fat" framework. + for slice_name, slice_parts in HOSTS[platform].items(): + # Some parts are the same across all slices, so we use can any of the + # host frameworks as the source for the merged version. Use the first + # one on the list, as it's as representative as any other. + first_host_triple, first_multiarch = next(iter(slice_parts.items())) + first_framework = ( + framework_path(first_host_triple, first_multiarch) + / "Python.framework" + ) + + if len(slice_parts) == 1: + # The first framework is the only framework, so copy it. + print(f"Copying framework for {slice_name}...") + frameworks.append(first_framework) + else: + print(f"Merging framework for {slice_name}...") + slice_path = CROSS_BUILD_DIR / slice_name + slice_framework = slice_path / "Python.framework" + slice_framework.mkdir(exist_ok=True, parents=True) + + # Copy the Info.plist + shutil.copy( + first_framework / "Info.plist", + slice_framework / "Info.plist", + ) + + # Copy the headers + shutil.copytree( + first_framework / "Headers", + slice_framework / "Headers", + ) + + # Create the "fat" library binary for the slice + run( + ["lipo", "-create", "-output", slice_framework / "Python"] + + [ + ( + framework_path(host_triple, multiarch) + / "Python.framework/Python" + ) + for host_triple, multiarch in slice_parts.items() + ] + ) + + # Add this merged slice to the list to be added to the XCframework + frameworks.append(slice_framework) + + print() + print("Build XCframework...") + cmd = [ + "xcodebuild", + "-create-xcframework", + "-output", + package_path / "Python.xcframework", + ] + for framework in frameworks: + cmd.extend(["-framework", framework]) + + run(cmd) + + # Extract the package version from the merged framework + version = package_version(package_path / "Python.xcframework") + version_tag = ".".join(version.split(".")[:2]) + + # On non-macOS platforms, each framework in XCframework only contains the + # headers, libPython, plus an Info.plist. Other resources like the standard + # library and binary shims aren't allowed to live in framework; they need + # to be copied in separately. + print() + print("Copy additional resources...") + has_common_stdlib = False + for slice_name, slice_parts in HOSTS[platform].items(): + # Some parts are the same across all slices, so we can any of the + # host frameworks as the source for the merged version. + first_host_triple, first_multiarch = next(iter(slice_parts.items())) + first_path = framework_path(first_host_triple, first_multiarch) + first_framework = first_path / "Python.framework" + + slice_path = package_path / f"Python.xcframework/{slice_name}" + slice_framework = slice_path / "Python.framework" + + # Copy the binary helpers + print(f" - {slice_name} binaries") + shutil.copytree(first_path / "bin", slice_path / "bin") + + # Copy the include path (a symlink to the framework headers) + print(f" - {slice_name} include files") + shutil.copytree( + first_path / "include", + slice_path / "include", + symlinks=True, + ) + + # Copy in the cross-architecture pyconfig.h + shutil.copy( + PYTHON_DIR / f"Apple/{platform}/Resources/pyconfig.h", + slice_framework / "Headers/pyconfig.h", + ) + + print(f" - {slice_name} shared library") + # Create a simlink for the fat library + shared_lib = slice_path / f"lib/libpython{version_tag}.dylib" + shared_lib.parent.mkdir() + shared_lib.symlink_to("../Python.framework/Python") + + print(f" - {slice_name} architecture-specific files") + for host_triple, multiarch in slice_parts.items(): + print(f" - {multiarch} standard library") + arch, _ = multiarch.split("-", 1) + + if not has_common_stdlib: + print(" - using this architecture as the common stdlib") + shutil.copytree( + framework_path(host_triple, multiarch) / "lib", + package_path / "Python.xcframework/lib", + ignore=lib_platform_files, + symlinks=True, + ) + has_common_stdlib = True + + shutil.copytree( + framework_path(host_triple, multiarch) / "lib", + slice_path / f"lib-{arch}", + ignore=lib_non_platform_files, + symlinks=True, + ) + + # Copy the host's pyconfig.h to an architecture-specific name. + arch = multiarch.split("-")[0] + host_path = ( + CROSS_BUILD_DIR + / host_triple + / "Apple/iOS/Frameworks" + / multiarch + ) + host_framework = host_path / "Python.framework" + shutil.copy( + host_framework / "Headers/pyconfig.h", + slice_framework / f"Headers/pyconfig-{arch}.h", + ) + + # Apple identifies certain libraries as "security risks"; if you + # statically link those libraries into a Framework, you become + # responsible for providing a privacy manifest for that framework. + xcprivacy_file = { + "OpenSSL": subdir(host_triple) + / "prefix/share/OpenSSL.xcprivacy" + } + print(f" - {multiarch} xcprivacy files") + for module, lib in [ + ("_hashlib", "OpenSSL"), + ("_ssl", "OpenSSL"), + ]: + shutil.copy( + xcprivacy_file[lib], + slice_path + / f"lib-{arch}/python{version_tag}" + / f"lib-dynload/{module}.xcprivacy", + ) + + print(" - build tools") + shutil.copytree( + PYTHON_DIR / "Apple/testbed/Python.xcframework/build", + package_path / "Python.xcframework/build", + ) + + return version + + +def package(context: argparse.Namespace) -> None: + """The implementation of the "package" command.""" + if context.clean: + clean(context, "package") + + with group("Building package"): + # Create an XCframework + version = create_xcframework(context.platform) + + # Clone testbed + print() + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + CROSS_BUILD_DIR / context.platform / "Python.xcframework", + CROSS_BUILD_DIR / context.platform / "testbed", + ]) + + # Build the final archive + archive_name = ( + CROSS_BUILD_DIR + / "dist" + / f"python-{version}-{context.platform}-XCframework" + ) + + print() + print("Create package archive...") + shutil.make_archive( + str(CROSS_BUILD_DIR / archive_name), + format="gztar", + root_dir=CROSS_BUILD_DIR / context.platform, + base_dir=".", + ) + print() + print(f"{archive_name.relative_to(PYTHON_DIR)}.tar.gz created.") + + +def build(context: argparse.Namespace, host: str | None = None) -> None: + """The implementation of the "build" command.""" + if host is None: + host = context.host + + if context.clean: + clean(context, host) + + if host in {"all", "build"}: + for step in [ + configure_build_python, + make_build_python, + ]: + step(context) + + if host == "build": + hosts = [] + elif host in {"all", "hosts"}: + hosts = all_host_triples(context.platform) + else: + hosts = [host] + + for step_host in hosts: + for step in [ + configure_host_python, + make_host_python, + ]: + step(context, host=step_host) + + if host in {"all", "hosts"}: + package(context) + + +def test(context: argparse.Namespace, host: str | None = None) -> None: # noqa: PT028 + """The implementation of the "test" command.""" + if host is None: + host = context.host + + if context.clean: + clean(context, "test") + + with group(f"Test {'XCframework' if host in {'all', 'hosts'} else host}"): + timestamp = str(time.time_ns())[:-6] + testbed_dir = ( + CROSS_BUILD_DIR / f"{context.platform}-testbed.{timestamp}" + ) + if host in {"all", "hosts"}: + framework_path = ( + CROSS_BUILD_DIR / context.platform / "Python.xcframework" + ) + else: + build_arch = platform.machine() + host_arch = host.split("-")[0] + + if not host.endswith("-simulator"): + print("Skipping test suite non-simulator build.") + return + elif build_arch != host_arch: + print( + f"Skipping test suite for an {host_arch} build " + f"on an {build_arch} machine." + ) + return + else: + framework_path = ( + CROSS_BUILD_DIR + / host + / f"Apple/{context.platform}" + / f"Frameworks/{apple_multiarch(host)}" + ) + + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + framework_path, + testbed_dir, + ]) + + run( + [ + sys.executable, + testbed_dir, + "run", + "--verbose", + ] + + ( + ["--simulator", str(context.simulator)] + if context.simulator + else [] + ) + + [ + "--", + "test", + f"--{context.ci_mode}-ci", + "--single-process", + "--no-randomize", + # Timeout handling requires subprocesses; explicitly setting + # the timeout to -1 disables the faulthandler. + "--timeout=-1", + # Adding Python options requires the use of a subprocess to + # start a new Python interpreter. + "--dont-add-python-opts", + ] + ) + + +def apple_sim_host(platform_name: str) -> str: + """Determine the native simulator target for this platform.""" + for _, slice_parts in HOSTS[platform_name].items(): + for host_triple in slice_parts: + parts = host_triple.split("-") + if parts[0] == platform.machine() and parts[-1] == "simulator": + return host_triple + + raise KeyError(platform_name) + + +def ci(context: argparse.Namespace) -> None: + """The implementation of the "ci" command. + + In "Fast" mode, this compiles the build python, and the simulator for the + build machine's architecture; and runs the test suite with `--fast-ci` + configuration. + + In "Slow" mode, it compiles the build python, plus all candidate + architectures (both device and simulator); then runs the test suite with + `--slow-ci` configuration. + """ + clean(context, "all") + if context.ci_mode == "slow": + # In slow mode, build and test the full XCframework + build(context, host="all") + test(context, host="all") + else: + # In fast mode, just build the simulator platform. + sim_host = apple_sim_host(context.platform) + build(context, host="build") + build(context, host=sim_host) + test(context, host=sim_host) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=( + "A tool for managing the build, package and test process of " + "CPython on Apple platforms." + ), + ) + parser.suggest_on_error = True + subcommands = parser.add_subparsers(dest="subcommand", required=True) + + clean = subcommands.add_parser( + "clean", + help="Delete all build directories", + ) + + configure_build = subcommands.add_parser( + "configure-build", help="Run `configure` for the build Python" + ) + subcommands.add_parser( + "make-build", help="Run `make` for the build Python" + ) + configure_host = subcommands.add_parser( + "configure-host", + help="Run `configure` for a specific platform and target", + ) + make_host = subcommands.add_parser( + "make-host", + help="Run `make` for a specific platform and target", + ) + package = subcommands.add_parser( + "package", + help="Create a release package for the platform", + ) + build = subcommands.add_parser( + "build", + help="Build all platform targets and create the XCframework", + ) + test = subcommands.add_parser( + "test", + help="Run the testbed for a specific platform", + ) + ci = subcommands.add_parser( + "ci", + help="Run build, package, and test", + ) + + # platform argument + for cmd in [clean, configure_host, make_host, package, build, test, ci]: + cmd.add_argument( + "platform", + choices=HOSTS.keys(), + help="The target platform to build", + ) + + # host triple argument + for cmd in [configure_host, make_host]: + cmd.add_argument( + "host", + help="The host triple to build (e.g., arm64-apple-ios-simulator)", + ) + # optional host triple argument + for cmd in [clean, build, test]: + cmd.add_argument( + "host", + nargs="?", + default="all", + help=( + "The host triple to build (e.g., arm64-apple-ios-simulator), " + "or 'build' for just the build platform, or 'hosts' for all " + "host platforms, or 'all' for the build platform and all " + "hosts. Defaults to 'all'" + ), + ) + + # --clean option + for cmd in [configure_build, configure_host, build, package, test, ci]: + cmd.add_argument( + "--clean", + action="store_true", + default=False, + dest="clean", + help="Delete the relevant build directories first", + ) + + # --cache-dir option + for cmd in [configure_host, build, ci]: + cmd.add_argument( + "--cache-dir", + default="./cross-build/downloads", + help="The directory to store cached downloads.", + ) + + # --simulator option + for cmd in [test, ci]: + cmd.add_argument( + "--simulator", + help=( + "The name of the simulator to use (eg: 'iPhone 16e'). " + "Defaults to the most recently released 'entry level' " + "iPhone device. Device architecture and OS version can also " + "be specified; e.g., " + "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would " + "run on an ARM64 iPhone 16 Pro simulator running iOS 26.0." + ), + ) + group = cmd.add_mutually_exclusive_group() + group.add_argument( + "--fast-ci", + action="store_const", + dest="ci_mode", + const="fast", + help="Add test arguments for GitHub Actions", + ) + group.add_argument( + "--slow-ci", + action="store_const", + dest="ci_mode", + const="slow", + help="Add test arguments for buildbots", + ) + + for subcommand in [configure_build, configure_host, build, ci]: + subcommand.add_argument( + "args", nargs="*", help="Extra arguments to pass to `configure`" + ) + + return parser.parse_args() + + +def print_called_process_error(e: subprocess.CalledProcessError) -> None: + for stream_name in ["stdout", "stderr"]: + content = getattr(e, stream_name) + stream = getattr(sys, stream_name) + if content: + stream.write(content) + if not content.endswith("\n"): + stream.write("\n") + + # shlex uses single quotes, so we surround the command with double quotes. + print( + f'Command "{join_command(e.cmd)}" returned exit status {e.returncode}' + ) + + +def main() -> None: + # Handle SIGTERM the same way as SIGINT. This ensures that if we're + # terminated by the buildbot worker, we'll make an attempt to clean up our + # subprocesses. + def signal_handler(*args): + os.kill(os.getpid(), signal.SIGINT) + + signal.signal(signal.SIGTERM, signal_handler) + + # Process command line arguments + context = parse_args() + dispatch: dict[str, Callable] = { + "clean": clean, + "configure-build": configure_build_python, + "make-build": make_build_python, + "configure-host": configure_host_python, + "make-host": make_host_python, + "package": package, + "build": build, + "test": test, + "ci": ci, + } + + try: + dispatch[context.subcommand](context) + except CalledProcessError as e: + print() + print_called_process_error(e) + sys.exit(1) + except RuntimeError as e: + print() + print(e) + sys.exit(2) + + +if __name__ == "__main__": + # Under the buildbot, stdout is not a TTY, but we must still flush after + # every line to make sure our output appears in the correct order relative + # to the output of our subprocesses. + for stream in [sys.stdout, sys.stderr]: + stream.reconfigure(line_buffering=True) + + main() diff --git a/Apple/iOS/README.md b/Apple/iOS/README.md new file mode 100644 index 00000000000000..7ee257b5d648f4 --- /dev/null +++ b/Apple/iOS/README.md @@ -0,0 +1,339 @@ +# Python on iOS README + +**iOS support is [tier 3](https://peps.python.org/pep-0011/#tier-3).** + +This document provides a quick overview of some iOS specific features in the +Python distribution. + +These instructions are only needed if you're planning to compile Python for iOS +yourself. Most users should *not* need to do this. If you're looking to +experiment with writing an iOS app in Python, tools such as [BeeWare's +Briefcase](https://briefcase.readthedocs.io) and [Kivy's +Buildozer](https://buildozer.readthedocs.io) will provide a much more +approachable user experience. + +## Compilers for building on iOS + +Building for iOS requires the use of Apple's Xcode tooling. It is strongly +recommended that you use the most recent stable release of Xcode. This will +require the use of the most (or second-most) recently released macOS version, +as Apple does not maintain Xcode for older macOS versions. The Xcode Command +Line Tools are not sufficient for iOS development; you need a *full* Xcode +install. + +If you want to run your code on the iOS simulator, you'll also need to install +an iOS Simulator Platform. You should be prompted to select an iOS Simulator +Platform when you first run Xcode. Alternatively, you can add an iOS Simulator +Platform by selecting an open the Platforms tab of the Xcode Settings panel. + +## Building Python on iOS + +### ABIs and Architectures + +iOS apps can be deployed on physical devices, and on the iOS simulator. Although +the API used on these devices is identical, the ABI is different - you need to +link against different libraries for an iOS device build (`iphoneos`) or an +iOS simulator build (`iphonesimulator`). + +Apple uses the `XCframework` format to allow specifying a single dependency +that supports multiple ABIs. An `XCframework` is a wrapper around multiple +ABI-specific frameworks that share a common API. + +iOS can also support different CPU architectures within each ABI. At present, +there is only a single supported architecture on physical devices - ARM64. +However, the *simulator* supports 2 architectures - ARM64 (for running on Apple +Silicon machines), and x86_64 (for running on older Intel-based machines). + +To support multiple CPU architectures on a single platform, Apple uses a "fat +binary" format - a single physical file that contains support for multiple +architectures. It is possible to compile and use a "thin" single architecture +version of a binary for testing purposes; however, the "thin" binary will not be +portable to machines using other architectures. + +### Building a multi-architecture iOS XCframework + +The `Apple` subfolder of the Python repository acts as a build script that +can be used to coordinate the compilation of a complete iOS XCframework. To use +it, run:: + + python Apple build iOS + +This will: + +* Configure and compile a version of Python to run on the build machine +* Download pre-compiled binary dependencies for each platform +* Configure and build a `Python.framework` for each required architecture and + iOS SDK +* Merge the multiple `Python.framework` folders into a single `Python.xcframework` +* Produce a `.tar.gz` archive in the `cross-build/dist` folder containing + the `Python.xcframework`, plus a copy of the Testbed app pre-configured to + use the XCframework. + +The `Apple` build script has other entry points that will perform the +individual parts of the overall `build` target, plus targets to test the +build, clean the `cross-build` folder of iOS build products, and perform a +complete "build and test" CI run. The `--clean` flag can also be used on +individual commands to ensure that a stale build product are removed before +building. + +### Building a single-architecture framework + +If you're using the `Apple` build script, you won't need to build +individual frameworks. However, if you do need to manually configure an iOS +Python build for a single framework, the following options are available. + +#### iOS specific arguments to configure + +* `--enable-framework[=DIR]` + + This argument specifies the location where the Python.framework will be + installed. If `DIR` is not specified, the framework will be installed into + a subdirectory of the `iOS/Frameworks` folder. + + This argument *must* be provided when configuring iOS builds. iOS does not + support non-framework builds. + +* `--with-framework-name=NAME` + + Specify the name for the Python framework; defaults to `Python`. + + > [!NOTE] + > Unless you know what you're doing, changing the name of the Python + > framework on iOS is not advised. If you use this option, you won't be able + > to run the `Apple` build script without making significant manual + > alterations, and you won't be able to use any binary packages unless you + > compile them yourself using your own framework name. + +#### Building Python for iOS + +The Python build system will create a `Python.framework` that supports a +*single* ABI with a *single* architecture. Unlike macOS, iOS does not allow a +framework to contain non-library content, so the iOS build will produce a +`bin` and `lib` folder in the same output folder as `Python.framework`. +The `lib` folder will be needed at runtime to support the Python library. + +If you want to use Python in a real iOS project, you need to produce multiple +`Python.framework` builds, one for each ABI and architecture. iOS builds of +Python *must* be constructed as framework builds. To support this, you must +provide the `--enable-framework` flag when configuring the build. The build +also requires the use of cross-compilation. The minimal commands for building +Python for the ARM64 iOS simulator will look something like: +``` +export PATH="$(pwd)/Apple/iOS/Resources/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/Apple/usr/bin" +./configure \ + --enable-framework \ + --host=arm64-apple-ios-simulator \ + --build=arm64-apple-darwin \ + --with-build-python=/path/to/python.exe +make +make install +``` + +In this invocation: + +* `Apple/iOS/Resources/bin` has been added to the path, providing some shims for the + compilers and linkers needed by the build. Xcode requires the use of `xcrun` + to invoke compiler tooling. However, if `xcrun` is pre-evaluated and the + result passed to `configure`, these results can embed user- and + version-specific paths into the sysconfig data, which limits the portability + of the compiled Python. Alternatively, if `xcrun` is used *as* the compiler, + it requires that compiler variables like `CC` include spaces, which can + cause significant problems with many C configuration systems which assume that + `CC` will be a single executable. + + To work around this problem, the `Apple/iOS/Resources/bin` folder contains some + wrapper scripts that present as simple compilers and linkers, but wrap + underlying calls to `xcrun`. This allows configure to use a `CC` + definition without spaces, and without user- or version-specific paths, while + retaining the ability to adapt to the local Xcode install. These scripts are + included in the `bin` directory of an iOS install. + + These scripts will, by default, use the currently active Xcode installation. + If you want to use a different Xcode installation, you can use + `xcode-select` to set a new default Xcode globally, or you can use the + `DEVELOPER_DIR` environment variable to specify an Xcode install. The + scripts will use the default `iphoneos`/`iphonesimulator` SDK version for + the select Xcode install; if you want to use a different SDK, you can set the + `IOS_SDK_VERSION` environment variable. (e.g, setting + `IOS_SDK_VERSION=17.1` would cause the scripts to use the `iphoneos17.1` + and `iphonesimulator17.1` SDKs, regardless of the Xcode default.) + + The path has also been cleared of any user customizations. A common source of + bugs is for tools like Homebrew to accidentally leak macOS binaries into an iOS + build. Resetting the path to a known "bare bones" value is the easiest way to + avoid these problems. + +* `--host` is the architecture and ABI that you want to build, in GNU compiler + triple format. This will be one of: + + - `arm64-apple-ios` for ARM64 iOS devices. + - `arm64-apple-ios-simulator` for the iOS simulator running on Apple + Silicon devices. + - `x86_64-apple-ios-simulator` for the iOS simulator running on Intel + devices. + +* `--build` is the GNU compiler triple for the machine that will be running + the compiler. This is one of: + + - `arm64-apple-darwin` for Apple Silicon devices. + - `x86_64-apple-darwin` for Intel devices. + +* `/path/to/python.exe` is the path to a Python binary on the machine that + will be running the compiler. This is needed because the Python compilation + process involves running some Python code. On a normal desktop build of + Python, you can compile a python interpreter and then use that interpreter to + run Python code. However, the binaries produced for iOS won't run on macOS, so + you need to provide an external Python interpreter. This interpreter must be + the same version as the Python that is being compiled. To be completely safe, + this should be the *exact* same commit hash. However, the longer a Python + release has been stable, the more likely it is that this constraint can be + relaxed - the same micro version will often be sufficient. + +* The `install` target for iOS builds is slightly different to other + platforms. On most platforms, `make install` will install the build into + the final runtime location. This won't be the case for iOS, as the final + runtime location will be on a physical device. + + However, you still need to run the `install` target for iOS builds, as it + performs some final framework assembly steps. The location specified with + `--enable-framework` will be the location where `make install` will + assemble the complete iOS framework. This completed framework can then + be copied and relocated as required. + +For a full CPython build, you also need to specify the paths to iOS builds of +the binary libraries that CPython depends on (such as XZ, LibFFI and OpenSSL). +This can be done by defining library specific environment variables (such as +`LIBLZMA_CFLAGS`, `LIBLZMA_LIBS`), and the `--with-openssl` configure +option. Versions of these libraries pre-compiled for iOS can be found in [this +repository](https://github.com/beeware/cpython-apple-source-deps/releases). +LibFFI is especially important, as many parts of the standard library +(including the `platform`, `sysconfig` and `webbrowser` modules) require +the use of the `ctypes` module at runtime. + +By default, Python will be compiled with an iOS deployment target (i.e., the +minimum supported iOS version) of 13.0. To specify a different deployment +target, provide the version number as part of the `--host` argument - for +example, `--host=arm64-apple-ios15.4-simulator` would compile an ARM64 +simulator build with a deployment target of 15.4. + +## Testing Python on iOS + +### Testing a multi-architecture framework + +Once you have a built an XCframework, you can test that framework by running: + + $ python Apple test iOS + +This test will attempt to find an "SE-class" simulator (i.e., an iPhone SE, or +iPhone 16e, or similar), and run the test suite on the most recent version of +iOS that is available. You can specify a simulator using the `--simulator` +command line argument, providing the name of the simulator (e.g., `--simulator +'iPhone 16 Pro'`). You can also use this argument to control the OS version used +for testing; `--simulator 'iPhone 16 Pro,OS=18.2'` would attempt to run the +tests on an iPhone 16 Pro running iOS 18.2. + +If the test runner is executed on GitHub Actions, the `GITHUB_ACTIONS` +environment variable will be exposed to the iOS process at runtime. + +### Testing a single-architecture framework + +The `Apple/testbed` folder that contains an Xcode project that is able to run +the Python test suite on Apple platforms. This project converts the Python test +suite into a single test case in Xcode's XCTest framework. The single XCTest +passes if the test suite passes. + +To run the test suite, configure a Python build for an iOS simulator (i.e., +`--host=arm64-apple-ios-simulator` or `--host=x86_64-apple-ios-simulator` +), specifying a framework build (i.e. `--enable-framework`). Ensure that your +`PATH` has been configured to include the `Apple/iOS/Resources/bin` folder and +exclude any non-iOS tools, then run: +``` +make all +make install +make testios +``` + +This will: + +* Build an iOS framework for your chosen architecture; +* Finalize the single-platform framework; +* Make a clean copy of the testbed project; +* Install the Python iOS framework into the copy of the testbed project; and +* Run the test suite on an "entry-level device" simulator (i.e., an iPhone SE, + iPhone 16e, or a similar). + +On success, the test suite will exit and report successful completion of the +test suite. On a 2022 M1 MacBook Pro, the test suite takes approximately 15 +minutes to run; a couple of extra minutes is required to compile the testbed +project, and then boot and prepare the iOS simulator. + +### Debugging test failures + +Running `python Apple test iOS` generates a standalone version of the +`Apple/testbed` project, and runs the full test suite. It does this using +`Apple/testbed` itself - the folder is an executable module that can be used +to create and run a clone of the testbed project. The standalone version of the +testbed will be created in a directory named +`cross-build/iOS-testbed.`. + +You can generate your own standalone testbed instance by running: +``` +python cross-build/iOS/testbed clone my-testbed +``` + +In this invocation, `my-testbed` is the name of the folder for the new +testbed clone. + +If you've built your own XCframework, or you only want to test a single architecture, +you can construct a standalone testbed instance by running: +``` +python Apple/testbed clone --platform iOS --framework my-testbed +``` + +The framework path can be the path path to a `Python.xcframework`, or the +path to a folder that contains a single-platform `Python.framework`. + +You can then use the `my-testbed` folder to run the Python test suite, +passing in any command line arguments you may require. For example, if you're +trying to diagnose a failure in the `os` module, you might run: +``` +python my-testbed run -- test -W test_os +``` + +This is the equivalent of running `python -m test -W test_os` on a desktop +Python build. Any arguments after the `--` will be passed to testbed as if +they were arguments to `python -m` on a desktop machine. + +### Testing in Xcode + +You can also open the testbed project in Xcode by running: +``` +open my-testbed/iOSTestbed.xcodeproj +``` + +This will allow you to use the full Xcode suite of tools for debugging. + +The arguments used to run the test suite are defined as part of the test plan. +To modify the test plan, select the test plan node of the project tree (it +should be the first child of the root node), and select the "Configurations" +tab. Modify the "Arguments Passed On Launch" value to change the testing +arguments. + +The test plan also disables parallel testing, and specifies the use of the +`Testbed.lldbinit` file for providing configuration of the debugger. The +default debugger configuration disables automatic breakpoints on the +`SIGINT`, `SIGUSR1`, `SIGUSR2`, and `SIGXFSZ` signals. + +### Testing on an iOS device + +To test on an iOS device, the app needs to be signed with known developer +credentials. To obtain these credentials, you must have an iOS Developer +account, and your Xcode install will need to be logged into your account (see +the Accounts tab of the Preferences dialog). + +Once the project is open, and you're signed into your Apple Developer account, +select the root node of the project tree (labeled "iOSTestbed"), then the +"Signing & Capabilities" tab in the details page. Select a development team +(this will likely be your own name), and plug in a physical device to your +macOS machine with a USB cable. You should then be able to select your physical +device from the list of targets in the pulldown in the Xcode titlebar. diff --git a/iOS/Resources/Info.plist.in b/Apple/iOS/Resources/Info.plist.in similarity index 100% rename from iOS/Resources/Info.plist.in rename to Apple/iOS/Resources/Info.plist.in diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-ar b/Apple/iOS/Resources/bin/arm64-apple-ios-ar new file mode 100755 index 00000000000000..3cf3eb218741fa --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-ar @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} ar "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-clang b/Apple/iOS/Resources/bin/arm64-apple-ios-clang new file mode 100755 index 00000000000000..f50d5b5142fc76 --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-clang @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET} "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-clang++ b/Apple/iOS/Resources/bin/arm64-apple-ios-clang++ new file mode 100755 index 00000000000000..0794731d7dcbda --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-clang++ @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang++ -target arm64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET} "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-cpp b/Apple/iOS/Resources/bin/arm64-apple-ios-cpp new file mode 100755 index 00000000000000..24fa1506bab827 --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-cpp @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET} -E "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-ar b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-ar new file mode 100755 index 00000000000000..b836b6db9025bb --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-ar @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-clang b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-clang new file mode 100755 index 00000000000000..4891a00876e0bd --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-clang @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET}-simulator "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ new file mode 100755 index 00000000000000..58b2a5f6f18c2b --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target arm64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET}-simulator "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-cpp b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-cpp new file mode 100755 index 00000000000000..c9df94e8b7c837 --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-cpp @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET}-simulator -E "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-strip b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..fd59d309b73a20 --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/Apple/iOS/Resources/bin/arm64-apple-ios-strip b/Apple/iOS/Resources/bin/arm64-apple-ios-strip new file mode 100755 index 00000000000000..75e823a3d02d61 --- /dev/null +++ b/Apple/iOS/Resources/bin/arm64-apple-ios-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-ar b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-ar new file mode 100755 index 00000000000000..b836b6db9025bb --- /dev/null +++ b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-ar @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar "$@" diff --git a/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-clang b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-clang new file mode 100755 index 00000000000000..f4739a7b945d01 --- /dev/null +++ b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-clang @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET}-simulator "$@" diff --git a/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ new file mode 100755 index 00000000000000..c348ae4c10395b --- /dev/null +++ b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target x86_64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET}-simulator "$@" diff --git a/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp new file mode 100755 index 00000000000000..6d7f8084c9fdcc --- /dev/null +++ b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios${IPHONEOS_DEPLOYMENT_TARGET}-simulator -E "$@" diff --git a/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-strip b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..c5cfb28929195a --- /dev/null +++ b/Apple/iOS/Resources/bin/x86_64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch x86_64 "$@" diff --git a/iOS/Resources/pyconfig.h b/Apple/iOS/Resources/pyconfig.h similarity index 100% rename from iOS/Resources/pyconfig.h rename to Apple/iOS/Resources/pyconfig.h diff --git a/iOS/testbed/Python.xcframework/Info.plist b/Apple/testbed/Python.xcframework/Info.plist similarity index 100% rename from iOS/testbed/Python.xcframework/Info.plist rename to Apple/testbed/Python.xcframework/Info.plist diff --git a/iOS/Resources/dylib-Info-template.plist b/Apple/testbed/Python.xcframework/build/iOS-dylib-Info-template.plist similarity index 96% rename from iOS/Resources/dylib-Info-template.plist rename to Apple/testbed/Python.xcframework/build/iOS-dylib-Info-template.plist index f652e272f71c88..d6caa01c1e44b9 100644 --- a/iOS/Resources/dylib-Info-template.plist +++ b/Apple/testbed/Python.xcframework/build/iOS-dylib-Info-template.plist @@ -19,7 +19,7 @@ iPhoneOS MinimumOSVersion - 12.0 + 13.0 CFBundleVersion 1 diff --git a/Apple/testbed/Python.xcframework/build/utils.sh b/Apple/testbed/Python.xcframework/build/utils.sh new file mode 100755 index 00000000000000..e7155d8b30e213 --- /dev/null +++ b/Apple/testbed/Python.xcframework/build/utils.sh @@ -0,0 +1,151 @@ +# Utility methods for use in an Xcode project. +# +# An iOS XCframework cannot include any content other than the library binary +# and relevant metadata. However, Python requires a standard library at runtime. +# Therefore, it is necessary to add a build step to an Xcode app target that +# processes the standard library and puts the content into the final app. +# +# In general, these tools will be invoked after bundle resources have been +# copied into the app, but before framework embedding (and signing). +# +# The following is an example script, assuming that: +# * Python.xcframework is in the root of the project +# * There is an `app` folder that contains the app code +# * There is an `app_packages` folder that contains installed Python packages. +# ----- +# set -e +# source $PROJECT_DIR/Python.xcframework/build/build_utils.sh +# install_python Python.xcframework app app_packages +# ----- + +# Copy the standard library from the XCframework into the app bundle. +# +# Accepts one argument: +# 1. The path, relative to the root of the Xcode project, where the Python +# XCframework can be found. +install_stdlib() { + PYTHON_XCFRAMEWORK_PATH=$1 + + mkdir -p "$CODESIGNING_FOLDER_PATH/python/lib" + if [ "$EFFECTIVE_PLATFORM_NAME" = "-iphonesimulator" ]; then + echo "Installing Python modules for iOS Simulator" + if [ -d "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/ios-arm64-simulator" ]; then + SLICE_FOLDER="ios-arm64-simulator" + else + SLICE_FOLDER="ios-arm64_x86_64-simulator" + fi + else + echo "Installing Python modules for iOS Device" + SLICE_FOLDER="ios-arm64" + fi + + # If the XCframework has a shared lib folder, then it's a full framework. + # Copy both the common and slice-specific part of the lib directory. + # Otherwise, it's a single-arch framework; use the "full" lib folder. + if [ -d "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/lib" ]; then + rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" + rsync -au "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib-$ARCHS/" "$CODESIGNING_FOLDER_PATH/python/lib/" + else + # A single-arch framework will have a libpython symlink; that can't be included at runtime + rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" --exclude 'libpython*.dylib' + fi +} + +# Convert a single .so library into a framework that iOS can load. +# +# Accepts three arguments: +# 1. The path, relative to the root of the Xcode project, where the Python +# XCframework can be found. +# 2. The base path, relative to the installed location in the app bundle, that +# needs to be processed. Any .so file found in this path (or a subdirectory +# of it) will be processed. +# 2. The full path to a single .so file to process. This path should include +# the base path. +install_dylib () { + PYTHON_XCFRAMEWORK_PATH=$1 + INSTALL_BASE=$2 + FULL_EXT=$3 + + # The name of the extension file + EXT=$(basename "$FULL_EXT") + # The name and location of the module + MODULE_PATH=$(dirname "$FULL_EXT") + MODULE_NAME=$(echo $EXT | cut -d "." -f 1) + # The location of the extension file, relative to the bundle + RELATIVE_EXT=${FULL_EXT#$CODESIGNING_FOLDER_PATH/} + # The path to the extension file, relative to the install base + PYTHON_EXT=${RELATIVE_EXT/$INSTALL_BASE/} + # The full dotted name of the extension module, constructed from the file path. + FULL_MODULE_NAME=$(echo $PYTHON_EXT | cut -d "." -f 1 | tr "/" "."); + # A bundle identifier; not actually used, but required by Xcode framework packaging + FRAMEWORK_BUNDLE_ID=$(echo $PRODUCT_BUNDLE_IDENTIFIER.$FULL_MODULE_NAME | tr "_" "-") + # The name of the framework folder. + FRAMEWORK_FOLDER="Frameworks/$FULL_MODULE_NAME.framework" + + # If the framework folder doesn't exist, create it. + if [ ! -d "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER" ]; then + echo "Creating framework for $RELATIVE_EXT" + mkdir -p "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER" + cp "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/build/$PLATFORM_FAMILY_NAME-dylib-Info-template.plist" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist" + plutil -replace CFBundleExecutable -string "$FULL_MODULE_NAME" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist" + plutil -replace CFBundleIdentifier -string "$FRAMEWORK_BUNDLE_ID" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist" + fi + + echo "Installing binary for $FRAMEWORK_FOLDER/$FULL_MODULE_NAME" + mv "$FULL_EXT" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME" + # Create a placeholder .fwork file where the .so was + echo "$FRAMEWORK_FOLDER/$FULL_MODULE_NAME" > ${FULL_EXT%.so}.fwork + # Create a back reference to the .so file location in the framework + echo "${RELATIVE_EXT%.so}.fwork" > "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME.origin" + + # If the framework provides an xcprivacy file, install it. + if [ -e "$MODULE_PATH/$MODULE_NAME.xcprivacy" ]; then + echo "Installing XCPrivacy file for $FRAMEWORK_FOLDER/$FULL_MODULE_NAME" + XCPRIVACY_FILE="$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/PrivacyInfo.xcprivacy" + if [ -e "$XCPRIVACY_FILE" ]; then + rm -rf "$XCPRIVACY_FILE" + fi + mv "$MODULE_PATH/$MODULE_NAME.xcprivacy" "$XCPRIVACY_FILE" + fi + + echo "Signing framework as $EXPANDED_CODE_SIGN_IDENTITY_NAME ($EXPANDED_CODE_SIGN_IDENTITY)..." + /usr/bin/codesign --force --sign "$EXPANDED_CODE_SIGN_IDENTITY" ${OTHER_CODE_SIGN_FLAGS:-} -o runtime --timestamp=none --preserve-metadata=identifier,entitlements,flags --generate-entitlement-der "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER" +} + +# Process all the dynamic libraries in a path into Framework format. +# +# Accepts two arguments: +# 1. The path, relative to the root of the Xcode project, where the Python +# XCframework can be found. +# 2. The base path, relative to the installed location in the app bundle, that +# needs to be processed. Any .so file found in this path (or a subdirectory +# of it) will be processed. +process_dylibs () { + PYTHON_XCFRAMEWORK_PATH=$1 + LIB_PATH=$2 + find "$CODESIGNING_FOLDER_PATH/$LIB_PATH" -name "*.so" | while read FULL_EXT; do + install_dylib $PYTHON_XCFRAMEWORK_PATH "$LIB_PATH/" "$FULL_EXT" + done +} + +# The entry point for post-processing a Python XCframework. +# +# Accepts 1 or more arguments: +# 1. The path, relative to the root of the Xcode project, where the Python +# XCframework can be found. If the XCframework is in the root of the project, +# 2+. The path of a package, relative to the root of the packaged app, that contains +# library content that should be processed for binary libraries. +install_python() { + PYTHON_XCFRAMEWORK_PATH=$1 + shift + + install_stdlib $PYTHON_XCFRAMEWORK_PATH + PYTHON_VER=$(ls -1 "$CODESIGNING_FOLDER_PATH/python/lib") + echo "Install Python $PYTHON_VER standard library extension modules..." + process_dylibs $PYTHON_XCFRAMEWORK_PATH python/lib/$PYTHON_VER/lib-dynload + + for package_path in $@; do + echo "Installing $package_path extension modules ..." + process_dylibs $PYTHON_XCFRAMEWORK_PATH $package_path + done +} diff --git a/iOS/testbed/Python.xcframework/ios-arm64/README b/Apple/testbed/Python.xcframework/ios-arm64/README similarity index 100% rename from iOS/testbed/Python.xcframework/ios-arm64/README rename to Apple/testbed/Python.xcframework/ios-arm64/README diff --git a/iOS/testbed/Python.xcframework/ios-arm64_x86_64-simulator/README b/Apple/testbed/Python.xcframework/ios-arm64_x86_64-simulator/README similarity index 100% rename from iOS/testbed/Python.xcframework/ios-arm64_x86_64-simulator/README rename to Apple/testbed/Python.xcframework/ios-arm64_x86_64-simulator/README diff --git a/Apple/testbed/Testbed.lldbinit b/Apple/testbed/Testbed.lldbinit new file mode 100644 index 00000000000000..4cf00dd0f9de1d --- /dev/null +++ b/Apple/testbed/Testbed.lldbinit @@ -0,0 +1,4 @@ +process handle SIGINT -n true -p true -s false +process handle SIGUSR1 -n true -p true -s false +process handle SIGUSR2 -n true -p true -s false +process handle SIGXFSZ -n true -p true -s false diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/Apple/testbed/TestbedTests/TestbedTests.m similarity index 69% rename from iOS/testbed/iOSTestbedTests/iOSTestbedTests.m rename to Apple/testbed/TestbedTests/TestbedTests.m index dd6e76f9496fe0..f7788c47f2c229 100644 --- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m +++ b/Apple/testbed/TestbedTests/TestbedTests.m @@ -1,11 +1,11 @@ #import #import -@interface iOSTestbedTests : XCTestCase +@interface TestbedTests : XCTestCase @end -@implementation iOSTestbedTests +@implementation TestbedTests - (void)testPython { @@ -15,6 +15,11 @@ - (void)testPython { PyStatus status; PyPreConfig preconfig; PyConfig config; + PyObject *app_packages_path; + PyObject *method_args; + PyObject *result; + PyObject *site_module; + PyObject *site_addsitedir_attr; PyObject *sys_module; PyObject *sys_path_attr; NSArray *test_args; @@ -30,19 +35,26 @@ - (void)testPython { setenv("NO_COLOR", "1", true); setenv("PYTHON_COLORS", "0", true); + if (getenv("GITHUB_ACTIONS")) { + NSLog(@"Running in a GitHub Actions environment"); + } // Arguments to pass into the test suite runner. // argv[0] must identify the process; any subsequent arg // will be handled as if it were an argument to `python -m test` - test_args = [[NSBundle mainBundle] objectForInfoDictionaryKey:@"TestArgs"]; + // The processInfo arguments contain the binary that is running, + // followed by the arguments defined in the test plan. This means: + // run_module = test_args[1] + // argv = ["Testbed"] + test_args[2:] + test_args = [[NSProcessInfo processInfo] arguments]; if (test_args == NULL) { NSLog(@"Unable to identify test arguments."); } - argv = malloc(sizeof(char *) * ([test_args count] + 1)); - argv[0] = "iOSTestbed"; - for (int i = 1; i < [test_args count]; i++) { - argv[i] = [[test_args objectAtIndex:i] UTF8String]; + NSLog(@"Test arguments: %@", test_args); + argv = malloc(sizeof(char *) * ([test_args count] - 1)); + argv[0] = "Testbed"; + for (int i = 1; i < [test_args count] - 1; i++) { + argv[i] = [[test_args objectAtIndex:i+1] UTF8String]; } - NSLog(@"Test command: %@", test_args); // Generate an isolated Python configuration. NSLog(@"Configuring isolated Python..."); @@ -63,7 +75,7 @@ - (void)testPython { // Ensure that signal handlers are installed config.install_signal_handlers = 1; // Run the test module. - config.run_module = Py_DecodeLocale([[test_args objectAtIndex:0] UTF8String], NULL); + config.run_module = Py_DecodeLocale([[test_args objectAtIndex:1] UTF8String], NULL); // For debugging - enable verbose mode. // config.verbose = 1; @@ -96,7 +108,7 @@ - (void)testPython { } NSLog(@"Configure argc/argv..."); - status = PyConfig_SetBytesArgv(&config, [test_args count], (char**) argv); + status = PyConfig_SetBytesArgv(&config, [test_args count] - 1, (char**) argv); if (PyStatus_Exception(status)) { XCTFail(@"Unable to configure argc/argv: %s", status.err_msg); PyConfig_Clear(&config); @@ -111,29 +123,55 @@ - (void)testPython { return; } - sys_module = PyImport_ImportModule("sys"); - if (sys_module == NULL) { - XCTFail(@"Could not import sys module"); + // Add app_packages as a site directory. This both adds to sys.path, + // and ensures that any .pth files in that directory will be executed. + site_module = PyImport_ImportModule("site"); + if (site_module == NULL) { + XCTFail(@"Could not import site module"); return; } - sys_path_attr = PyObject_GetAttrString(sys_module, "path"); - if (sys_path_attr == NULL) { - XCTFail(@"Could not access sys.path"); + site_addsitedir_attr = PyObject_GetAttrString(site_module, "addsitedir"); + if (site_addsitedir_attr == NULL || !PyCallable_Check(site_addsitedir_attr)) { + XCTFail(@"Could not access site.addsitedir"); return; } - // Add the app packages path path = [NSString stringWithFormat:@"%@/app_packages", resourcePath, nil]; NSLog(@"App packages path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); - failed = PyList_Insert(sys_path_attr, 0, PyUnicode_FromString([path UTF8String])); - if (failed) { - XCTFail(@"Unable to add app packages to sys.path"); + app_packages_path = PyUnicode_FromWideChar(wtmp_str, wcslen(wtmp_str)); + if (app_packages_path == NULL) { + XCTFail(@"Could not convert app_packages path to unicode"); return; } PyMem_RawFree(wtmp_str); + method_args = Py_BuildValue("(O)", app_packages_path); + if (method_args == NULL) { + XCTFail(@"Could not create arguments for site.addsitedir"); + return; + } + + result = PyObject_CallObject(site_addsitedir_attr, method_args); + if (result == NULL) { + XCTFail(@"Could not add app_packages directory using site.addsitedir"); + return; + } + + // Add test code to sys.path + sys_module = PyImport_ImportModule("sys"); + if (sys_module == NULL) { + XCTFail(@"Could not import sys module"); + return; + } + + sys_path_attr = PyObject_GetAttrString(sys_module, "path"); + if (sys_path_attr == NULL) { + XCTFail(@"Could not access sys.path"); + return; + } + path = [NSString stringWithFormat:@"%@/app", resourcePath, nil]; NSLog(@"App path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); diff --git a/Apple/testbed/__main__.py b/Apple/testbed/__main__.py new file mode 100644 index 00000000000000..0dd77ab8b82797 --- /dev/null +++ b/Apple/testbed/__main__.py @@ -0,0 +1,435 @@ +import argparse +import json +import os +import re +import shlex +import shutil +import subprocess +import sys +from pathlib import Path + +TEST_SLICES = { + "iOS": "ios-arm64_x86_64-simulator", +} + +DECODE_ARGS = ("UTF-8", "backslashreplace") + +# The system log prefixes each line: +# 2025-01-17 16:14:29.093742+0800 iOSTestbed[23987:1fd393b4] ... +# 2025-01-17 16:14:29.093742+0800 iOSTestbed[23987:1fd393b4] ... + +LOG_PREFIX_REGEX = re.compile( + r"^\d{4}-\d{2}-\d{2}" # YYYY-MM-DD + r"\s+\d+:\d{2}:\d{2}\.\d+\+\d{4}" # HH:MM:SS.ssssss+ZZZZ + r"\s+iOSTestbed\[\d+:\w+\]" # Process/thread ID +) + + +# Select a simulator device to use. +def select_simulator_device(platform): + # List the testing simulators, in JSON format + raw_json = subprocess.check_output(["xcrun", "simctl", "list", "-j"]) + json_data = json.loads(raw_json) + + if platform == "iOS": + # Any iOS device will do; we'll look for "SE" devices - but the name + # isn't consistent over time. Older Xcode versions will use "iPhone SE + # (Nth generation)"; As of 2025, they've started using "iPhone 16e". + # + # When Xcode is updated after a new release, new devices will be + # available and old ones will be dropped from the set available on the + # latest iOS version. Select the one with the highest minimum runtime + # version - this is an indicator of the "newest" released device, which + # should always be supported on the "most recent" iOS version. + se_simulators = sorted( + (devicetype["minRuntimeVersion"], devicetype["name"]) + for devicetype in json_data["devicetypes"] + if devicetype["productFamily"] == "iPhone" + and ( + ( + "iPhone " in devicetype["name"] + and devicetype["name"].endswith("e") + ) + or "iPhone SE " in devicetype["name"] + ) + ) + simulator = se_simulators[-1][1] + else: + raise ValueError(f"Unknown platform {platform}") + + return simulator + + +def xcode_test(location: Path, platform: str, simulator: str, verbose: bool): + # Build and run the test suite on the named simulator. + args = [ + "-project", + str(location / f"{platform}Testbed.xcodeproj"), + "-scheme", + f"{platform}Testbed", + "-destination", + f"platform={platform} Simulator,name={simulator}", + "-derivedDataPath", + str(location / "DerivedData"), + ] + verbosity_args = [] if verbose else ["-quiet"] + + print("Building test project...") + subprocess.run( + ["xcodebuild", "build-for-testing"] + args + verbosity_args, + check=True, + ) + + # Any environment variable prefixed with TEST_RUNNER_ is exposed into the + # test runner environment. There are some variables (like those identifying + # CI platforms) that can be useful to have access to. + test_env = os.environ.copy() + if "GITHUB_ACTIONS" in os.environ: + test_env["TEST_RUNNER_GITHUB_ACTIONS"] = os.environ["GITHUB_ACTIONS"] + + print("Running test project...") + # Test execution *can't* be run -quiet; verbose mode + # is how we see the output of the test output. + process = subprocess.Popen( + ["xcodebuild", "test-without-building"] + args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=test_env, + ) + while line := (process.stdout.readline()).decode(*DECODE_ARGS): + # Strip the timestamp/process prefix from each log line + line = LOG_PREFIX_REGEX.sub("", line) + sys.stdout.write(line) + sys.stdout.flush() + + status = process.wait(timeout=5) + exit(status) + + +def copy(src, tgt): + """An all-purpose copy. + + If src is a file, it is copied. If src is a symlink, it is copied *as a + symlink*. If src is a directory, the full tree is duplicated, with symlinks + being preserved. + """ + if src.is_file() or src.is_symlink(): + shutil.copyfile(src, tgt, follow_symlinks=False) + else: + shutil.copytree(src, tgt, symlinks=True) + + +def clone_testbed( + source: Path, + target: Path, + framework: Path, + platform: str, + apps: list[Path], +) -> None: + if target.exists(): + print(f"{target} already exists; aborting without creating project.") + sys.exit(10) + + if framework is None: + if not ( + source / "Python.xcframework" / TEST_SLICES[platform] / "bin" + ).is_dir(): + print( + f"The testbed being cloned ({source}) does not contain " + "a framework with slices. Re-run with --framework" + ) + sys.exit(11) + else: + if not framework.is_dir(): + print(f"{framework} does not exist.") + sys.exit(12) + elif not ( + framework.suffix == ".xcframework" + or (framework / "Python.framework").is_dir() + ): + print( + f"{framework} is not an XCframework, " + f"or a simulator slice of a framework build." + ) + sys.exit(13) + + print("Cloning testbed project:") + print(f" Cloning {source}...", end="") + # Only copy the files for the platform being cloned plus the files common + # to all platforms. The XCframework will be copied later, if needed. + target.mkdir(parents=True) + + for name in [ + "__main__.py", + "TestbedTests", + "Testbed.lldbinit", + f"{platform}Testbed", + f"{platform}Testbed.xcodeproj", + f"{platform}Testbed.xctestplan", + ]: + copy(source / name, target / name) + + print(" done") + + orig_xc_framework_path = source / "Python.xcframework" + xc_framework_path = target / "Python.xcframework" + test_framework_path = xc_framework_path / TEST_SLICES[platform] + if framework is not None: + if framework.suffix == ".xcframework": + print(" Installing XCFramework...", end="") + xc_framework_path.symlink_to( + framework.relative_to(xc_framework_path.parent, walk_up=True) + ) + print(" done") + else: + print(" Installing simulator framework...", end="") + # We're only installing a slice of a framework; we need + # to do a full tree copy to make sure we don't damage + # symlinked content. + shutil.copytree(orig_xc_framework_path, xc_framework_path) + if test_framework_path.is_dir(): + shutil.rmtree(test_framework_path) + else: + test_framework_path.unlink(missing_ok=True) + test_framework_path.symlink_to( + framework.relative_to(test_framework_path.parent, walk_up=True) + ) + print(" done") + else: + copy(orig_xc_framework_path, xc_framework_path) + + if ( + xc_framework_path.is_symlink() + and not xc_framework_path.readlink().is_absolute() + ): + # XCFramework is a relative symlink. Rewrite the symlink relative + # to the new location. + print(" Rewriting symlink to XCframework...", end="") + resolved_xc_framework_path = ( + source / xc_framework_path.readlink() + ).resolve() + xc_framework_path.unlink() + xc_framework_path.symlink_to( + resolved_xc_framework_path.relative_to( + xc_framework_path.parent, walk_up=True + ) + ) + print(" done") + elif ( + test_framework_path.is_symlink() + and not test_framework_path.readlink().is_absolute() + ): + print(" Rewriting symlink to simulator framework...", end="") + # Simulator framework is a relative symlink. Rewrite the symlink + # relative to the new location. + orig_test_framework_path = ( + source / "Python.XCframework" / test_framework_path.readlink() + ).resolve() + test_framework_path.unlink() + test_framework_path.symlink_to( + orig_test_framework_path.relative_to( + test_framework_path.parent, walk_up=True + ) + ) + print(" done") + else: + print(" Using pre-existing Python framework.") + + for app_src in apps: + print(f" Installing app {app_src.name!r}...", end="") + app_target = target / f"Testbed/app/{app_src.name}" + if app_target.is_dir(): + shutil.rmtree(app_target) + shutil.copytree(app_src, app_target) + print(" done") + + print(f"Successfully cloned testbed: {target.resolve()}") + + +def update_test_plan(testbed_path, platform, args): + # Modify the test plan to use the requested test arguments. + test_plan_path = testbed_path / f"{platform}Testbed.xctestplan" + with test_plan_path.open("r", encoding="utf-8") as f: + test_plan = json.load(f) + + test_plan["defaultOptions"]["commandLineArgumentEntries"] = [ + {"argument": shlex.quote(arg)} for arg in args + ] + + with test_plan_path.open("w", encoding="utf-8") as f: + json.dump(test_plan, f, indent=2) + + +def run_testbed( + platform: str, + simulator: str | None, + args: list[str], + verbose: bool = False, +): + location = Path(__file__).parent + print("Updating test plan...", end="") + update_test_plan(location, platform, args) + print(" done.") + + if simulator is None: + simulator = select_simulator_device(platform) + print(f"Running test on {simulator}") + + xcode_test( + location, + platform=platform, + simulator=simulator, + verbose=verbose, + ) + + +def main(): + # Look for directories like `iOSTestbed` as an indicator of the platforms + # that the testbed folder supports. The original source testbed can support + # many platforms, but when cloned, only one platform is preserved. + available_platforms = [ + platform + for platform in ["iOS"] + if (Path(__file__).parent / f"{platform}Testbed").is_dir() + ] + + parser = argparse.ArgumentParser( + description=( + "Manages the process of testing an Apple Python project " + "through Xcode." + ), + ) + + subcommands = parser.add_subparsers(dest="subcommand") + clone = subcommands.add_parser( + "clone", + description=( + "Clone the testbed project, copying in a Python framework and" + "any specified application code." + ), + help="Clone a testbed project to a new location.", + ) + clone.add_argument( + "--framework", + help=( + "The location of the XCFramework (or simulator-only slice of an " + "XCFramework) to use when running the testbed" + ), + ) + clone.add_argument( + "--platform", + dest="platform", + choices=available_platforms, + default=available_platforms[0], + help=f"The platform to target (default: {available_platforms[0]})", + ) + clone.add_argument( + "--app", + dest="apps", + action="append", + default=[], + help="The location of any code to include in the testbed project", + ) + clone.add_argument( + "location", + help="The path where the testbed will be cloned.", + ) + + run = subcommands.add_parser( + "run", + usage=( + "%(prog)s [-h] [--simulator SIMULATOR] -- " + " [ ...]" + ), + description=( + "Run a testbed project. The arguments provided after `--` will be " + "passed to the running iOS process as if they were arguments to " + "`python -m`." + ), + help="Run a testbed project", + ) + run.add_argument( + "--platform", + dest="platform", + choices=available_platforms, + default=available_platforms[0], + help=f"The platform to target (default: {available_platforms[0]})", + ) + run.add_argument( + "--simulator", + help=( + "The name of the simulator to use (eg: 'iPhone 16e'). Defaults to " + "the most recently released 'entry level' iPhone device. Device " + "architecture and OS version can also be specified; e.g., " + "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would run on " + "an ARM64 iPhone 16 Pro simulator running iOS 26.0." + ), + ) + run.add_argument( + "-v", + "--verbose", + action="store_true", + help="Enable verbose output", + ) + + try: + pos = sys.argv.index("--") + testbed_args = sys.argv[1:pos] + test_args = sys.argv[pos + 1 :] + except ValueError: + testbed_args = sys.argv[1:] + test_args = [] + + context = parser.parse_args(testbed_args) + + if context.subcommand == "clone": + clone_testbed( + source=Path(__file__).parent.resolve(), + target=Path(context.location).resolve(), + framework=Path(context.framework).resolve() + if context.framework + else None, + platform=context.platform, + apps=[Path(app) for app in context.apps], + ) + elif context.subcommand == "run": + if test_args: + if not ( + Path(__file__).parent + / "Python.xcframework" + / TEST_SLICES[context.platform] + / "bin" + ).is_dir(): + print( + "Testbed does not contain a compiled Python framework. " + f"Use `python {sys.argv[0]} clone ...` to create a " + "runnable clone of this testbed." + ) + sys.exit(20) + + run_testbed( + platform=context.platform, + simulator=context.simulator, + verbose=context.verbose, + args=test_args, + ) + else: + print( + "Must specify test arguments " + f"(e.g., {sys.argv[0]} run -- test)" + ) + print() + parser.print_help(sys.stderr) + sys.exit(21) + else: + parser.print_help(sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + # Under the buildbot, stdout is not a TTY, but we must still flush after + # every line to make sure our output appears in the correct order relative + # to the output of our subprocesses. + for stream in [sys.stdout, sys.stderr]: + stream.reconfigure(line_buffering=True) + main() diff --git a/iOS/testbed/iOSTestbed.xcodeproj/project.pbxproj b/Apple/testbed/iOSTestbed.xcodeproj/project.pbxproj similarity index 78% rename from iOS/testbed/iOSTestbed.xcodeproj/project.pbxproj rename to Apple/testbed/iOSTestbed.xcodeproj/project.pbxproj index c7d63909ee2453..f8835a3bc587df 100644 --- a/iOS/testbed/iOSTestbed.xcodeproj/project.pbxproj +++ b/Apple/testbed/iOSTestbed.xcodeproj/project.pbxproj @@ -11,12 +11,11 @@ 607A66222B0EFA390010BFC8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 607A66212B0EFA390010BFC8 /* Assets.xcassets */; }; 607A66252B0EFA390010BFC8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 607A66232B0EFA390010BFC8 /* LaunchScreen.storyboard */; }; 607A66282B0EFA390010BFC8 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 607A66272B0EFA390010BFC8 /* main.m */; }; - 607A66322B0EFA3A0010BFC8 /* iOSTestbedTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 607A66312B0EFA3A0010BFC8 /* iOSTestbedTests.m */; }; + 607A66322B0EFA3A0010BFC8 /* TestbedTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 607A66312B0EFA3A0010BFC8 /* TestbedTests.m */; }; 607A664C2B0EFC080010BFC8 /* Python.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 607A664A2B0EFB310010BFC8 /* Python.xcframework */; }; 607A664D2B0EFC080010BFC8 /* Python.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 607A664A2B0EFB310010BFC8 /* Python.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; 607A66502B0EFFE00010BFC8 /* Python.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 607A664A2B0EFB310010BFC8 /* Python.xcframework */; }; 607A66512B0EFFE00010BFC8 /* Python.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 607A664A2B0EFB310010BFC8 /* Python.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; - 607A66582B0F079F0010BFC8 /* dylib-Info-template.plist in Resources */ = {isa = PBXBuildFile; fileRef = 607A66572B0F079F0010BFC8 /* dylib-Info-template.plist */; }; 608619542CB77BA900F46182 /* app_packages in Resources */ = {isa = PBXBuildFile; fileRef = 608619532CB77BA900F46182 /* app_packages */; }; 608619562CB7819B00F46182 /* app in Resources */ = {isa = PBXBuildFile; fileRef = 608619552CB7819B00F46182 /* app */; }; /* End PBXBuildFile section */ @@ -64,12 +63,12 @@ 607A66242B0EFA390010BFC8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; 607A66272B0EFA390010BFC8 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; 607A662D2B0EFA3A0010BFC8 /* iOSTestbedTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = iOSTestbedTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; - 607A66312B0EFA3A0010BFC8 /* iOSTestbedTests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = iOSTestbedTests.m; sourceTree = ""; }; + 607A66312B0EFA3A0010BFC8 /* TestbedTests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = TestbedTests.m; sourceTree = ""; }; 607A664A2B0EFB310010BFC8 /* Python.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = Python.xcframework; sourceTree = ""; }; - 607A66572B0F079F0010BFC8 /* dylib-Info-template.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "dylib-Info-template.plist"; sourceTree = ""; }; 607A66592B0F08600010BFC8 /* iOSTestbed-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "iOSTestbed-Info.plist"; sourceTree = ""; }; 608619532CB77BA900F46182 /* app_packages */ = {isa = PBXFileReference; lastKnownFileType = folder; path = app_packages; sourceTree = ""; }; 608619552CB7819B00F46182 /* app */ = {isa = PBXFileReference; lastKnownFileType = folder; path = app; sourceTree = ""; }; + 60FE0EFB2E56BB6D00524F87 /* iOSTestbed.xctestplan */ = {isa = PBXFileReference; lastKnownFileType = text; path = iOSTestbed.xctestplan; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -95,9 +94,10 @@ 607A66092B0EFA380010BFC8 = { isa = PBXGroup; children = ( + 60FE0EFB2E56BB6D00524F87 /* iOSTestbed.xctestplan */, 607A664A2B0EFB310010BFC8 /* Python.xcframework */, 607A66142B0EFA380010BFC8 /* iOSTestbed */, - 607A66302B0EFA3A0010BFC8 /* iOSTestbedTests */, + 607A66302B0EFA3A0010BFC8 /* TestbedTests */, 607A66132B0EFA380010BFC8 /* Products */, 607A664F2B0EFFE00010BFC8 /* Frameworks */, ); @@ -118,7 +118,6 @@ 608619552CB7819B00F46182 /* app */, 608619532CB77BA900F46182 /* app_packages */, 607A66592B0F08600010BFC8 /* iOSTestbed-Info.plist */, - 607A66572B0F079F0010BFC8 /* dylib-Info-template.plist */, 607A66152B0EFA380010BFC8 /* AppDelegate.h */, 607A66162B0EFA380010BFC8 /* AppDelegate.m */, 607A66212B0EFA390010BFC8 /* Assets.xcassets */, @@ -128,12 +127,12 @@ path = iOSTestbed; sourceTree = ""; }; - 607A66302B0EFA3A0010BFC8 /* iOSTestbedTests */ = { + 607A66302B0EFA3A0010BFC8 /* TestbedTests */ = { isa = PBXGroup; children = ( - 607A66312B0EFA3A0010BFC8 /* iOSTestbedTests.m */, + 607A66312B0EFA3A0010BFC8 /* TestbedTests.m */, ); - path = iOSTestbedTests; + path = TestbedTests; sourceTree = ""; }; 607A664F2B0EFFE00010BFC8 /* Frameworks */ = { @@ -153,8 +152,7 @@ 607A660E2B0EFA380010BFC8 /* Sources */, 607A660F2B0EFA380010BFC8 /* Frameworks */, 607A66102B0EFA380010BFC8 /* Resources */, - 607A66552B0F061D0010BFC8 /* Install Target Specific Python Standard Library */, - 607A66562B0F06200010BFC8 /* Prepare Python Binary Modules */, + 607A66552B0F061D0010BFC8 /* Process Python libraries */, 607A664E2B0EFC080010BFC8 /* Embed Frameworks */, ); buildRules = ( @@ -228,7 +226,6 @@ buildActionMask = 2147483647; files = ( 607A66252B0EFA390010BFC8 /* LaunchScreen.storyboard in Resources */, - 607A66582B0F079F0010BFC8 /* dylib-Info-template.plist in Resources */, 608619562CB7819B00F46182 /* app in Resources */, 607A66222B0EFA390010BFC8 /* Assets.xcassets in Resources */, 608619542CB77BA900F46182 /* app_packages in Resources */, @@ -245,7 +242,7 @@ /* End PBXResourcesBuildPhase section */ /* Begin PBXShellScriptBuildPhase section */ - 607A66552B0F061D0010BFC8 /* Install Target Specific Python Standard Library */ = { + 607A66552B0F061D0010BFC8 /* Process Python libraries */ = { isa = PBXShellScriptBuildPhase; alwaysOutOfDate = 1; buildActionMask = 2147483647; @@ -255,34 +252,14 @@ ); inputPaths = ( ); - name = "Install Target Specific Python Standard Library"; + name = "Process Python libraries"; outputFileListPaths = ( ); outputPaths = ( ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "set -e\n\nmkdir -p \"$CODESIGNING_FOLDER_PATH/python/lib\"\nif [ \"$EFFECTIVE_PLATFORM_NAME\" = \"-iphonesimulator\" ]; then\n echo \"Installing Python modules for iOS Simulator\"\n rsync -au --delete \"$PROJECT_DIR/Python.xcframework/ios-arm64_x86_64-simulator/lib/\" \"$CODESIGNING_FOLDER_PATH/python/lib/\" \nelse\n echo \"Installing Python modules for iOS Device\"\n rsync -au --delete \"$PROJECT_DIR/Python.xcframework/ios-arm64/lib/\" \"$CODESIGNING_FOLDER_PATH/python/lib/\" \nfi\n"; - showEnvVarsInLog = 0; - }; - 607A66562B0F06200010BFC8 /* Prepare Python Binary Modules */ = { - isa = PBXShellScriptBuildPhase; - alwaysOutOfDate = 1; - buildActionMask = 2147483647; - files = ( - ); - inputFileListPaths = ( - ); - inputPaths = ( - ); - name = "Prepare Python Binary Modules"; - outputFileListPaths = ( - ); - outputPaths = ( - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "set -e\n\ninstall_dylib () {\n INSTALL_BASE=$1\n FULL_EXT=$2\n\n # The name of the extension file\n EXT=$(basename \"$FULL_EXT\")\n # The location of the extension file, relative to the bundle\n RELATIVE_EXT=${FULL_EXT#$CODESIGNING_FOLDER_PATH/} \n # The path to the extension file, relative to the install base\n PYTHON_EXT=${RELATIVE_EXT/$INSTALL_BASE/}\n # The full dotted name of the extension module, constructed from the file path.\n FULL_MODULE_NAME=$(echo $PYTHON_EXT | cut -d \".\" -f 1 | tr \"/\" \".\"); \n # A bundle identifier; not actually used, but required by Xcode framework packaging\n FRAMEWORK_BUNDLE_ID=$(echo $PRODUCT_BUNDLE_IDENTIFIER.$FULL_MODULE_NAME | tr \"_\" \"-\")\n # The name of the framework folder.\n FRAMEWORK_FOLDER=\"Frameworks/$FULL_MODULE_NAME.framework\"\n\n # If the framework folder doesn't exist, create it.\n if [ ! -d \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER\" ]; then\n echo \"Creating framework for $RELATIVE_EXT\" \n mkdir -p \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER\"\n cp \"$CODESIGNING_FOLDER_PATH/dylib-Info-template.plist\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist\"\n plutil -replace CFBundleExecutable -string \"$FULL_MODULE_NAME\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist\"\n plutil -replace CFBundleIdentifier -string \"$FRAMEWORK_BUNDLE_ID\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist\"\n fi\n \n echo \"Installing binary for $FRAMEWORK_FOLDER/$FULL_MODULE_NAME\" \n mv \"$FULL_EXT\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME\"\n # Create a placeholder .fwork file where the .so was\n echo \"$FRAMEWORK_FOLDER/$FULL_MODULE_NAME\" > ${FULL_EXT%.so}.fwork\n # Create a back reference to the .so file location in the framework\n echo \"${RELATIVE_EXT%.so}.fwork\" > \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME.origin\" \n}\n\nPYTHON_VER=$(ls -1 \"$CODESIGNING_FOLDER_PATH/python/lib\")\necho \"Install Python $PYTHON_VER standard library extension modules...\"\nfind \"$CODESIGNING_FOLDER_PATH/python/lib/$PYTHON_VER/lib-dynload\" -name \"*.so\" | while read FULL_EXT; do\n install_dylib python/lib/$PYTHON_VER/lib-dynload/ \"$FULL_EXT\"\ndone\necho \"Install app package extension modules...\"\nfind \"$CODESIGNING_FOLDER_PATH/app_packages\" -name \"*.so\" | while read FULL_EXT; do\n install_dylib app_packages/ \"$FULL_EXT\"\ndone\necho \"Install app extension modules...\"\nfind \"$CODESIGNING_FOLDER_PATH/app\" -name \"*.so\" | while read FULL_EXT; do\n install_dylib app/ \"$FULL_EXT\"\ndone\n\n# Clean up dylib template \nrm -f \"$CODESIGNING_FOLDER_PATH/dylib-Info-template.plist\"\necho \"Signing frameworks as $EXPANDED_CODE_SIGN_IDENTITY_NAME ($EXPANDED_CODE_SIGN_IDENTITY)...\"\nfind \"$CODESIGNING_FOLDER_PATH/Frameworks\" -name \"*.framework\" -exec /usr/bin/codesign --force --sign \"$EXPANDED_CODE_SIGN_IDENTITY\" ${OTHER_CODE_SIGN_FLAGS:-} -o runtime --timestamp=none --preserve-metadata=identifier,entitlements,flags --generate-entitlement-der \"{}\" \\; \n"; + shellScript = "set -e\nsource $PROJECT_DIR/Python.xcframework/build/utils.sh\ninstall_python Python.xcframework app app_packages\n"; showEnvVarsInLog = 0; }; /* End PBXShellScriptBuildPhase section */ @@ -301,7 +278,7 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 607A66322B0EFA3A0010BFC8 /* iOSTestbedTests.m in Sources */, + 607A66322B0EFA3A0010BFC8 /* TestbedTests.m in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -379,7 +356,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; LOCALIZATION_PREFERS_STRING_CATALOGS = YES; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; @@ -434,7 +411,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; LOCALIZATION_PREFERS_STRING_CATALOGS = YES; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; @@ -460,7 +437,7 @@ INFOPLIST_KEY_UIMainStoryboardFile = Main; INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", @@ -491,7 +468,7 @@ INFOPLIST_KEY_UIMainStoryboardFile = Main; INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", @@ -514,7 +491,7 @@ DEVELOPMENT_TEAM = 3HEZE76D99; GENERATE_INFOPLIST_FILE = YES; HEADER_SEARCH_PATHS = "\"$(BUILT_PRODUCTS_DIR)/Python.framework/Headers\""; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; MARKETING_VERSION = 1.0; PRODUCT_BUNDLE_IDENTIFIER = org.python.iOSTestbedTests; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -534,7 +511,7 @@ DEVELOPMENT_TEAM = 3HEZE76D99; GENERATE_INFOPLIST_FILE = YES; HEADER_SEARCH_PATHS = "\"$(BUILT_PRODUCTS_DIR)/Python.framework/Headers\""; - IPHONEOS_DEPLOYMENT_TARGET = 12.0; + IPHONEOS_DEPLOYMENT_TARGET = 13.0; MARKETING_VERSION = 1.0; PRODUCT_BUNDLE_IDENTIFIER = org.python.iOSTestbedTests; PRODUCT_NAME = "$(TARGET_NAME)"; diff --git a/Apple/testbed/iOSTestbed.xcodeproj/xcshareddata/xcschemes/iOSTestbed.xcscheme b/Apple/testbed/iOSTestbed.xcodeproj/xcshareddata/xcschemes/iOSTestbed.xcscheme new file mode 100644 index 00000000000000..3c330a4152bf92 --- /dev/null +++ b/Apple/testbed/iOSTestbed.xcodeproj/xcshareddata/xcschemes/iOSTestbed.xcscheme @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Apple/testbed/iOSTestbed.xctestplan b/Apple/testbed/iOSTestbed.xctestplan new file mode 100644 index 00000000000000..0c4ab9eb2bad30 --- /dev/null +++ b/Apple/testbed/iOSTestbed.xctestplan @@ -0,0 +1,46 @@ +{ + "configurations" : [ + { + "id" : "F5A95CE4-1ADE-4A6E-A0E1-CDBAE26DF0C5", + "name" : "Test Scheme Action", + "options" : { + + } + } + ], + "defaultOptions" : { + "commandLineArgumentEntries" : [ + { + "argument" : "test" + }, + { + "argument" : "-uall" + }, + { + "argument" : "--single-process" + }, + { + "argument" : "--rerun" + }, + { + "argument" : "-W" + } + ], + "targetForVariableExpansion" : { + "containerPath" : "container:iOSTestbed.xcodeproj", + "identifier" : "607A66112B0EFA380010BFC8", + "name" : "iOSTestbed" + } + }, + "testTargets" : [ + { + "parallelizable" : false, + "target" : { + "containerPath" : "container:iOSTestbed.xcodeproj", + "identifier" : "607A662C2B0EFA3A0010BFC8", + "name" : "iOSTestbedTests" + } + } + ], + "version" : 1 +} diff --git a/iOS/testbed/iOSTestbed/AppDelegate.h b/Apple/testbed/iOSTestbed/AppDelegate.h similarity index 100% rename from iOS/testbed/iOSTestbed/AppDelegate.h rename to Apple/testbed/iOSTestbed/AppDelegate.h diff --git a/iOS/testbed/iOSTestbed/AppDelegate.m b/Apple/testbed/iOSTestbed/AppDelegate.m similarity index 100% rename from iOS/testbed/iOSTestbed/AppDelegate.m rename to Apple/testbed/iOSTestbed/AppDelegate.m diff --git a/iOS/testbed/iOSTestbed/Assets.xcassets/AccentColor.colorset/Contents.json b/Apple/testbed/iOSTestbed/Assets.xcassets/AccentColor.colorset/Contents.json similarity index 100% rename from iOS/testbed/iOSTestbed/Assets.xcassets/AccentColor.colorset/Contents.json rename to Apple/testbed/iOSTestbed/Assets.xcassets/AccentColor.colorset/Contents.json diff --git a/iOS/testbed/iOSTestbed/Assets.xcassets/AppIcon.appiconset/Contents.json b/Apple/testbed/iOSTestbed/Assets.xcassets/AppIcon.appiconset/Contents.json similarity index 100% rename from iOS/testbed/iOSTestbed/Assets.xcassets/AppIcon.appiconset/Contents.json rename to Apple/testbed/iOSTestbed/Assets.xcassets/AppIcon.appiconset/Contents.json diff --git a/iOS/testbed/iOSTestbed/Assets.xcassets/Contents.json b/Apple/testbed/iOSTestbed/Assets.xcassets/Contents.json similarity index 100% rename from iOS/testbed/iOSTestbed/Assets.xcassets/Contents.json rename to Apple/testbed/iOSTestbed/Assets.xcassets/Contents.json diff --git a/iOS/testbed/iOSTestbed/Base.lproj/LaunchScreen.storyboard b/Apple/testbed/iOSTestbed/Base.lproj/LaunchScreen.storyboard similarity index 100% rename from iOS/testbed/iOSTestbed/Base.lproj/LaunchScreen.storyboard rename to Apple/testbed/iOSTestbed/Base.lproj/LaunchScreen.storyboard diff --git a/iOS/testbed/iOSTestbed/app/README b/Apple/testbed/iOSTestbed/app/README similarity index 92% rename from iOS/testbed/iOSTestbed/app/README rename to Apple/testbed/iOSTestbed/app/README index af22c685f87976..46c0e8e2a29a1c 100644 --- a/iOS/testbed/iOSTestbed/app/README +++ b/Apple/testbed/iOSTestbed/app/README @@ -1,7 +1,7 @@ This folder can contain any Python application code. During the build, any binary modules found in this folder will be processed into -iOS Framework form. +Framework form. When the test suite runs, this folder will be on the PYTHONPATH, and will be the working directory for the test suite. diff --git a/iOS/testbed/iOSTestbed/app_packages/README b/Apple/testbed/iOSTestbed/app_packages/README similarity index 92% rename from iOS/testbed/iOSTestbed/app_packages/README rename to Apple/testbed/iOSTestbed/app_packages/README index 42d7fdeb813250..02c2beccfbdaed 100644 --- a/iOS/testbed/iOSTestbed/app_packages/README +++ b/Apple/testbed/iOSTestbed/app_packages/README @@ -2,6 +2,6 @@ This folder can be a target for installing any Python dependencies needed by the test suite. During the build, any binary modules found in this folder will be processed into -iOS Framework form. +Framework form. When the test suite runs, this folder will be on the PYTHONPATH. diff --git a/iOS/testbed/iOSTestbed/iOSTestbed-Info.plist b/Apple/testbed/iOSTestbed/iOSTestbed-Info.plist similarity index 74% rename from iOS/testbed/iOSTestbed/iOSTestbed-Info.plist rename to Apple/testbed/iOSTestbed/iOSTestbed-Info.plist index a582f42a212783..fea45e1fad6f6f 100644 --- a/iOS/testbed/iOSTestbed/iOSTestbed-Info.plist +++ b/Apple/testbed/iOSTestbed/iOSTestbed-Info.plist @@ -41,18 +41,6 @@ UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight - TestArgs - - test - -uall - --single-process - --rerun - -W - - UIApplicationSceneManifest UIApplicationSupportsMultipleScenes diff --git a/iOS/testbed/iOSTestbed/main.m b/Apple/testbed/iOSTestbed/main.m similarity index 100% rename from iOS/testbed/iOSTestbed/main.m rename to Apple/testbed/iOSTestbed/main.m diff --git a/Doc/Makefile b/Doc/Makefile index c8a749a02a89ec..f16d9cacb1b6fb 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -170,6 +170,7 @@ venv: echo "venv already exists."; \ echo "To recreate it, remove it first with \`make clean-venv'."; \ else \ + set -e; \ echo "Creating venv in $(VENVDIR)"; \ if $(UV) --version >/dev/null 2>&1; then \ $(UV) venv --python=$(PYTHON) $(VENVDIR); \ @@ -183,7 +184,7 @@ venv: fi .PHONY: dist-no-html -dist-no-html: dist-text dist-pdf dist-epub dist-texinfo +dist-no-html: dist-text dist-epub dist-texinfo .PHONY: dist dist: @@ -240,7 +241,8 @@ dist-pdf: # as otherwise the full latexmk process is run twice. # ($$ is needed to escape the $; https://www.gnu.org/software/make/manual/make.html#Basics-of-Variable-References) -sed -i 's/: all-$$(FMT)/:/' build/latex/Makefile - (cd build/latex; $(MAKE) clean && $(MAKE) --jobs=$$((`nproc`+1)) --output-sync LATEXMKOPTS='-quiet' all-pdf && $(MAKE) FMT=pdf zip bz2) + if [ -n "$(filter output-sync,$(value .FEATURES))" ]; then OUTPUTSYNC=--output-sync; else OUTPUTSYNC=; fi && \ + (cd build/latex; $(MAKE) clean && $(MAKE) --jobs=$$((`getconf _NPROCESSORS_ONLN`+1)) $$OUTPUTSYNC LATEXMKOPTS='-quiet' all-pdf && $(MAKE) FMT=pdf zip bz2) cp build/latex/docs-pdf.zip dist/python-$(DISTVERSION)-docs-pdf-a4.zip cp build/latex/docs-pdf.tar.bz2 dist/python-$(DISTVERSION)-docs-pdf-a4.tar.bz2 @echo "Build finished and archived!" diff --git a/Doc/bugs.rst b/Doc/bugs.rst index 5d0f68ca69675e..1d27579e53f4ef 100644 --- a/Doc/bugs.rst +++ b/Doc/bugs.rst @@ -19,6 +19,12 @@ If you find a bug in this documentation or would like to propose an improvement, please submit a bug report on the :ref:`issue tracker `. If you have a suggestion on how to fix it, include that as well. +.. only:: translation + + If the bug or suggested improvement concerns the translation of this + documentation, submit the report to the + `translation’s repository `_ instead. + You can also open a discussion item on our `Documentation Discourse forum `_. diff --git a/Doc/c-api/allocation.rst b/Doc/c-api/allocation.rst index 7cbc99ad145ad4..59044d2d88cc16 100644 --- a/Doc/c-api/allocation.rst +++ b/Doc/c-api/allocation.rst @@ -16,7 +16,20 @@ Allocating Objects on the Heap Initialize a newly allocated object *op* with its type and initial reference. Returns the initialized object. Other fields of the object are - not affected. + not initialized. Despite its name, this function is unrelated to the + object's :meth:`~object.__init__` method (:c:member:`~PyTypeObject.tp_init` + slot). Specifically, this function does **not** call the object's + :meth:`!__init__` method. + + In general, consider this function to be a low-level routine. Use + :c:member:`~PyTypeObject.tp_alloc` where possible. + For implementing :c:member:`!tp_alloc` for your type, prefer + :c:func:`PyType_GenericAlloc` or :c:func:`PyObject_New`. + + .. note:: + + This function only initializes the object's memory corresponding to the + initial :c:type:`PyObject` structure. It does not zero the rest. .. c:function:: PyVarObject* PyObject_InitVar(PyVarObject *op, PyTypeObject *type, Py_ssize_t size) @@ -24,43 +37,108 @@ Allocating Objects on the Heap This does everything :c:func:`PyObject_Init` does, and also initializes the length information for a variable-size object. + .. note:: + + This function only initializes some of the object's memory. It does not + zero the rest. + .. c:macro:: PyObject_New(TYPE, typeobj) - Allocate a new Python object using the C structure type *TYPE* - and the Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header are not initialized. - The caller will own the only reference to the object - (i.e. its reference count will be one). - The size of the memory allocation is determined from the - :c:member:`~PyTypeObject.tp_basicsize` field of the type object. + Allocates a new Python object using the C structure type *TYPE* and the + Python type object *typeobj* (``PyTypeObject*``) by calling + :c:func:`PyObject_Malloc` to allocate memory and initializing it like + :c:func:`PyObject_Init`. The caller will own the only reference to the + object (i.e. its reference count will be one). + + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This macro does not call :c:member:`~PyTypeObject.tp_alloc`, + :c:member:`~PyTypeObject.tp_new` (:meth:`~object.__new__`), or + :c:member:`~PyTypeObject.tp_init` (:meth:`~object.__init__`). + + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_New` instead. + + Memory allocated by this macro must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + + .. note:: + + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_New` instead. + PyObject *foo = PyObject_CallNoArgs((PyObject *)&PyFoo_Type); + + .. seealso:: + + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` .. c:macro:: PyObject_NewVar(TYPE, typeobj, size) - Allocate a new Python object using the C structure type *TYPE* and the - Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header - are not initialized. The allocated memory allows for the *TYPE* structure - plus *size* (``Py_ssize_t``) fields of the size - given by the :c:member:`~PyTypeObject.tp_itemsize` field of - *typeobj*. This is useful for implementing objects like tuples, which are - able to determine their size at construction time. Embedding the array of - fields into the same allocation decreases the number of allocations, - improving the memory management efficiency. + Like :c:macro:`PyObject_New` except: + + * It allocates enough memory for the *TYPE* structure plus *size* + (``Py_ssize_t``) fields of the size given by the + :c:member:`~PyTypeObject.tp_itemsize` field of *typeobj*. + * The memory is initialized like :c:func:`PyObject_InitVar`. + + This is useful for implementing objects like tuples, which are able to + determine their size at construction time. Embedding the array of fields + into the same allocation decreases the number of allocations, improving the + memory management efficiency. + + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_NewVar` + instead. - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_NewVar` instead. + Memory allocated by this function must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + .. note:: -.. c:function:: void PyObject_Del(void *op) + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: + + PyObject *list_instance = PyObject_CallNoArgs((PyObject *)&PyList_Type); + + .. seealso:: + + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` - Same as :c:func:`PyObject_Free`. .. c:var:: PyObject _Py_NoneStruct @@ -71,6 +149,38 @@ Allocating Objects on the Heap .. seealso:: - :c:func:`PyModule_Create` + :ref:`moduleobjects` To allocate and create extension modules. + +Deprecated aliases +^^^^^^^^^^^^^^^^^^ + +These are :term:`soft deprecated` aliases to existing functions and macros. +They exist solely for backwards compatibility. + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * Deprecated alias + * Function + * * .. c:macro:: PyObject_NEW(type, typeobj) + * :c:macro:`PyObject_New` + * * .. c:macro:: PyObject_NEW_VAR(type, typeobj, n) + * :c:macro:`PyObject_NewVar` + * * .. c:macro:: PyObject_INIT(op, typeobj) + * :c:func:`PyObject_Init` + * * .. c:macro:: PyObject_INIT_VAR(op, typeobj, n) + * :c:func:`PyObject_InitVar` + * * .. c:macro:: PyObject_MALLOC(n) + * :c:func:`PyObject_Malloc` + * * .. c:macro:: PyObject_REALLOC(p, n) + * :c:func:`PyObject_Realloc` + * * .. c:macro:: PyObject_FREE(p) + * :c:func:`PyObject_Free` + * * .. c:macro:: PyObject_DEL(p) + * :c:func:`PyObject_Free` + * * .. c:macro:: PyObject_Del(p) + * :c:func:`PyObject_Free` diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 3bbc990b6329c0..005a46a85caf82 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -113,18 +113,14 @@ There are three ways strings and buffers can be converted to C: ``z`` (:class:`str` or ``None``) [const char \*] Like ``s``, but the Python object may also be ``None``, in which case the C pointer is set to ``NULL``. - It is the same as ``s?`` with the C pointer was initialized to ``NULL``. ``z*`` (:class:`str`, :term:`bytes-like object` or ``None``) [Py_buffer] Like ``s*``, but the Python object may also be ``None``, in which case the ``buf`` member of the :c:type:`Py_buffer` structure is set to ``NULL``. - It is the same as ``s*?`` with the ``buf`` member of the :c:type:`Py_buffer` - structure was initialized to ``NULL``. ``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Like ``s#``, but the Python object may also be ``None``, in which case the C pointer is set to ``NULL``. - It is the same as ``s#?`` with the C pointer was initialized to ``NULL``. ``y`` (read-only :term:`bytes-like object`) [const char \*] This format converts a bytes-like object to a C pointer to a @@ -164,7 +160,7 @@ There are three ways strings and buffers can be converted to C: ``w*`` (read-write :term:`bytes-like object`) [Py_buffer] This format accepts any object which implements the read-write buffer interface. It fills a :c:type:`Py_buffer` structure provided by the caller. - The buffer may contain embedded null bytes. The caller have to call + The buffer may contain embedded null bytes. The caller has to call :c:func:`PyBuffer_Release` when it is done with the buffer. ``es`` (:class:`str`) [const char \*encoding, char \*\*buffer] @@ -387,17 +383,6 @@ Other objects Non-tuple sequences are deprecated if *items* contains format units which store a borrowed buffer or a borrowed reference. -``unit?`` (anything or ``None``) [*matching-variable(s)*] - ``?`` modifies the behavior of the preceding format unit. - The C variable(s) corresponding to that parameter should be initialized - to their default value --- when the argument is ``None``, - :c:func:`PyArg_ParseTuple` does not touch the contents of the corresponding - C variable(s). - If the argument is not ``None``, it is parsed according to the specified - format unit. - - .. versionadded:: 3.14 - A few other characters have a meaning in a format string. These may not occur inside nested parentheses. They are: @@ -685,6 +670,13 @@ Building values ``p`` (:class:`bool`) [int] Convert a C :c:expr:`int` to a Python :class:`bool` object. + + Be aware that this format requires an ``int`` argument. + Unlike most other contexts in C, variadic arguments are not coerced to + a suitable type automatically. + You can convert another type (for example, a pointer or a float) to a + suitable ``int`` value using ``(x) ? 1 : 0`` or ``!!x``. + .. versionadded:: 3.14 ``c`` (:class:`bytes` of length 1) [char] diff --git a/Doc/c-api/buffer.rst b/Doc/c-api/buffer.rst index d3081894eadaf5..6bb72a2312be3b 100644 --- a/Doc/c-api/buffer.rst +++ b/Doc/c-api/buffer.rst @@ -261,6 +261,10 @@ readonly, format MUST be consistent for all consumers. For example, :c:expr:`PyBUF_SIMPLE | PyBUF_WRITABLE` can be used to request a simple writable buffer. + .. c:macro:: PyBUF_WRITEABLE + + This is a :term:`soft deprecated` alias to :c:macro:`PyBUF_WRITABLE`. + .. c:macro:: PyBUF_FORMAT Controls the :c:member:`~Py_buffer.format` field. If set, this field MUST diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index d47beee68eaa33..7d8a511e100cf4 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -219,3 +219,38 @@ called with a non-bytes parameter. reallocation fails, the original bytes object at *\*bytes* is deallocated, *\*bytes* is set to ``NULL``, :exc:`MemoryError` is set, and ``-1`` is returned. + + +.. c:function:: PyObject *PyBytes_Repr(PyObject *bytes, int smartquotes) + + Get the string representation of *bytes*. This function is currently used to + implement :meth:`!bytes.__repr__` in Python. + + This function does not do type checking; it is undefined behavior to pass + *bytes* as a non-bytes object or ``NULL``. + + If *smartquotes* is true, the representation will use a double-quoted string + instead of single-quoted string when single-quotes are present in *bytes*. + For example, the byte string ``'Python'`` would be represented as + ``b"'Python'"`` when *smartquotes* is true, or ``b'\'Python\''`` when it is + false. + + On success, this function returns a :term:`strong reference` to a + :class:`str` object containing the representation. On failure, this + returns ``NULL`` with an exception set. + + +.. c:function:: PyObject *PyBytes_DecodeEscape(const char *s, Py_ssize_t len, const char *errors, Py_ssize_t unicode, const char *recode_encoding) + + Unescape a backslash-escaped string *s*. *s* must not be ``NULL``. + *len* must be the size of *s*. + + *errors* must be one of ``"strict"``, ``"replace"``, or ``"ignore"``. If + *errors* is ``NULL``, then ``"strict"`` is used by default. + + On success, this function returns a :term:`strong reference` to a Python + :class:`bytes` object containing the unescaped string. On failure, this + function returns ``NULL`` with an exception set. + + .. versionchanged:: 3.9 + *unicode* and *recode_encoding* are now unused. diff --git a/Doc/c-api/capsule.rst b/Doc/c-api/capsule.rst index cdb8aa33e9fd32..03a848d68ed7ab 100644 --- a/Doc/c-api/capsule.rst +++ b/Doc/c-api/capsule.rst @@ -15,13 +15,19 @@ Refer to :ref:`using-capsules` for more information on using these objects. .. c:type:: PyCapsule This subtype of :c:type:`PyObject` represents an opaque value, useful for C - extension modules who need to pass an opaque value (as a :c:expr:`void*` + extension modules which need to pass an opaque value (as a :c:expr:`void*` pointer) through Python code to other C code. It is often used to make a C function pointer defined in one module available to other modules, so the regular import mechanism can be used to access C APIs defined in dynamically loaded modules. +.. c:var:: PyTypeObject PyCapsule_Type + + The type object corresponding to capsule objects. This is the same object + as :class:`types.CapsuleType` in the Python layer. + + .. c:type:: PyCapsule_Destructor The type of a destructor callback for a capsule. Defined as:: @@ -105,9 +111,19 @@ Refer to :ref:`using-capsules` for more information on using these objects. ``module.attribute``. The *name* stored in the capsule must match this string exactly. + This function splits *name* on the ``.`` character, and imports the first + element. It then processes further elements using attribute lookups. + Return the capsule's internal *pointer* on success. On failure, set an exception and return ``NULL``. + .. note:: + + If *name* points to an attribute of some submodule or subpackage, this + submodule or subpackage must be previously imported using other means + (for example, by using :c:func:`PyImport_ImportModule`) for the + attribute lookups to succeed. + .. versionchanged:: 3.3 *no_block* has no effect anymore. diff --git a/Doc/c-api/cell.rst b/Doc/c-api/cell.rst index 61eb994c370946..2501ed9580df89 100644 --- a/Doc/c-api/cell.rst +++ b/Doc/c-api/cell.rst @@ -7,7 +7,7 @@ Cell Objects "Cell" objects are used to implement variables referenced by multiple scopes. For each such variable, a cell object is created to store the value; the local -variables of each stack frame that references the value contains a reference to +variables of each stack frame that references the value contain a reference to the cells from outer scopes which also use that variable. When the value is accessed, the value contained in the cell is used instead of the cell object itself. This de-referencing of the cell object requires support from the diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index 6eae24b38fae48..45f5e83adc48c6 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -182,7 +182,7 @@ bound into a function. Type of a code object watcher callback function. If *event* is ``PY_CODE_EVENT_CREATE``, then the callback is invoked - after `co` has been fully initialized. Otherwise, the callback is invoked + after *co* has been fully initialized. Otherwise, the callback is invoked before the destruction of *co* takes place, so the prior state of *co* can be inspected. @@ -211,6 +211,82 @@ bound into a function. .. versionadded:: 3.12 +.. c:function:: PyObject *PyCode_Optimize(PyObject *code, PyObject *consts, PyObject *names, PyObject *lnotab_obj) + + This is a :term:`soft deprecated` function that does nothing. + + Prior to Python 3.10, this function would perform basic optimizations to a + code object. + + .. versionchanged:: 3.10 + This function now does nothing. + + +.. _c_codeobject_flags: + +Code Object Flags +----------------- + +Code objects contain a bit-field of flags, which can be retrieved as the +:attr:`~codeobject.co_flags` Python attribute (for example using +:c:func:`PyObject_GetAttrString`), and set using a *flags* argument to +:c:func:`PyUnstable_Code_New` and similar functions. + +Flags whose names start with ``CO_FUTURE_`` correspond to features normally +selectable by :ref:`future statements `. These flags can be used in +:c:member:`PyCompilerFlags.cf_flags`. +Note that many ``CO_FUTURE_`` flags are mandatory in current versions of +Python, and setting them has no effect. + +The following flags are available. +For their meaning, see the linked documentation of their Python equivalents. + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * Flag + * Meaning + * * .. c:macro:: CO_OPTIMIZED + * :py:data:`inspect.CO_OPTIMIZED` + * * .. c:macro:: CO_NEWLOCALS + * :py:data:`inspect.CO_NEWLOCALS` + * * .. c:macro:: CO_VARARGS + * :py:data:`inspect.CO_VARARGS` + * * .. c:macro:: CO_VARKEYWORDS + * :py:data:`inspect.CO_VARKEYWORDS` + * * .. c:macro:: CO_NESTED + * :py:data:`inspect.CO_NESTED` + * * .. c:macro:: CO_GENERATOR + * :py:data:`inspect.CO_GENERATOR` + * * .. c:macro:: CO_COROUTINE + * :py:data:`inspect.CO_COROUTINE` + * * .. c:macro:: CO_ITERABLE_COROUTINE + * :py:data:`inspect.CO_ITERABLE_COROUTINE` + * * .. c:macro:: CO_ASYNC_GENERATOR + * :py:data:`inspect.CO_ASYNC_GENERATOR` + * * .. c:macro:: CO_HAS_DOCSTRING + * :py:data:`inspect.CO_HAS_DOCSTRING` + * * .. c:macro:: CO_METHOD + * :py:data:`inspect.CO_METHOD` + + * * .. c:macro:: CO_FUTURE_DIVISION + * no effect (:py:data:`__future__.division`) + * * .. c:macro:: CO_FUTURE_ABSOLUTE_IMPORT + * no effect (:py:data:`__future__.absolute_import`) + * * .. c:macro:: CO_FUTURE_WITH_STATEMENT + * no effect (:py:data:`__future__.with_statement`) + * * .. c:macro:: CO_FUTURE_PRINT_FUNCTION + * no effect (:py:data:`__future__.print_function`) + * * .. c:macro:: CO_FUTURE_UNICODE_LITERALS + * no effect (:py:data:`__future__.unicode_literals`) + * * .. c:macro:: CO_FUTURE_GENERATOR_STOP + * no effect (:py:data:`__future__.generator_stop`) + * * .. c:macro:: CO_FUTURE_ANNOTATIONS + * :py:data:`__future__.annotations` + + Extra information ----------------- @@ -224,7 +300,7 @@ may change without deprecation warnings. .. c:function:: Py_ssize_t PyUnstable_Eval_RequestCodeExtraIndex(freefunc free) - Return a new an opaque index value used to adding data to code objects. + Return a new opaque index value used to adding data to code objects. You generally call this function once (per interpreter) and use the result with ``PyCode_GetExtra`` and ``PyCode_SetExtra`` to manipulate diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst index 8ae5c4fecd6248..35ee048bd5fa9f 100644 --- a/Doc/c-api/codec.rst +++ b/Doc/c-api/codec.rst @@ -7,7 +7,7 @@ Codec registry and support functions Register a new codec search function. - As side effect, this tries to load the :mod:`!encodings` package, if not yet + As a side effect, this tries to load the :mod:`!encodings` package, if not yet done, to make sure that it is always first in the list of search functions. .. c:function:: int PyCodec_Unregister(PyObject *search_function) @@ -39,7 +39,7 @@ Codec registry and support functions *object* is passed through the decoder function found for the given *encoding* using the error handling method defined by *errors*. *errors* may be ``NULL`` to use the default method defined for the codec. Raises a - :exc:`LookupError` if no encoder can be found. + :exc:`LookupError` if no decoder can be found. Codec lookup API @@ -129,3 +129,13 @@ Registry API for Unicode encoding error handlers Replace the unicode encode error with ``\N{...}`` escapes. .. versionadded:: 3.5 + + +Codec utility variables +----------------------- + +.. c:var:: const char *Py_hexdigits + + A string constant containing the lowercase hexadecimal digits: ``"0123456789abcdef"``. + + .. versionadded:: 3.3 diff --git a/Doc/c-api/concrete.rst b/Doc/c-api/concrete.rst index 880f7b15ce68e8..1746fe95eaaca9 100644 --- a/Doc/c-api/concrete.rst +++ b/Doc/c-api/concrete.rst @@ -109,11 +109,20 @@ Other Objects descriptor.rst slice.rst memoryview.rst + picklebuffer.rst weakref.rst capsule.rst frame.rst gen.rst coro.rst contextvars.rst - datetime.rst typehints.rst + + +C API for extension modules +=========================== + +.. toctree:: + + curses.rst + datetime.rst diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index c92ef4c653a675..977406eb4d4222 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -41,7 +41,7 @@ The return value (*rv*) for these functions should be interpreted as follows: ``rv + 1`` bytes would have been needed to succeed. ``str[size-1]`` is ``'\0'`` in this case. -* When ``rv < 0``, "something bad happened." ``str[size-1]`` is ``'\0'`` in +* When ``rv < 0``, the output conversion failed and ``str[size-1]`` is ``'\0'`` in this case too, but the rest of *str* is undefined. The exact cause of the error depends on the underlying platform. @@ -128,18 +128,28 @@ The following functions provide locale-independent string to number conversions. must be 0 and is ignored. The ``'r'`` format code specifies the standard :func:`repr` format. - *flags* can be zero or more of the values ``Py_DTSF_SIGN``, - ``Py_DTSF_ADD_DOT_0``, or ``Py_DTSF_ALT``, or-ed together: + *flags* can be zero or more of the following values or-ed together: - * ``Py_DTSF_SIGN`` means to always precede the returned string with a sign - character, even if *val* is non-negative. + .. c:macro:: Py_DTSF_SIGN - * ``Py_DTSF_ADD_DOT_0`` means to ensure that the returned string will not look - like an integer. + Always precede the returned string with a sign + character, even if *val* is non-negative. - * ``Py_DTSF_ALT`` means to apply "alternate" formatting rules. See the - documentation for the :c:func:`PyOS_snprintf` ``'#'`` specifier for - details. + .. c:macro:: Py_DTSF_ADD_DOT_0 + + Ensure that the returned string will not look like an integer. + + .. c:macro:: Py_DTSF_ALT + + Apply "alternate" formatting rules. + See the documentation for the :c:func:`PyOS_snprintf` ``'#'`` specifier for + details. + + .. c:macro:: Py_DTSF_NO_NEG_0 + + Negative zero is converted to positive zero. + + .. versionadded:: 3.11 If *ptype* is non-``NULL``, then the value it points to will be set to one of ``Py_DTST_FINITE``, ``Py_DTST_INFINITE``, or ``Py_DTST_NAN``, signifying that @@ -152,13 +162,85 @@ The following functions provide locale-independent string to number conversions. .. versionadded:: 3.1 -.. c:function:: int PyOS_stricmp(const char *s1, const char *s2) +.. c:function:: int PyOS_mystricmp(const char *str1, const char *str2) + int PyOS_mystrnicmp(const char *str1, const char *str2, Py_ssize_t size) + + Case insensitive comparison of strings. These functions work almost + identically to :c:func:`!strcmp` and :c:func:`!strncmp` (respectively), + except that they ignore the case of ASCII characters. + + Return ``0`` if the strings are equal, a negative value if *str1* sorts + lexicographically before *str2*, or a positive value if it sorts after. + + In the *str1* or *str2* arguments, a NUL byte marks the end of the string. + For :c:func:`!PyOS_mystrnicmp`, the *size* argument gives the maximum size + of the string, as if NUL was present at the index given by *size*. + + These functions do not use the locale. + + +.. c:function:: int PyOS_stricmp(const char *str1, const char *str2) + int PyOS_strnicmp(const char *str1, const char *str2, Py_ssize_t size) + + Case insensitive comparison of strings. + + On Windows, these are aliases of :c:func:`!stricmp` and :c:func:`!strnicmp`, + respectively. + + On other platforms, they are aliases of :c:func:`PyOS_mystricmp` and + :c:func:`PyOS_mystrnicmp`, respectively. + + +Character classification and conversion +======================================= + +The following macros provide locale-independent (unlike the C standard library +``ctype.h``) character classification and conversion. +The argument must be a signed or unsigned :c:expr:`char`. + + +.. c:macro:: Py_ISALNUM(c) + + Return true if the character *c* is an alphanumeric character. + + +.. c:macro:: Py_ISALPHA(c) + + Return true if the character *c* is an alphabetic character (``a-z`` and ``A-Z``). + + +.. c:macro:: Py_ISDIGIT(c) + + Return true if the character *c* is a decimal digit (``0-9``). + + +.. c:macro:: Py_ISLOWER(c) + + Return true if the character *c* is a lowercase ASCII letter (``a-z``). + + +.. c:macro:: Py_ISUPPER(c) + + Return true if the character *c* is an uppercase ASCII letter (``A-Z``). + + +.. c:macro:: Py_ISSPACE(c) + + Return true if the character *c* is a whitespace character (space, tab, + carriage return, newline, vertical tab, or form feed). + + +.. c:macro:: Py_ISXDIGIT(c) + + Return true if the character *c* is a hexadecimal digit (``0-9``, ``a-f``, and + ``A-F``). + + +.. c:macro:: Py_TOLOWER(c) - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strcmp` except that it ignores the case. + Return the lowercase equivalent of the character *c*. -.. c:function:: int PyOS_strnicmp(const char *s1, const char *s2, Py_ssize_t size) +.. c:macro:: Py_TOUPPER(c) - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strncmp` except that it ignores the case. + Return the uppercase equivalent of the character *c*. diff --git a/Doc/c-api/curses.rst b/Doc/c-api/curses.rst new file mode 100644 index 00000000000000..5a1697c43cc969 --- /dev/null +++ b/Doc/c-api/curses.rst @@ -0,0 +1,138 @@ +.. highlight:: c + +Curses C API +------------ + +:mod:`curses` exposes a small C interface for extension modules. +Consumers must include the header file :file:`py_curses.h` (which is not +included by default by :file:`Python.h`) and :c:func:`import_curses` must +be invoked, usually as part of the module initialisation function, to populate +:c:var:`PyCurses_API`. + +.. warning:: + + Neither the C API nor the pure Python :mod:`curses` module are compatible + with subinterpreters. + +.. c:macro:: import_curses() + + Import the curses C API. The macro does not need a semi-colon to be called. + + On success, populate the :c:var:`PyCurses_API` pointer. + + On failure, set :c:var:`PyCurses_API` to NULL and set an exception. + The caller must check if an error occurred via :c:func:`PyErr_Occurred`: + + .. code-block:: + + import_curses(); // semi-colon is optional but recommended + if (PyErr_Occurred()) { /* cleanup */ } + + +.. c:var:: void **PyCurses_API + + Dynamically allocated object containing the curses C API. + This variable is only available once :c:macro:`import_curses` succeeds. + + ``PyCurses_API[0]`` corresponds to :c:data:`PyCursesWindow_Type`. + + ``PyCurses_API[1]``, ``PyCurses_API[2]``, and ``PyCurses_API[3]`` + are pointers to predicate functions of type ``int (*)(void)``. + + When called, these predicates return whether :func:`curses.setupterm`, + :func:`curses.initscr`, and :func:`curses.start_color` have been called + respectively. + + See also the convenience macros :c:macro:`PyCursesSetupTermCalled`, + :c:macro:`PyCursesInitialised`, and :c:macro:`PyCursesInitialisedColor`. + + .. note:: + + The number of entries in this structure is subject to changes. + Consider using :c:macro:`PyCurses_API_pointers` to check if + new fields are available or not. + + +.. c:macro:: PyCurses_API_pointers + + The number of accessible fields (``4``) in :c:var:`PyCurses_API`. + This number is incremented whenever new fields are added. + + +.. c:var:: PyTypeObject PyCursesWindow_Type + + The :ref:`heap type ` corresponding to :class:`curses.window`. + + +.. c:function:: int PyCursesWindow_Check(PyObject *op) + + Return true if *op* is a :class:`curses.window` instance, false otherwise. + + +The following macros are convenience macros expanding into C statements. +In particular, they can only be used as ``macro;`` or ``macro``, but not +``macro()`` or ``macro();``. + +.. c:macro:: PyCursesSetupTermCalled + + Macro checking if :func:`curses.setupterm` has been called. + + The macro expansion is roughly equivalent to: + + .. code-block:: + + { + typedef int (*predicate_t)(void); + predicate_t was_setupterm_called = (predicate_t)PyCurses_API[1]; + if (!was_setupterm_called()) { + return NULL; + } + } + + +.. c:macro:: PyCursesInitialised + + Macro checking if :func:`curses.initscr` has been called. + + The macro expansion is roughly equivalent to: + + .. code-block:: + + { + typedef int (*predicate_t)(void); + predicate_t was_initscr_called = (predicate_t)PyCurses_API[2]; + if (!was_initscr_called()) { + return NULL; + } + } + + +.. c:macro:: PyCursesInitialisedColor + + Macro checking if :func:`curses.start_color` has been called. + + The macro expansion is roughly equivalent to: + + .. code-block:: + + { + typedef int (*predicate_t)(void); + predicate_t was_start_color_called = (predicate_t)PyCurses_API[3]; + if (!was_start_color_called()) { + return NULL; + } + } + + +Internal data +------------- + +The following objects are exposed by the C API but should be considered +internal-only. + +.. c:macro:: PyCurses_CAPSULE_NAME + + Name of the curses capsule to pass to :c:func:`PyCapsule_Import`. + + Internal usage only. Use :c:macro:`import_curses` instead. + diff --git a/Doc/c-api/datetime.rst b/Doc/c-api/datetime.rst index d2d4d5309c7098..127d7c9c91a3d5 100644 --- a/Doc/c-api/datetime.rst +++ b/Doc/c-api/datetime.rst @@ -8,11 +8,42 @@ DateTime Objects Various date and time objects are supplied by the :mod:`datetime` module. Before using any of these functions, the header file :file:`datetime.h` must be included in your source (note that this is not included by :file:`Python.h`), -and the macro :c:macro:`!PyDateTime_IMPORT` must be invoked, usually as part of +and the macro :c:macro:`PyDateTime_IMPORT` must be invoked, usually as part of the module initialisation function. The macro puts a pointer to a C structure -into a static variable, :c:data:`!PyDateTimeAPI`, that is used by the following +into a static variable, :c:data:`PyDateTimeAPI`, that is used by the following macros. +.. c:macro:: PyDateTime_IMPORT() + + Import the datetime C API. + + On success, populate the :c:var:`PyDateTimeAPI` pointer. + On failure, set :c:var:`PyDateTimeAPI` to ``NULL`` and set an exception. + The caller must check if an error occurred via :c:func:`PyErr_Occurred`: + + .. code-block:: + + PyDateTime_IMPORT; + if (PyErr_Occurred()) { /* cleanup */ } + + .. warning:: + + This is not compatible with subinterpreters. + +.. c:type:: PyDateTime_CAPI + + Structure containing the fields for the datetime C API. + + The fields of this structure are private and subject to change. + + Do not use this directly; prefer ``PyDateTime_*`` APIs instead. + +.. c:var:: PyDateTime_CAPI *PyDateTimeAPI + + Dynamically allocated object containing the datetime C API. + + This variable is only available once :c:macro:`PyDateTime_IMPORT` succeeds. + .. c:type:: PyDateTime_Date This subtype of :c:type:`PyObject` represents a Python date object. @@ -46,7 +77,7 @@ macros. .. c:var:: PyTypeObject PyDateTime_DeltaType - This instance of :c:type:`PyTypeObject` represents Python type for + This instance of :c:type:`PyTypeObject` represents the Python type for the difference between two datetime values; it is the same object as :class:`datetime.timedelta` in the Python layer. @@ -325,3 +356,16 @@ Macros for the convenience of modules implementing the DB API: Create and return a new :class:`datetime.date` object given an argument tuple suitable for passing to :meth:`datetime.date.fromtimestamp`. + + +Internal data +------------- + +The following symbols are exposed by the C API but should be considered +internal-only. + +.. c:macro:: PyDateTime_CAPSULE_NAME + + Name of the datetime capsule to pass to :c:func:`PyCapsule_Import`. + + Internal usage only. Use :c:macro:`PyDateTime_IMPORT` instead. diff --git a/Doc/c-api/descriptor.rst b/Doc/c-api/descriptor.rst index b32c113e5f0457..313c534545a861 100644 --- a/Doc/c-api/descriptor.rst +++ b/Doc/c-api/descriptor.rst @@ -21,20 +21,104 @@ found in the dictionary of type objects. .. c:function:: PyObject* PyDescr_NewMember(PyTypeObject *type, struct PyMemberDef *meth) +.. c:var:: PyTypeObject PyMemberDescr_Type + + The type object for member descriptor objects created from + :c:type:`PyMemberDef` structures. These descriptors expose fields of a + C struct as attributes on a type, and correspond + to :class:`types.MemberDescriptorType` objects in Python. + + + +.. c:var:: PyTypeObject PyGetSetDescr_Type + + The type object for get/set descriptor objects created from + :c:type:`PyGetSetDef` structures. These descriptors implement attributes + whose value is computed by C getter and setter functions, and are used + for many built-in type attributes. + + .. c:function:: PyObject* PyDescr_NewMethod(PyTypeObject *type, struct PyMethodDef *meth) +.. c:var:: PyTypeObject PyMethodDescr_Type + + The type object for method descriptor objects created from + :c:type:`PyMethodDef` structures. These descriptors expose C functions as + methods on a type, and correspond to :class:`types.MemberDescriptorType` + objects in Python. + + .. c:function:: PyObject* PyDescr_NewWrapper(PyTypeObject *type, struct wrapperbase *wrapper, void *wrapped) +.. c:var:: PyTypeObject PyWrapperDescr_Type + + The type object for wrapper descriptor objects created by + :c:func:`PyDescr_NewWrapper` and :c:func:`PyWrapper_New`. Wrapper + descriptors are used internally to expose special methods implemented + via wrapper structures, and appear in Python as + :class:`types.WrapperDescriptorType` objects. + + .. c:function:: PyObject* PyDescr_NewClassMethod(PyTypeObject *type, PyMethodDef *method) .. c:function:: int PyDescr_IsData(PyObject *descr) - Return non-zero if the descriptor objects *descr* describes a data attribute, or + Return non-zero if the descriptor object *descr* describes a data attribute, or ``0`` if it describes a method. *descr* must be a descriptor object; there is no error checking. .. c:function:: PyObject* PyWrapper_New(PyObject *, PyObject *) + + +Built-in descriptors +^^^^^^^^^^^^^^^^^^^^ + +.. c:var:: PyTypeObject PySuper_Type + + The type object for super objects. This is the same object as + :class:`super` in the Python layer. + + +.. c:var:: PyTypeObject PyClassMethod_Type + + The type of class method objects. This is the same object as + :class:`classmethod` in the Python layer. + + +.. c:var:: PyTypeObject PyClassMethodDescr_Type + + The type object for C-level class method descriptor objects. + This is the type of the descriptors created for :func:`classmethod` defined in + C extension types, and is the same object as :class:`classmethod` + in Python. + + +.. c:function:: PyObject *PyClassMethod_New(PyObject *callable) + + Create a new :class:`classmethod` object wrapping *callable*. + *callable* must be a callable object and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to a new class + method descriptor. On failure, this function returns ``NULL`` with an + exception set. + + +.. c:var:: PyTypeObject PyStaticMethod_Type + + The type of static method objects. This is the same object as + :class:`staticmethod` in the Python layer. + + +.. c:function:: PyObject *PyStaticMethod_New(PyObject *callable) + + Create a new :class:`staticmethod` object wrapping *callable*. + *callable* must be a callable object and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to a new static + method descriptor. On failure, this function returns ``NULL`` with an + exception set. + diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index e55c5c80cb83c0..9c4428ced41b5a 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -43,6 +43,17 @@ Dictionary Objects prevent modification of the dictionary for non-dynamic class types. +.. c:var:: PyTypeObject PyDictProxy_Type + + The type object for mapping proxy objects created by + :c:func:`PyDictProxy_New` and for the read-only ``__dict__`` attribute + of many built-in types. A :c:type:`PyDictProxy_Type` instance provides a + dynamic, read-only view of an underlying dictionary: changes to the + underlying dictionary are reflected in the proxy, but the proxy itself + does not support mutation operations. This corresponds to + :class:`types.MappingProxyType` in Python. + + .. c:function:: void PyDict_Clear(PyObject *p) Empty an existing dictionary of all key-value pairs. @@ -50,7 +61,7 @@ Dictionary Objects .. c:function:: int PyDict_Contains(PyObject *p, PyObject *key) - Determine if dictionary *p* contains *key*. If an item in *p* is matches + Determine if dictionary *p* contains *key*. If an item in *p* matches *key*, return ``1``, otherwise return ``0``. On error, return ``-1``. This is equivalent to the Python expression ``key in p``. @@ -198,7 +209,7 @@ Dictionary Objects .. c:function:: int PyDict_Pop(PyObject *p, PyObject *key, PyObject **result) Remove *key* from dictionary *p* and optionally return the removed value. - Do not raise :exc:`KeyError` if the key missing. + Do not raise :exc:`KeyError` if the key is missing. - If the key is present, set *\*result* to a new reference to the removed value if *result* is not ``NULL``, and return ``1``. @@ -207,7 +218,7 @@ Dictionary Objects - On error, raise an exception and return ``-1``. Similar to :meth:`dict.pop`, but without the default value and - not raising :exc:`KeyError` if the key missing. + not raising :exc:`KeyError` if the key is missing. .. versionadded:: 3.13 @@ -245,6 +256,11 @@ Dictionary Objects ``len(p)`` on a dictionary. +.. c:function:: Py_ssize_t PyDict_GET_SIZE(PyObject *p) + + Similar to :c:func:`PyDict_Size`, but without error checking. + + .. c:function:: int PyDict_Next(PyObject *p, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) Iterate over all key-value pairs in the dictionary *p*. The @@ -301,6 +317,15 @@ Dictionary Objects } Py_END_CRITICAL_SECTION(); + .. note:: + + On the free-threaded build, this function can be used safely inside a + critical section. However, the references returned for *pkey* and *pvalue* + are :term:`borrowed ` and are only valid while the + critical section is held. If you need to use these objects outside the + critical section or when the critical section can be suspended, create a + :term:`strong reference ` (for example, using + :c:func:`Py_NewRef`). .. c:function:: int PyDict_Merge(PyObject *a, PyObject *b, int override) @@ -417,3 +442,138 @@ Dictionary Objects it before returning. .. versionadded:: 3.12 + + +Dictionary View Objects +^^^^^^^^^^^^^^^^^^^^^^^ + +.. c:function:: int PyDictViewSet_Check(PyObject *op) + + Return true if *op* is a view of a set inside a dictionary. This is currently + equivalent to :c:expr:`PyDictKeys_Check(op) || PyDictItems_Check(op)`. This + function always succeeds. + + +.. c:var:: PyTypeObject PyDictKeys_Type + + Type object for a view of dictionary keys. In Python, this is the type of + the object returned by :meth:`dict.keys`. + + +.. c:function:: int PyDictKeys_Check(PyObject *op) + + Return true if *op* is an instance of a dictionary keys view. This function + always succeeds. + + +.. c:var:: PyTypeObject PyDictValues_Type + + Type object for a view of dictionary values. In Python, this is the type of + the object returned by :meth:`dict.values`. + + +.. c:function:: int PyDictValues_Check(PyObject *op) + + Return true if *op* is an instance of a dictionary values view. This function + always succeeds. + + +.. c:var:: PyTypeObject PyDictItems_Type + + Type object for a view of dictionary items. In Python, this is the type of + the object returned by :meth:`dict.items`. + + +.. c:function:: int PyDictItems_Check(PyObject *op) + + Return true if *op* is an instance of a dictionary items view. This function + always succeeds. + + +Ordered Dictionaries +^^^^^^^^^^^^^^^^^^^^ + +Python's C API provides interface for :class:`collections.OrderedDict` from C. +Since Python 3.7, dictionaries are ordered by default, so there is usually +little need for these functions; prefer ``PyDict*`` where possible. + + +.. c:var:: PyTypeObject PyODict_Type + + Type object for ordered dictionaries. This is the same object as + :class:`collections.OrderedDict` in the Python layer. + + +.. c:function:: int PyODict_Check(PyObject *od) + + Return true if *od* is an ordered dictionary object or an instance of a + subtype of the :class:`~collections.OrderedDict` type. This function + always succeeds. + + +.. c:function:: int PyODict_CheckExact(PyObject *od) + + Return true if *od* is an ordered dictionary object, but not an instance of + a subtype of the :class:`~collections.OrderedDict` type. + This function always succeeds. + + +.. c:var:: PyTypeObject PyODictKeys_Type + + Analogous to :c:type:`PyDictKeys_Type` for ordered dictionaries. + + +.. c:var:: PyTypeObject PyODictValues_Type + + Analogous to :c:type:`PyDictValues_Type` for ordered dictionaries. + + +.. c:var:: PyTypeObject PyODictItems_Type + + Analogous to :c:type:`PyDictItems_Type` for ordered dictionaries. + + +.. c:function:: PyObject *PyODict_New(void) + + Return a new empty ordered dictionary, or ``NULL`` on failure. + + This is analogous to :c:func:`PyDict_New`. + + +.. c:function:: int PyODict_SetItem(PyObject *od, PyObject *key, PyObject *value) + + Insert *value* into the ordered dictionary *od* with a key of *key*. + Return ``0`` on success or ``-1`` with an exception set on failure. + + This is analogous to :c:func:`PyDict_SetItem`. + + +.. c:function:: int PyODict_DelItem(PyObject *od, PyObject *key) + + Remove the entry in the ordered dictionary *od* with key *key*. + Return ``0`` on success or ``-1`` with an exception set on failure. + + This is analogous to :c:func:`PyDict_DelItem`. + + +These are :term:`soft deprecated` aliases to ``PyDict`` APIs: + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * ``PyODict`` + * ``PyDict`` + * * .. c:macro:: PyODict_GetItem(od, key) + * :c:func:`PyDict_GetItem` + * * .. c:macro:: PyODict_GetItemWithError(od, key) + * :c:func:`PyDict_GetItemWithError` + * * .. c:macro:: PyODict_GetItemString(od, key) + * :c:func:`PyDict_GetItemString` + * * .. c:macro:: PyODict_Contains(od, key) + * :c:func:`PyDict_Contains` + * * .. c:macro:: PyODict_Size(od) + * :c:func:`PyDict_Size` + * * .. c:macro:: PyODict_SIZE(od) + * :c:func:`PyDict_GET_SIZE` diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index c8e1b5c2461738..d7fe9e2c9ec9b4 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -309,6 +309,14 @@ For convenience, some of these functions will always return a .. versionadded:: 3.4 +.. c:function:: void PyErr_RangedSyntaxLocationObject(PyObject *filename, int lineno, int col_offset, int end_lineno, int end_col_offset) + + Similar to :c:func:`PyErr_SyntaxLocationObject`, but also sets the + *end_lineno* and *end_col_offset* information for the current exception. + + .. versionadded:: 3.10 + + .. c:function:: void PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset) Like :c:func:`PyErr_SyntaxLocationObject`, but *filename* is a byte string @@ -331,6 +339,23 @@ For convenience, some of these functions will always return a use. +.. c:function:: PyObject *PyErr_ProgramTextObject(PyObject *filename, int lineno) + + Get the source line in *filename* at line *lineno*. *filename* should be a + Python :class:`str` object. + + On success, this function returns a Python string object with the found line. + On failure, this function returns ``NULL`` without an exception set. + + +.. c:function:: PyObject *PyErr_ProgramText(const char *filename, int lineno) + + Similar to :c:func:`PyErr_ProgramTextObject`, but *filename* is a + :c:expr:`const char *`, which is decoded with the + :term:`filesystem encoding and error handler`, instead of a + Python object reference. + + Issuing warnings ================ @@ -394,6 +419,15 @@ an error value). .. versionadded:: 3.2 +.. c:function:: int PyErr_WarnExplicitFormat(PyObject *category, const char *filename, int lineno, const char *module, PyObject *registry, const char *format, ...) + + Similar to :c:func:`PyErr_WarnExplicit`, but uses + :c:func:`PyUnicode_FromFormat` to format the warning message. *format* is + an ASCII-encoded string. + + .. versionadded:: 3.2 + + .. c:function:: int PyErr_ResourceWarning(PyObject *source, Py_ssize_t stack_level, const char *format, ...) Function similar to :c:func:`PyErr_WarnFormat`, but *category* is @@ -749,9 +783,30 @@ Exception Classes .. versionadded:: 3.2 +.. c:function:: int PyExceptionClass_Check(PyObject *ob) + + Return non-zero if *ob* is an exception class, zero otherwise. This function always succeeds. + + +.. c:function:: const char *PyExceptionClass_Name(PyObject *ob) + + Return :c:member:`~PyTypeObject.tp_name` of the exception class *ob*. + + Exception Objects ================= +.. c:function:: int PyExceptionInstance_Check(PyObject *op) + + Return true if *op* is an instance of :class:`BaseException`, false + otherwise. This function always succeeds. + + +.. c:macro:: PyExceptionInstance_Class(op) + + Equivalent to :c:func:`Py_TYPE(op) `. + + .. c:function:: PyObject* PyException_GetTraceback(PyObject *ex) Return the traceback associated with the exception as a new reference, as @@ -929,6 +984,9 @@ because the :ref:`call protocol ` takes care of recursion handling. be concatenated to the :exc:`RecursionError` message caused by the recursion depth limit. + .. seealso:: + The :c:func:`PyUnstable_ThreadState_SetStackProtection` function. + .. versionchanged:: 3.9 This function is now also available in the :ref:`limited API `. @@ -969,184 +1027,159 @@ these are the C equivalent to :func:`reprlib.recursive_repr`. Ends a :c:func:`Py_ReprEnter`. Must be called once for each invocation of :c:func:`Py_ReprEnter` that returns zero. +.. c:function:: int Py_GetRecursionLimit(void) + + Get the recursion limit for the current interpreter. It can be set with + :c:func:`Py_SetRecursionLimit`. The recursion limit prevents the + Python interpreter stack from growing infinitely. + + This function cannot fail, and the caller must hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`sys.getrecursionlimit` + +.. c:function:: void Py_SetRecursionLimit(int new_limit) + + Set the recursion limit for the current interpreter. + + This function cannot fail, and the caller must hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`sys.setrecursionlimit` .. _standardexceptions: -Standard Exceptions -=================== - -All standard Python exceptions are available as global variables whose names are -``PyExc_`` followed by the Python exception name. These have the type -:c:expr:`PyObject*`; they are all class objects. For completeness, here are all -the variables: - -.. index:: - single: PyExc_BaseException (C var) - single: PyExc_Exception (C var) - single: PyExc_ArithmeticError (C var) - single: PyExc_AssertionError (C var) - single: PyExc_AttributeError (C var) - single: PyExc_BlockingIOError (C var) - single: PyExc_BrokenPipeError (C var) - single: PyExc_BufferError (C var) - single: PyExc_ChildProcessError (C var) - single: PyExc_ConnectionAbortedError (C var) - single: PyExc_ConnectionError (C var) - single: PyExc_ConnectionRefusedError (C var) - single: PyExc_ConnectionResetError (C var) - single: PyExc_EOFError (C var) - single: PyExc_FileExistsError (C var) - single: PyExc_FileNotFoundError (C var) - single: PyExc_FloatingPointError (C var) - single: PyExc_GeneratorExit (C var) - single: PyExc_ImportError (C var) - single: PyExc_IndentationError (C var) - single: PyExc_IndexError (C var) - single: PyExc_InterruptedError (C var) - single: PyExc_IsADirectoryError (C var) - single: PyExc_KeyError (C var) - single: PyExc_KeyboardInterrupt (C var) - single: PyExc_LookupError (C var) - single: PyExc_MemoryError (C var) - single: PyExc_ModuleNotFoundError (C var) - single: PyExc_NameError (C var) - single: PyExc_NotADirectoryError (C var) - single: PyExc_NotImplementedError (C var) - single: PyExc_OSError (C var) - single: PyExc_OverflowError (C var) - single: PyExc_PermissionError (C var) - single: PyExc_ProcessLookupError (C var) - single: PyExc_PythonFinalizationError (C var) - single: PyExc_RecursionError (C var) - single: PyExc_ReferenceError (C var) - single: PyExc_RuntimeError (C var) - single: PyExc_StopAsyncIteration (C var) - single: PyExc_StopIteration (C var) - single: PyExc_SyntaxError (C var) - single: PyExc_SystemError (C var) - single: PyExc_SystemExit (C var) - single: PyExc_TabError (C var) - single: PyExc_TimeoutError (C var) - single: PyExc_TypeError (C var) - single: PyExc_UnboundLocalError (C var) - single: PyExc_UnicodeDecodeError (C var) - single: PyExc_UnicodeEncodeError (C var) - single: PyExc_UnicodeError (C var) - single: PyExc_UnicodeTranslateError (C var) - single: PyExc_ValueError (C var) - single: PyExc_ZeroDivisionError (C var) - -+-----------------------------------------+---------------------------------+----------+ -| C Name | Python Name | Notes | -+=========================================+=================================+==========+ -| :c:data:`PyExc_BaseException` | :exc:`BaseException` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_Exception` | :exc:`Exception` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ArithmeticError` | :exc:`ArithmeticError` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_AssertionError` | :exc:`AssertionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_AttributeError` | :exc:`AttributeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BlockingIOError` | :exc:`BlockingIOError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BrokenPipeError` | :exc:`BrokenPipeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BufferError` | :exc:`BufferError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ChildProcessError` | :exc:`ChildProcessError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionAbortedError` | :exc:`ConnectionAbortedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionError` | :exc:`ConnectionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionRefusedError` | :exc:`ConnectionRefusedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ConnectionResetError` | :exc:`ConnectionResetError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_EOFError` | :exc:`EOFError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FileExistsError` | :exc:`FileExistsError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FileNotFoundError` | :exc:`FileNotFoundError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FloatingPointError` | :exc:`FloatingPointError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_GeneratorExit` | :exc:`GeneratorExit` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ImportError` | :exc:`ImportError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_IndentationError` | :exc:`IndentationError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_IndexError` | :exc:`IndexError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_InterruptedError` | :exc:`InterruptedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_IsADirectoryError` | :exc:`IsADirectoryError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_KeyError` | :exc:`KeyError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_KeyboardInterrupt` | :exc:`KeyboardInterrupt` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_LookupError` | :exc:`LookupError` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_MemoryError` | :exc:`MemoryError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ModuleNotFoundError` | :exc:`ModuleNotFoundError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_NameError` | :exc:`NameError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_NotADirectoryError` | :exc:`NotADirectoryError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_NotImplementedError` | :exc:`NotImplementedError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_OSError` | :exc:`OSError` | [1]_ | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_OverflowError` | :exc:`OverflowError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_PermissionError` | :exc:`PermissionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ProcessLookupError` | :exc:`ProcessLookupError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_PythonFinalizationError` | :exc:`PythonFinalizationError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_RecursionError` | :exc:`RecursionError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ReferenceError` | :exc:`ReferenceError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_RuntimeError` | :exc:`RuntimeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_StopAsyncIteration` | :exc:`StopAsyncIteration` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_StopIteration` | :exc:`StopIteration` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SyntaxError` | :exc:`SyntaxError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SystemError` | :exc:`SystemError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SystemExit` | :exc:`SystemExit` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_TabError` | :exc:`TabError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_TimeoutError` | :exc:`TimeoutError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_TypeError` | :exc:`TypeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnboundLocalError` | :exc:`UnboundLocalError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeDecodeError` | :exc:`UnicodeDecodeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeEncodeError` | :exc:`UnicodeEncodeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeError` | :exc:`UnicodeError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeTranslateError` | :exc:`UnicodeTranslateError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ValueError` | :exc:`ValueError` | | -+-----------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ZeroDivisionError` | :exc:`ZeroDivisionError` | | -+-----------------------------------------+---------------------------------+----------+ +Exception and warning types +=========================== + +All standard Python exceptions and warning categories are available as global +variables whose names are ``PyExc_`` followed by the Python exception name. +These have the type :c:expr:`PyObject*`; they are all class objects. + +For completeness, here are all the variables: + +Exception types +--------------- + +.. list-table:: + :align: left + :widths: auto + :header-rows: 1 + + * * C name + * Python name + * * .. c:var:: PyObject *PyExc_BaseException + * :exc:`BaseException` + * * .. c:var:: PyObject *PyExc_BaseExceptionGroup + * :exc:`BaseExceptionGroup` + * * .. c:var:: PyObject *PyExc_Exception + * :exc:`Exception` + * * .. c:var:: PyObject *PyExc_ArithmeticError + * :exc:`ArithmeticError` + * * .. c:var:: PyObject *PyExc_AssertionError + * :exc:`AssertionError` + * * .. c:var:: PyObject *PyExc_AttributeError + * :exc:`AttributeError` + * * .. c:var:: PyObject *PyExc_BlockingIOError + * :exc:`BlockingIOError` + * * .. c:var:: PyObject *PyExc_BrokenPipeError + * :exc:`BrokenPipeError` + * * .. c:var:: PyObject *PyExc_BufferError + * :exc:`BufferError` + * * .. c:var:: PyObject *PyExc_ChildProcessError + * :exc:`ChildProcessError` + * * .. c:var:: PyObject *PyExc_ConnectionAbortedError + * :exc:`ConnectionAbortedError` + * * .. c:var:: PyObject *PyExc_ConnectionError + * :exc:`ConnectionError` + * * .. c:var:: PyObject *PyExc_ConnectionRefusedError + * :exc:`ConnectionRefusedError` + * * .. c:var:: PyObject *PyExc_ConnectionResetError + * :exc:`ConnectionResetError` + * * .. c:var:: PyObject *PyExc_EOFError + * :exc:`EOFError` + * * .. c:var:: PyObject *PyExc_FileExistsError + * :exc:`FileExistsError` + * * .. c:var:: PyObject *PyExc_FileNotFoundError + * :exc:`FileNotFoundError` + * * .. c:var:: PyObject *PyExc_FloatingPointError + * :exc:`FloatingPointError` + * * .. c:var:: PyObject *PyExc_GeneratorExit + * :exc:`GeneratorExit` + * * .. c:var:: PyObject *PyExc_ImportError + * :exc:`ImportError` + * * .. c:var:: PyObject *PyExc_IndentationError + * :exc:`IndentationError` + * * .. c:var:: PyObject *PyExc_IndexError + * :exc:`IndexError` + * * .. c:var:: PyObject *PyExc_InterruptedError + * :exc:`InterruptedError` + * * .. c:var:: PyObject *PyExc_IsADirectoryError + * :exc:`IsADirectoryError` + * * .. c:var:: PyObject *PyExc_KeyError + * :exc:`KeyError` + * * .. c:var:: PyObject *PyExc_KeyboardInterrupt + * :exc:`KeyboardInterrupt` + * * .. c:var:: PyObject *PyExc_LookupError + * :exc:`LookupError` + * * .. c:var:: PyObject *PyExc_MemoryError + * :exc:`MemoryError` + * * .. c:var:: PyObject *PyExc_ModuleNotFoundError + * :exc:`ModuleNotFoundError` + * * .. c:var:: PyObject *PyExc_NameError + * :exc:`NameError` + * * .. c:var:: PyObject *PyExc_NotADirectoryError + * :exc:`NotADirectoryError` + * * .. c:var:: PyObject *PyExc_NotImplementedError + * :exc:`NotImplementedError` + * * .. c:var:: PyObject *PyExc_OSError + * :exc:`OSError` + * * .. c:var:: PyObject *PyExc_OverflowError + * :exc:`OverflowError` + * * .. c:var:: PyObject *PyExc_PermissionError + * :exc:`PermissionError` + * * .. c:var:: PyObject *PyExc_ProcessLookupError + * :exc:`ProcessLookupError` + * * .. c:var:: PyObject *PyExc_PythonFinalizationError + * :exc:`PythonFinalizationError` + * * .. c:var:: PyObject *PyExc_RecursionError + * :exc:`RecursionError` + * * .. c:var:: PyObject *PyExc_ReferenceError + * :exc:`ReferenceError` + * * .. c:var:: PyObject *PyExc_RuntimeError + * :exc:`RuntimeError` + * * .. c:var:: PyObject *PyExc_StopAsyncIteration + * :exc:`StopAsyncIteration` + * * .. c:var:: PyObject *PyExc_StopIteration + * :exc:`StopIteration` + * * .. c:var:: PyObject *PyExc_SyntaxError + * :exc:`SyntaxError` + * * .. c:var:: PyObject *PyExc_SystemError + * :exc:`SystemError` + * * .. c:var:: PyObject *PyExc_SystemExit + * :exc:`SystemExit` + * * .. c:var:: PyObject *PyExc_TabError + * :exc:`TabError` + * * .. c:var:: PyObject *PyExc_TimeoutError + * :exc:`TimeoutError` + * * .. c:var:: PyObject *PyExc_TypeError + * :exc:`TypeError` + * * .. c:var:: PyObject *PyExc_UnboundLocalError + * :exc:`UnboundLocalError` + * * .. c:var:: PyObject *PyExc_UnicodeDecodeError + * :exc:`UnicodeDecodeError` + * * .. c:var:: PyObject *PyExc_UnicodeEncodeError + * :exc:`UnicodeEncodeError` + * * .. c:var:: PyObject *PyExc_UnicodeError + * :exc:`UnicodeError` + * * .. c:var:: PyObject *PyExc_UnicodeTranslateError + * :exc:`UnicodeTranslateError` + * * .. c:var:: PyObject *PyExc_ValueError + * :exc:`ValueError` + * * .. c:var:: PyObject *PyExc_ZeroDivisionError + * :exc:`ZeroDivisionError` .. versionadded:: 3.3 :c:data:`PyExc_BlockingIOError`, :c:data:`PyExc_BrokenPipeError`, @@ -1164,88 +1197,116 @@ the variables: .. versionadded:: 3.6 :c:data:`PyExc_ModuleNotFoundError`. -These are compatibility aliases to :c:data:`PyExc_OSError`: +.. versionadded:: 3.11 + :c:data:`PyExc_BaseExceptionGroup`. -.. index:: - single: PyExc_EnvironmentError (C var) - single: PyExc_IOError (C var) - single: PyExc_WindowsError (C var) -+-------------------------------------+----------+ -| C Name | Notes | -+=====================================+==========+ -| :c:data:`!PyExc_EnvironmentError` | | -+-------------------------------------+----------+ -| :c:data:`!PyExc_IOError` | | -+-------------------------------------+----------+ -| :c:data:`!PyExc_WindowsError` | [2]_ | -+-------------------------------------+----------+ +OSError aliases +--------------- + +The following are a compatibility aliases to :c:data:`PyExc_OSError`. .. versionchanged:: 3.3 These aliases used to be separate exception types. +.. list-table:: + :align: left + :widths: auto + :header-rows: 1 + + * * C name + * Python name + * Notes + * * .. c:var:: PyObject *PyExc_EnvironmentError + * :exc:`OSError` + * + * * .. c:var:: PyObject *PyExc_IOError + * :exc:`OSError` + * + * * .. c:var:: PyObject *PyExc_WindowsError + * :exc:`OSError` + * [win]_ + Notes: -.. [1] - This is a base class for other standard exceptions. +.. [win] + :c:var:`!PyExc_WindowsError` is only defined on Windows; protect code that + uses this by testing that the preprocessor macro ``MS_WINDOWS`` is defined. -.. [2] - Only defined on Windows; protect code that uses this by testing that the - preprocessor macro ``MS_WINDOWS`` is defined. .. _standardwarningcategories: -Standard Warning Categories -=========================== - -All standard Python warning categories are available as global variables whose -names are ``PyExc_`` followed by the Python exception name. These have the type -:c:expr:`PyObject*`; they are all class objects. For completeness, here are all -the variables: - -.. index:: - single: PyExc_Warning (C var) - single: PyExc_BytesWarning (C var) - single: PyExc_DeprecationWarning (C var) - single: PyExc_FutureWarning (C var) - single: PyExc_ImportWarning (C var) - single: PyExc_PendingDeprecationWarning (C var) - single: PyExc_ResourceWarning (C var) - single: PyExc_RuntimeWarning (C var) - single: PyExc_SyntaxWarning (C var) - single: PyExc_UnicodeWarning (C var) - single: PyExc_UserWarning (C var) - -+------------------------------------------+---------------------------------+----------+ -| C Name | Python Name | Notes | -+==========================================+=================================+==========+ -| :c:data:`PyExc_Warning` | :exc:`Warning` | [3]_ | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_BytesWarning` | :exc:`BytesWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_DeprecationWarning` | :exc:`DeprecationWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_FutureWarning` | :exc:`FutureWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ImportWarning` | :exc:`ImportWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_PendingDeprecationWarning`| :exc:`PendingDeprecationWarning`| | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_ResourceWarning` | :exc:`ResourceWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_RuntimeWarning` | :exc:`RuntimeWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_SyntaxWarning` | :exc:`SyntaxWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UnicodeWarning` | :exc:`UnicodeWarning` | | -+------------------------------------------+---------------------------------+----------+ -| :c:data:`PyExc_UserWarning` | :exc:`UserWarning` | | -+------------------------------------------+---------------------------------+----------+ +Warning types +------------- + +.. list-table:: + :align: left + :widths: auto + :header-rows: 1 + + * * C name + * Python name + * * .. c:var:: PyObject *PyExc_Warning + * :exc:`Warning` + * * .. c:var:: PyObject *PyExc_BytesWarning + * :exc:`BytesWarning` + * * .. c:var:: PyObject *PyExc_DeprecationWarning + * :exc:`DeprecationWarning` + * * .. c:var:: PyObject *PyExc_EncodingWarning + * :exc:`EncodingWarning` + * * .. c:var:: PyObject *PyExc_FutureWarning + * :exc:`FutureWarning` + * * .. c:var:: PyObject *PyExc_ImportWarning + * :exc:`ImportWarning` + * * .. c:var:: PyObject *PyExc_PendingDeprecationWarning + * :exc:`PendingDeprecationWarning` + * * .. c:var:: PyObject *PyExc_ResourceWarning + * :exc:`ResourceWarning` + * * .. c:var:: PyObject *PyExc_RuntimeWarning + * :exc:`RuntimeWarning` + * * .. c:var:: PyObject *PyExc_SyntaxWarning + * :exc:`SyntaxWarning` + * * .. c:var:: PyObject *PyExc_UnicodeWarning + * :exc:`UnicodeWarning` + * * .. c:var:: PyObject *PyExc_UserWarning + * :exc:`UserWarning` .. versionadded:: 3.2 :c:data:`PyExc_ResourceWarning`. -Notes: +.. versionadded:: 3.10 + :c:data:`PyExc_EncodingWarning`. + + +Tracebacks +========== + +.. c:var:: PyTypeObject PyTraceBack_Type + + Type object for traceback objects. This is available as + :class:`types.TracebackType` in the Python layer. + + +.. c:function:: int PyTraceBack_Check(PyObject *op) + + Return true if *op* is a traceback object, false otherwise. This function + does not account for subtypes. + + +.. c:function:: int PyTraceBack_Here(PyFrameObject *f) + + Replace the :attr:`~BaseException.__traceback__` attribute on the current + exception with a new traceback prepending *f* to the existing chain. + + Calling this function without an exception set is undefined behavior. + + This function returns ``0`` on success, and returns ``-1`` with an + exception set on failure. + + +.. c:function:: int PyTraceBack_Print(PyObject *tb, PyObject *f) + + Write the traceback *tb* into the file *f*. -.. [3] - This is a base class for other standard warning categories. + This function returns ``0`` on success, and returns ``-1`` with an + exception set on failure. diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst new file mode 100644 index 00000000000000..3d331e6ec12f76 --- /dev/null +++ b/Doc/c-api/extension-modules.rst @@ -0,0 +1,247 @@ +.. highlight:: c + +.. _extension-modules: + +Defining extension modules +-------------------------- + +A C extension for CPython is a shared library (for example, a ``.so`` file +on Linux, ``.pyd`` DLL on Windows), which is loadable into the Python process +(for example, it is compiled with compatible compiler settings), and which +exports an :ref:`initialization function `. + +To be importable by default (that is, by +:py:class:`importlib.machinery.ExtensionFileLoader`), +the shared library must be available on :py:attr:`sys.path`, +and must be named after the module name plus an extension listed in +:py:attr:`importlib.machinery.EXTENSION_SUFFIXES`. + +.. note:: + + Building, packaging and distributing extension modules is best done with + third-party tools, and is out of scope of this document. + One suitable tool is Setuptools, whose documentation can be found at + https://setuptools.pypa.io/en/latest/setuptools.html. + +Normally, the initialization function returns a module definition initialized +using :c:func:`PyModuleDef_Init`. +This allows splitting the creation process into several phases: + +- Before any substantial code is executed, Python can determine which + capabilities the module supports, and it can adjust the environment or + refuse loading an incompatible extension. +- By default, Python itself creates the module object -- that is, it does + the equivalent of :py:meth:`object.__new__` for classes. + It also sets initial attributes like :attr:`~module.__package__` and + :attr:`~module.__loader__`. +- Afterwards, the module object is initialized using extension-specific + code -- the equivalent of :py:meth:`~object.__init__` on classes. + +This is called *multi-phase initialization* to distinguish it from the legacy +(but still supported) *single-phase initialization* scheme, +where the initialization function returns a fully constructed module. +See the :ref:`single-phase-initialization section below ` +for details. + +.. versionchanged:: 3.5 + + Added support for multi-phase initialization (:pep:`489`). + + +Multiple module instances +......................... + +By default, extension modules are not singletons. +For example, if the :py:attr:`sys.modules` entry is removed and the module +is re-imported, a new module object is created, and typically populated with +fresh method and type objects. +The old module is subject to normal garbage collection. +This mirrors the behavior of pure-Python modules. + +Additional module instances may be created in +:ref:`sub-interpreters ` +or after Python runtime reinitialization +(:c:func:`Py_Finalize` and :c:func:`Py_Initialize`). +In these cases, sharing Python objects between module instances would likely +cause crashes or undefined behavior. + +To avoid such issues, each instance of an extension module should +be *isolated*: changes to one instance should not implicitly affect the others, +and all state owned by the module, including references to Python objects, +should be specific to a particular module instance. +See :ref:`isolating-extensions-howto` for more details and a practical guide. + +A simpler way to avoid these issues is +:ref:`raising an error on repeated initialization `. + +All modules are expected to support +:ref:`sub-interpreters `, or otherwise explicitly +signal a lack of support. +This is usually achieved by isolation or blocking repeated initialization, +as above. +A module may also be limited to the main interpreter using +the :c:data:`Py_mod_multiple_interpreters` slot. + + +.. _extension-export-hook: + +Initialization function +....................... + +The initialization function defined by an extension module has the +following signature: + +.. c:function:: PyObject* PyInit_modulename(void) + +Its name should be :samp:`PyInit_{}`, with ```` replaced by the +name of the module. + +For modules with ASCII-only names, the function must instead be named +:samp:`PyInit_{}`, with ```` replaced by the name of the module. +When using :ref:`multi-phase-initialization`, non-ASCII module names +are allowed. In this case, the initialization function name is +:samp:`PyInitU_{}`, with ```` encoded using Python's +*punycode* encoding with hyphens replaced by underscores. In Python: + +.. code-block:: python + + def initfunc_name(name): + try: + suffix = b'_' + name.encode('ascii') + except UnicodeEncodeError: + suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') + return b'PyInit' + suffix + +It is recommended to define the initialization function using a helper macro: + +.. c:macro:: PyMODINIT_FUNC + + Declare an extension module initialization function. + This macro: + + * specifies the :c:expr:`PyObject*` return type, + * adds any special linkage declarations required by the platform, and + * for C++, declares the function as ``extern "C"``. + +For example, a module called ``spam`` would be defined like this:: + + static struct PyModuleDef spam_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "spam", + ... + }; + + PyMODINIT_FUNC + PyInit_spam(void) + { + return PyModuleDef_Init(&spam_module); + } + +It is possible to export multiple modules from a single shared library by +defining multiple initialization functions. However, importing them requires +using symbolic links or a custom importer, because by default only the +function corresponding to the filename is found. +See the `Multiple modules in one library `__ +section in :pep:`489` for details. + +The initialization function is typically the only non-\ ``static`` +item defined in the module's C source. + + +.. _multi-phase-initialization: + +Multi-phase initialization +.......................... + +Normally, the :ref:`initialization function ` +(``PyInit_modulename``) returns a :c:type:`PyModuleDef` instance with +non-``NULL`` :c:member:`~PyModuleDef.m_slots`. +Before it is returned, the ``PyModuleDef`` instance must be initialized +using the following function: + + +.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) + + Ensure a module definition is a properly initialized Python object that + correctly reports its type and a reference count. + + Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. + + Calling this function is required for :ref:`multi-phase-initialization`. + It should not be used in other contexts. + + Note that Python assumes that ``PyModuleDef`` structures are statically + allocated. + This function may return either a new reference or a borrowed one; + this reference must not be released. + + .. versionadded:: 3.5 + + +.. _single-phase-initialization: + +Legacy single-phase initialization +.................................. + +.. attention:: + Single-phase initialization is a legacy mechanism to initialize extension + modules, with known drawbacks and design flaws. Extension module authors + are encouraged to use multi-phase initialization instead. + +In single-phase initialization, the +:ref:`initialization function ` (``PyInit_modulename``) +should create, populate and return a module object. +This is typically done using :c:func:`PyModule_Create` and functions like +:c:func:`PyModule_AddObjectRef`. + +Single-phase initialization differs from the :ref:`default ` +in the following ways: + +* Single-phase modules are, or rather *contain*, “singletons”. + + When the module is first initialized, Python saves the contents of + the module's ``__dict__`` (that is, typically, the module's functions and + types). + + For subsequent imports, Python does not call the initialization function + again. + Instead, it creates a new module object with a new ``__dict__``, and copies + the saved contents to it. + For example, given a single-phase module ``_testsinglephase`` + [#testsinglephase]_ that defines a function ``sum`` and an exception class + ``error``: + + .. code-block:: python + + >>> import sys + >>> import _testsinglephase as one + >>> del sys.modules['_testsinglephase'] + >>> import _testsinglephase as two + >>> one is two + False + >>> one.__dict__ is two.__dict__ + False + >>> one.sum is two.sum + True + >>> one.error is two.error + True + + The exact behavior should be considered a CPython implementation detail. + +* To work around the fact that ``PyInit_modulename`` does not take a *spec* + argument, some state of the import machinery is saved and applied to the + first suitable module created during the ``PyInit_modulename`` call. + Specifically, when a sub-module is imported, this mechanism prepends the + parent package name to the name of the module. + + A single-phase ``PyInit_modulename`` function should create “its” module + object as soon as possible, before any other module objects can be created. + +* Non-ASCII module names (``PyInitU_modulename``) are not supported. + +* Single-phase modules support module lookup functions like + :c:func:`PyState_FindModule`. + +.. [#testsinglephase] ``_testsinglephase`` is an internal module used + in CPython's self-test suite; your installation may or may not + include it. diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index e9019a0d500f7e..9d01254ddb2a11 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -93,6 +93,29 @@ the :mod:`io` APIs instead. .. versionadded:: 3.8 +.. c:function:: PyObject *PyFile_OpenCodeObject(PyObject *path) + + Open *path* with the mode ``'rb'``. *path* must be a Python :class:`str` + object. The behavior of this function may be overridden by + :c:func:`PyFile_SetOpenCodeHook` to allow for some preprocessing of the + text. + + This is analogous to :func:`io.open_code` in Python. + + On success, this function returns a :term:`strong reference` to a Python + file object. On failure, this function returns ``NULL`` with an exception + set. + + .. versionadded:: 3.8 + + +.. c:function:: PyObject *PyFile_OpenCode(const char *path) + + Similar to :c:func:`PyFile_OpenCodeObject`, but *path* is a + UTF-8 encoded :c:expr:`const char*`. + + .. versionadded:: 3.8 + .. c:function:: int PyFile_WriteObject(PyObject *obj, PyObject *p, int flags) diff --git a/Doc/c-api/float.rst b/Doc/c-api/float.rst index c5a7653efca26b..51540004c93db6 100644 --- a/Doc/c-api/float.rst +++ b/Doc/c-api/float.rst @@ -78,6 +78,104 @@ Floating-Point Objects Return the minimum normalized positive float *DBL_MIN* as C :c:expr:`double`. +.. c:macro:: Py_INFINITY + + This macro expands a to constant expression of type :c:expr:`double`, that + represents the positive infinity. + + On most platforms, this is equivalent to the :c:macro:`!INFINITY` macro from + the C11 standard ```` header. + + +.. c:macro:: Py_NAN + + This macro expands a to constant expression of type :c:expr:`double`, that + represents a quiet not-a-number (qNaN) value. + + On most platforms, this is equivalent to the :c:macro:`!NAN` macro from + the C11 standard ```` header. + + +.. c:macro:: Py_HUGE_VAL + + Equivalent to :c:macro:`!INFINITY`. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. + + +.. c:macro:: Py_MATH_E + + The definition (accurate for a :c:expr:`double` type) of the :data:`math.e` constant. + + +.. c:macro:: Py_MATH_El + + High precision (long double) definition of :data:`~math.e` constant. + + +.. c:macro:: Py_MATH_PI + + The definition (accurate for a :c:expr:`double` type) of the :data:`math.pi` constant. + + +.. c:macro:: Py_MATH_PIl + + High precision (long double) definition of :data:`~math.pi` constant. + + +.. c:macro:: Py_MATH_TAU + + The definition (accurate for a :c:expr:`double` type) of the :data:`math.tau` constant. + + .. versionadded:: 3.6 + + +.. c:macro:: Py_RETURN_NAN + + Return :data:`math.nan` from a function. + + On most platforms, this is equivalent to ``return PyFloat_FromDouble(NAN)``. + + +.. c:macro:: Py_RETURN_INF(sign) + + Return :data:`math.inf` or :data:`-math.inf ` from a function, + depending on the sign of *sign*. + + On most platforms, this is equivalent to the following:: + + return PyFloat_FromDouble(copysign(INFINITY, sign)); + + +.. c:macro:: Py_IS_FINITE(X) + + Return ``1`` if the given floating-point number *X* is finite, + that is, it is normal, subnormal or zero, but not infinite or NaN. + Return ``0`` otherwise. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. Use :c:macro:`!isfinite` instead. + + +.. c:macro:: Py_IS_INFINITY(X) + + Return ``1`` if the given floating-point number *X* is positive or negative + infinity. Return ``0`` otherwise. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. Use :c:macro:`!isinf` instead. + + +.. c:macro:: Py_IS_NAN(X) + + Return ``1`` if the given floating-point number *X* is a not-a-number (NaN) + value. Return ``0`` otherwise. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. Use :c:macro:`!isnan` instead. + + Pack and Unpack functions ------------------------- @@ -96,8 +194,8 @@ NaNs (if such things exist on the platform) isn't handled correctly, and attempting to unpack a bytes string containing an IEEE INF or NaN will raise an exception. -Note that NaNs type may not be preserved on IEEE platforms (silent NaN become -quiet), for example on x86 systems in 32-bit mode. +Note that NaNs type may not be preserved on IEEE platforms (signaling NaN become +quiet NaN), for example on x86 systems in 32-bit mode. On non-IEEE platforms with more precision, or larger dynamic range, than IEEE 754 supports, not all values can be packed; on non-IEEE platforms with less @@ -124,15 +222,15 @@ There are two problems on non-IEEE platforms: * What this does is undefined if *x* is a NaN or infinity. * ``-0.0`` and ``+0.0`` produce the same bytes string. -.. c:function:: int PyFloat_Pack2(double x, unsigned char *p, int le) +.. c:function:: int PyFloat_Pack2(double x, char *p, int le) Pack a C double as the IEEE 754 binary16 half-precision format. -.. c:function:: int PyFloat_Pack4(double x, unsigned char *p, int le) +.. c:function:: int PyFloat_Pack4(double x, char *p, int le) Pack a C double as the IEEE 754 binary32 single precision format. -.. c:function:: int PyFloat_Pack8(double x, unsigned char *p, int le) +.. c:function:: int PyFloat_Pack8(double x, char *p, int le) Pack a C double as the IEEE 754 binary64 double precision format. @@ -154,14 +252,14 @@ Return value: The unpacked double. On error, this is ``-1.0`` and Note that on a non-IEEE platform this will refuse to unpack a bytes string that represents a NaN or infinity. -.. c:function:: double PyFloat_Unpack2(const unsigned char *p, int le) +.. c:function:: double PyFloat_Unpack2(const char *p, int le) Unpack the IEEE 754 binary16 half-precision format as a C double. -.. c:function:: double PyFloat_Unpack4(const unsigned char *p, int le) +.. c:function:: double PyFloat_Unpack4(const char *p, int le) Unpack the IEEE 754 binary32 single precision format as a C double. -.. c:function:: double PyFloat_Unpack8(const unsigned char *p, int le) +.. c:function:: double PyFloat_Unpack8(const char *p, int le) Unpack the IEEE 754 binary64 double precision format as a C double. diff --git a/Doc/c-api/frame.rst b/Doc/c-api/frame.rst index 1a52e146a69751..fb17cf7f1da6b2 100644 --- a/Doc/c-api/frame.rst +++ b/Doc/c-api/frame.rst @@ -29,6 +29,12 @@ See also :ref:`Reflection `. Previously, this type was only available after including ````. +.. c:function:: PyFrameObject *PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, PyObject *locals) + + Create a new frame object. This function returns a :term:`strong reference` + to the new frame object on success, and returns ``NULL`` with an exception + set on failure. + .. c:function:: int PyFrame_Check(PyObject *obj) Return non-zero if *obj* is a frame object. @@ -161,6 +167,57 @@ See :pep:`667` for more information. Return non-zero if *obj* is a frame :func:`locals` proxy. + +Legacy Local Variable APIs +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These APIs are :term:`soft deprecated`. As of Python 3.13, they do nothing. +They exist solely for backwards compatibility. + + +.. c:function:: void PyFrame_LocalsToFast(PyFrameObject *f, int clear) + + This function is :term:`soft deprecated` and does nothing. + + Prior to Python 3.13, this function would copy the :attr:`~frame.f_locals` + attribute of *f* to the internal "fast" array of local variables, allowing + changes in frame objects to be visible to the interpreter. If *clear* was + true, this function would process variables that were unset in the locals + dictionary. + + .. versionchanged:: 3.13 + This function now does nothing. + + +.. c:function:: void PyFrame_FastToLocals(PyFrameObject *f) + + This function is :term:`soft deprecated` and does nothing. + + Prior to Python 3.13, this function would copy the internal "fast" array + of local variables (which is used by the interpreter) to the + :attr:`~frame.f_locals` attribute of *f*, allowing changes in local + variables to be visible to frame objects. + + .. versionchanged:: 3.13 + This function now does nothing. + + +.. c:function:: int PyFrame_FastToLocalsWithError(PyFrameObject *f) + + This function is :term:`soft deprecated` and does nothing. + + Prior to Python 3.13, this function was similar to + :c:func:`PyFrame_FastToLocals`, but would return ``0`` on success, and + ``-1`` with an exception set on failure. + + .. versionchanged:: 3.13 + This function now does nothing. + + +.. seealso:: + :pep:`667` + + Internal Frames ^^^^^^^^^^^^^^^ diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index 58792edeed25e3..7908e4f8561aeb 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -95,6 +95,22 @@ There are a few functions specific to Python functions. .. versionadded:: 3.12 + +.. c:function:: PyObject* PyFunction_GetKwDefaults(PyObject *op) + + Return the keyword-only argument default values of the function object *op*. This can be a + dictionary of arguments or ``NULL``. + + +.. c:function:: int PyFunction_SetKwDefaults(PyObject *op, PyObject *defaults) + + Set the keyword-only argument default values of the function object *op*. + *defaults* must be a dictionary of keyword-only arguments or ``Py_None``. + + This function returns ``0`` on success, and returns ``-1`` with an exception + set on failure. + + .. c:function:: PyObject* PyFunction_GetClosure(PyObject *op) Return the closure associated with the function object *op*. This can be ``NULL`` @@ -123,6 +139,19 @@ There are a few functions specific to Python functions. Raises :exc:`SystemError` and returns ``-1`` on failure. +.. c:function:: PyObject *PyFunction_GET_CODE(PyObject *op) + PyObject *PyFunction_GET_GLOBALS(PyObject *op) + PyObject *PyFunction_GET_MODULE(PyObject *op) + PyObject *PyFunction_GET_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_KW_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_CLOSURE(PyObject *op) + PyObject *PyFunction_GET_ANNOTATIONS(PyObject *op) + + These functions are similar to their ``PyFunction_Get*`` counterparts, but + do not do type checking. Passing anything other than an instance of + :c:data:`PyFunction_Type` is undefined behavior. + + .. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback) Register *callback* as a function watcher for the current interpreter. @@ -169,7 +198,7 @@ There are a few functions specific to Python functions. unpredictable effects, including infinite recursion. If *event* is ``PyFunction_EVENT_CREATE``, then the callback is invoked - after `func` has been fully initialized. Otherwise, the callback is invoked + after *func* has been fully initialized. Otherwise, the callback is invoked before the modification to *func* takes place, so the prior state of *func* can be inspected. The runtime is permitted to optimize away the creation of function objects when possible. In such cases no event will be emitted. diff --git a/Doc/c-api/gcsupport.rst b/Doc/c-api/gcsupport.rst index d1f0982b818931..fed795b1e8c963 100644 --- a/Doc/c-api/gcsupport.rst +++ b/Doc/c-api/gcsupport.rst @@ -57,11 +57,49 @@ rules: Analogous to :c:macro:`PyObject_New` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set. + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:macro:: PyObject_GC_NewVar(TYPE, typeobj, size) Analogous to :c:macro:`PyObject_NewVar` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set. + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:function:: PyObject* PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *type, size_t extra_size) Analogous to :c:macro:`PyObject_GC_New` but allocates *extra_size* @@ -73,6 +111,10 @@ rules: The extra data will be deallocated with the object, but otherwise it is not managed by Python. + Memory allocated by this function must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + .. warning:: The function is marked as unstable because the final mechanism for reserving extra data after an instance is not yet decided. @@ -136,6 +178,21 @@ rules: Releases memory allocated to an object using :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_New`, + :c:macro:`PyObject_NewVar`, or related allocation functions; use + :c:func:`PyObject_Free` instead. + + .. seealso:: + + * :c:func:`PyObject_Free` is the non-GC equivalent of this function. + * :c:macro:`PyObject_GC_New` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. c:function:: void PyObject_GC_UnTrack(void *op) @@ -175,14 +232,18 @@ The :c:member:`~PyTypeObject.tp_traverse` handler must have the following type: object argument. If *visit* returns a non-zero value that value should be returned immediately. + The traversal function must not have any side effects. Implementations + may not modify the reference counts of any Python objects nor create or + destroy any Python objects. + To simplify writing :c:member:`~PyTypeObject.tp_traverse` handlers, a :c:func:`Py_VISIT` macro is provided. In order to use this macro, the :c:member:`~PyTypeObject.tp_traverse` implementation must name its arguments exactly *visit* and *arg*: -.. c:function:: void Py_VISIT(PyObject *o) +.. c:macro:: Py_VISIT(o) - If *o* is not ``NULL``, call the *visit* callback, with arguments *o* + If the :c:expr:`PyObject *` *o* is not ``NULL``, call the *visit* callback, with arguments *o* and *arg*. If *visit* returns a non-zero value, then return it. Using this macro, :c:member:`~PyTypeObject.tp_traverse` handlers look like:: diff --git a/Doc/c-api/gen.rst b/Doc/c-api/gen.rst index 0eb5922f6da75f..44f3bdbf959b9c 100644 --- a/Doc/c-api/gen.rst +++ b/Doc/c-api/gen.rst @@ -44,3 +44,41 @@ than explicitly calling :c:func:`PyGen_New` or :c:func:`PyGen_NewWithQualName`. with ``__name__`` and ``__qualname__`` set to *name* and *qualname*. A reference to *frame* is stolen by this function. The *frame* argument must not be ``NULL``. + +.. c:function:: PyCodeObject* PyGen_GetCode(PyGenObject *gen) + + Return a new :term:`strong reference` to the code object wrapped by *gen*. + This function always succeeds. + + +Asynchronous Generator Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. seealso:: + :pep:`525` + +.. c:var:: PyTypeObject PyAsyncGen_Type + + The type object corresponding to asynchronous generator objects. This is + available as :class:`types.AsyncGeneratorType` in the Python layer. + + .. versionadded:: 3.6 + +.. c:function:: PyObject *PyAsyncGen_New(PyFrameObject *frame, PyObject *name, PyObject *qualname) + + Create a new asynchronous generator wrapping *frame*, with ``__name__`` and + ``__qualname__`` set to *name* and *qualname*. *frame* is stolen by this + function and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to the + new asynchronous generator. On failure, this function returns ``NULL`` + with an exception set. + + .. versionadded:: 3.6 + +.. c:function:: int PyAsyncGen_CheckExact(PyObject *op) + + Return true if *op* is an asynchronous generator object, false otherwise. + This function always succeeds. + + .. versionadded:: 3.6 diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 00f8cb887dc7eb..1ad712b0ce4f2b 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -11,47 +11,103 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.2 + .. c:type:: Py_uhash_t Hash value type: unsigned integer. .. versionadded:: 3.2 + +.. c:macro:: Py_HASH_ALGORITHM + + A numerical value indicating the algorithm for hashing of :class:`str`, + :class:`bytes`, and :class:`memoryview`. + + The algorithm name is exposed by :data:`sys.hash_info.algorithm`. + + .. versionadded:: 3.4 + + +.. c:macro:: Py_HASH_FNV + Py_HASH_SIPHASH24 + Py_HASH_SIPHASH13 + + Numerical values to compare to :c:macro:`Py_HASH_ALGORITHM` to determine + which algorithm is used for hashing. The hash algorithm can be configured + via the configure :option:`--with-hash-algorithm` option. + + .. versionadded:: 3.4 + Add :c:macro:`!Py_HASH_FNV` and :c:macro:`!Py_HASH_SIPHASH24`. + + .. versionadded:: 3.11 + Add :c:macro:`!Py_HASH_SIPHASH13`. + + +.. c:macro:: Py_HASH_CUTOFF + + Buffers of length in range ``[1, Py_HASH_CUTOFF)`` are hashed using DJBX33A + instead of the algorithm described by :c:macro:`Py_HASH_ALGORITHM`. + + - A :c:macro:`!Py_HASH_CUTOFF` of 0 disables the optimization. + - :c:macro:`!Py_HASH_CUTOFF` must be non-negative and less or equal than 7. + + 32-bit platforms should use a cutoff smaller than 64-bit platforms because + it is easier to create colliding strings. A cutoff of 7 on 64-bit platforms + and 5 on 32-bit platforms should provide a decent safety margin. + + This corresponds to the :data:`sys.hash_info.cutoff` constant. + + .. versionadded:: 3.4 + + .. c:macro:: PyHASH_MODULUS - The `Mersenne prime `_ ``P = 2**n -1``, used for numeric hash scheme. + The `Mersenne prime `_ ``P = 2**n -1``, + used for numeric hash scheme. + + This corresponds to the :data:`sys.hash_info.modulus` constant. .. versionadded:: 3.13 + .. c:macro:: PyHASH_BITS The exponent ``n`` of ``P`` in :c:macro:`PyHASH_MODULUS`. .. versionadded:: 3.13 + .. c:macro:: PyHASH_MULTIPLIER Prime multiplier used in string and various other hashes. .. versionadded:: 3.13 + .. c:macro:: PyHASH_INF The hash value returned for a positive infinity. + This corresponds to the :data:`sys.hash_info.inf` constant. + .. versionadded:: 3.13 + .. c:macro:: PyHASH_IMAG The multiplier used for the imaginary part of a complex number. + This corresponds to the :data:`sys.hash_info.imag` constant. + .. versionadded:: 3.13 + .. c:type:: PyHash_FuncDef Hash function definition used by :c:func:`PyHash_GetFuncDef`. - .. c::member:: Py_hash_t (*const hash)(const void *, Py_ssize_t) + .. c:member:: Py_hash_t (*const hash)(const void *, Py_ssize_t) Hash function. @@ -59,14 +115,20 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. Hash function name (UTF-8 encoded string). + This corresponds to the :data:`sys.hash_info.algorithm` constant. + .. c:member:: const int hash_bits Internal size of the hash value in bits. + This corresponds to the :data:`sys.hash_info.hash_bits` constant. + .. c:member:: const int seed_bits Size of seed input in bits. + This corresponds to the :data:`sys.hash_info.seed_bits` constant. + .. versionadded:: 3.4 diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 1cab3ce3061ec9..369e95dd02251b 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -327,6 +327,13 @@ Importing Modules initialization. +.. c:var:: struct _inittab *PyImport_Inittab + + The table of built-in modules used by Python initialization. Do not use this directly; + use :c:func:`PyImport_AppendInittab` and :c:func:`PyImport_ExtendInittab` + instead. + + .. c:function:: PyObject* PyImport_ImportModuleAttr(PyObject *mod_name, PyObject *attr_name) Import the module *mod_name* and get its attribute *attr_name*. diff --git a/Doc/c-api/index.rst b/Doc/c-api/index.rst index ba56b03c6ac8e7..e9df2a304d975b 100644 --- a/Doc/c-api/index.rst +++ b/Doc/c-api/index.rst @@ -17,6 +17,7 @@ document the API functions in detail. veryhigh.rst refcounting.rst exceptions.rst + extension-modules.rst utilities.rst abstract.rst concrete.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 52f64a61006b74..1559f1c8fcd6dd 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -203,7 +203,7 @@ to 1 and ``-bb`` sets :c:data:`Py_BytesWarningFlag` to 2. Set by the :option:`-i` option. - .. deprecated:: 3.12 + .. deprecated-removed:: 3.12 3.15 .. c:var:: int Py_IsolatedFlag @@ -498,17 +498,8 @@ Initializing and finalizing the interpreter strings other than those passed in (however, the contents of the strings pointed to by the argument list are not modified). - The return value will be ``0`` if the interpreter exits normally (i.e., - without an exception), ``1`` if the interpreter exits due to an exception, - or ``2`` if the argument list does not represent a valid Python command - line. - - Note that if an otherwise unhandled :exc:`SystemExit` is raised, this - function will not return ``1``, but exit the process, as long as - ``Py_InspectFlag`` is not set. If ``Py_InspectFlag`` is set, execution will - drop into the interactive Python prompt, at which point a second otherwise - unhandled :exc:`SystemExit` will still exit the process, while any other - means of exiting will set the return value as described above. + The return value is ``2`` if the argument list does not represent a valid + Python command line, and otherwise the same as :c:func:`Py_RunMain`. In terms of the CPython runtime configuration APIs documented in the :ref:`runtime configuration ` section (and without accounting @@ -545,23 +536,18 @@ Initializing and finalizing the interpreter If :c:member:`PyConfig.inspect` is not set (the default), the return value will be ``0`` if the interpreter exits normally (that is, without raising - an exception), or ``1`` if the interpreter exits due to an exception. If an - otherwise unhandled :exc:`SystemExit` is raised, the function will immediately - exit the process instead of returning ``1``. + an exception), the exit status of an unhandled :exc:`SystemExit`, or ``1`` + for any other unhandled exception. If :c:member:`PyConfig.inspect` is set (such as when the :option:`-i` option is used), rather than returning when the interpreter exits, execution will instead resume in an interactive Python prompt (REPL) using the ``__main__`` module's global namespace. If the interpreter exited with an exception, it is immediately raised in the REPL session. The function return value is - then determined by the way the *REPL session* terminates: returning ``0`` - if the session terminates without raising an unhandled exception, exiting - immediately for an unhandled :exc:`SystemExit`, and returning ``1`` for - any other unhandled exception. + then determined by the way the *REPL session* terminates: ``0``, ``1``, or + the status of a :exc:`SystemExit`, as specified above. - This function always finalizes the Python interpreter regardless of whether - it returns a value or immediately exits the process due to an unhandled - :exc:`SystemExit` exception. + This function always finalizes the Python interpreter before it returns. See :ref:`Python Configuration ` for an example of a customized Python that always runs in isolated mode using @@ -1083,8 +1069,36 @@ Note that the ``PyGILState_*`` functions assume there is only one global interpreter (created automatically by :c:func:`Py_Initialize`). Python supports the creation of additional interpreters (using :c:func:`Py_NewInterpreter`), but mixing multiple interpreters and the -``PyGILState_*`` API is unsupported. +``PyGILState_*`` API is unsupported. This is because :c:func:`PyGILState_Ensure` +and similar functions default to :term:`attaching ` a +:term:`thread state` for the main interpreter, meaning that the thread can't safely +interact with the calling subinterpreter. + +Supporting subinterpreters in non-Python threads +------------------------------------------------ + +If you would like to support subinterpreters with non-Python created threads, you +must use the ``PyThreadState_*`` API instead of the traditional ``PyGILState_*`` +API. + +In particular, you must store the interpreter state from the calling +function and pass it to :c:func:`PyThreadState_New`, which will ensure that +the :term:`thread state` is targeting the correct interpreter:: + + /* The return value of PyInterpreterState_Get() from the + function that created this thread. */ + PyInterpreterState *interp = ThreadData->interp; + PyThreadState *tstate = PyThreadState_New(interp); + PyThreadState_Swap(tstate); + + /* GIL of the subinterpreter is now held. + Perform Python actions here. */ + result = CallSomeFunction(); + /* evaluate result or handle exception */ + /* Destroy the thread state. No Python API allowed beyond this point. */ + PyThreadState_Clear(tstate); + PyThreadState_DeleteCurrent(); .. _fork-and-threads: @@ -1170,6 +1184,12 @@ code, or when embedding the Python interpreter: interpreter lock is also shared by all threads, regardless of to which interpreter they belong. + .. versionchanged:: 3.12 + + :pep:`684` introduced the possibility + of a :ref:`per-interpreter GIL `. + See :c:func:`Py_NewInterpreterFromConfig`. + .. c:type:: PyThreadState @@ -1258,13 +1278,30 @@ code, or when embedding the Python interpreter: This function is safe to call without an :term:`attached thread state`; it will simply return ``NULL`` indicating that there was no prior thread state. - .. seealso: + .. seealso:: :c:func:`PyEval_ReleaseThread` + .. note:: + Similar to :c:func:`PyGILState_Ensure`, this function will hang the + thread if the runtime is finalizing. + The following functions use thread-local storage, and are not compatible with sub-interpreters: +.. c:type:: PyGILState_STATE + + The type of the value returned by :c:func:`PyGILState_Ensure` and passed to + :c:func:`PyGILState_Release`. + + .. c:enumerator:: PyGILState_LOCKED + + The GIL was already held when :c:func:`PyGILState_Ensure` was called. + + .. c:enumerator:: PyGILState_UNLOCKED + + The GIL was not held when :c:func:`PyGILState_Ensure` was called. + .. c:function:: PyGILState_STATE PyGILState_Ensure() Ensure that the current thread is ready to call the Python C API regardless @@ -1287,10 +1324,10 @@ with sub-interpreters: When the function returns, there will be an :term:`attached thread state` and the thread will be able to call arbitrary Python code. Failure is a fatal error. - .. note:: - Calling this function from a thread when the runtime is finalizing will - hang the thread until the program exits, even if the thread was not - created by Python. Refer to + .. warning:: + Calling this function when the runtime is finalizing is unsafe. Doing + so will either hang the thread until the program ends, or fully crash + the interpreter in rare cases. Refer to :ref:`cautions-regarding-runtime-finalization` for more details. .. versionchanged:: 3.14 @@ -1307,7 +1344,6 @@ with sub-interpreters: Every call to :c:func:`PyGILState_Ensure` must be matched by a call to :c:func:`PyGILState_Release` on the same thread. - .. c:function:: PyThreadState* PyGILState_GetThisThreadState() Get the :term:`attached thread state` for this thread. May return ``NULL`` if no @@ -1315,20 +1351,30 @@ with sub-interpreters: always has such a thread-state, even if no auto-thread-state call has been made on the main thread. This is mainly a helper/diagnostic function. - .. seealso: :c:func:`PyThreadState_Get`` + .. note:: + This function may return non-``NULL`` even when the :term:`thread state` + is detached. + Prefer :c:func:`PyThreadState_Get` or :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. seealso:: :c:func:`PyThreadState_Get` .. c:function:: int PyGILState_Check() Return ``1`` if the current thread is holding the :term:`GIL` and ``0`` otherwise. This function can be called from any thread at any time. - Only if it has had its Python thread state initialized and currently is - holding the :term:`GIL` will it return ``1``. + Only if it has had its :term:`thread state ` initialized + via :c:func:`PyGILState_Ensure` will it return ``1``. This is mainly a helper/diagnostic function. It can be useful for example in callback contexts or memory allocation functions when knowing that the :term:`GIL` is locked can allow the caller to perform sensitive actions or otherwise behave differently. + .. note:: + If the current Python process has ever created a subinterpreter, this + function will *always* return ``1``. Prefer :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. versionadded:: 3.4 @@ -1387,7 +1433,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. c:function:: void PyInterpreterState_Clear(PyInterpreterState *interp) Reset all information in an interpreter state object. There must be - an :term:`attached thread state` for the the interpreter. + an :term:`attached thread state` for the interpreter. .. audit-event:: cpython.PyInterpreterState_Clear "" c.PyInterpreterState_Clear @@ -1410,11 +1456,11 @@ All of the following functions must be called after :c:func:`Py_Initialize`. must be :term:`attached ` .. versionchanged:: 3.9 - This function now calls the :c:member:`PyThreadState.on_delete` callback. + This function now calls the :c:member:`!PyThreadState.on_delete` callback. Previously, that happened in :c:func:`PyThreadState_Delete`. .. versionchanged:: 3.13 - The :c:member:`PyThreadState.on_delete` callback was removed. + The :c:member:`!PyThreadState.on_delete` callback was removed. .. c:function:: void PyThreadState_Delete(PyThreadState *tstate) @@ -1485,6 +1531,63 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. versionadded:: 3.11 +.. c:function:: int PyUnstable_ThreadState_SetStackProtection(PyThreadState *tstate, void *stack_start_addr, size_t stack_size) + + Set the stack protection start address and stack protection size + of a Python thread state. + + On success, return ``0``. + On failure, set an exception and return ``-1``. + + CPython implements :ref:`recursion control ` for C code by raising + :py:exc:`RecursionError` when it notices that the machine execution stack is close + to overflow. See for example the :c:func:`Py_EnterRecursiveCall` function. + For this, it needs to know the location of the current thread's stack, which it + normally gets from the operating system. + When the stack is changed, for example using context switching techniques like the + Boost library's ``boost::context``, you must call + :c:func:`~PyUnstable_ThreadState_SetStackProtection` to inform CPython of the change. + + Call :c:func:`~PyUnstable_ThreadState_SetStackProtection` either before + or after changing the stack. + Do not call any other Python C API between the call and the stack + change. + + See :c:func:`PyUnstable_ThreadState_ResetStackProtection` for undoing this operation. + + .. versionadded:: 3.14.1 + + .. warning:: + + This function was added in a bugfix release, and + extensions that use it will be incompatible with Python 3.14.0. + Most packaging tools for Python are not able to handle this + incompatibility automatically, and will need explicit configuration. + When using PyPA standards (wheels and source distributions), + specify ``Requires-Python: != 3.14.0.*`` in + `core metadata `_. + + +.. c:function:: void PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) + + Reset the stack protection start address and stack protection size + of a Python thread state to the operating system defaults. + + See :c:func:`PyUnstable_ThreadState_SetStackProtection` for an explanation. + + .. versionadded:: 3.14.1 + + .. warning:: + + This function was added in a bugfix release, and + extensions that use it will be incompatible with Python 3.14.0. + Most packaging tools for Python are not able to handle this + incompatibility automatically, and will need explicit configuration. + When using PyPA standards (wheels and source distributions), + specify ``Requires-Python: != 3.14.0.*`` in + `core metadata `_. + + .. c:function:: PyInterpreterState* PyInterpreterState_Get(void) Get the current interpreter. @@ -1514,6 +1617,9 @@ All of the following functions must be called after :c:func:`Py_Initialize`. This is not a replacement for :c:func:`PyModule_GetState()`, which extensions should use to store interpreter-specific state information. + The returned dictionary is borrowed from the interpreter and is valid until + interpreter shutdown. + .. versionadded:: 3.8 @@ -1848,6 +1954,8 @@ function. You can create and destroy them using the following functions: haven't been explicitly destroyed at that point. +.. _per-interpreter-gil: + A Per-Interpreter GIL --------------------- @@ -1859,7 +1967,7 @@ being blocked by other interpreters or blocking any others. Thus a single Python process can truly take advantage of multiple CPU cores when running Python code. The isolation also encourages a different approach to concurrency than that of just using threads. -(See :pep:`554`.) +(See :pep:`554` and :pep:`684`.) Using an isolated interpreter requires vigilance in preserving that isolation. That especially means not sharing any objects or mutable @@ -1964,6 +2072,29 @@ pointer and a void pointer argument. called from the main interpreter. Each subinterpreter now has its own list of scheduled calls. + .. versionchanged:: 3.12 + This function now always schedules *func* to be run in the main + interpreter. + + +.. c:function:: int Py_MakePendingCalls(void) + + Execute all pending calls. This is usually executed automatically by the + interpreter. + + This function returns ``0`` on success, and returns ``-1`` with an exception + set on failure. + + If this is not called in the main thread of the main + interpreter, this function does nothing and returns ``0``. + The caller must hold an :term:`attached thread state`. + + .. versionadded:: 3.1 + + .. versionchanged:: 3.12 + This function only runs pending calls in the main interpreter. + + .. _profiling: Profiling and Tracing @@ -2414,6 +2545,18 @@ The C-API provides a basic mutual exclusion lock. .. versionadded:: 3.13 +.. c:function:: int PyMutex_IsLocked(PyMutex *m) + + Returns non-zero if the mutex *m* is currently locked, zero otherwise. + + .. note:: + + This function is intended for use in assertions and debugging only and + should not be used to make concurrency control decisions, as the lock + state may change immediately after the check. + + .. versionadded:: 3.14 + .. _python-critical-section-api: Python Critical Section API @@ -2424,12 +2567,26 @@ per-object locks for :term:`free-threaded ` CPython. They are intended to replace reliance on the :term:`global interpreter lock`, and are no-ops in versions of Python with the global interpreter lock. +Critical sections are intended to be used for custom types implemented +in C-API extensions. They should generally not be used with built-in types like +:class:`list` and :class:`dict` because their public C-APIs +already use critical sections internally, with the notable +exception of :c:func:`PyDict_Next`, which requires critical section +to be acquired externally. + Critical sections avoid deadlocks by implicitly suspending active critical -sections and releasing the locks during calls to :c:func:`PyEval_SaveThread`. -When :c:func:`PyEval_RestoreThread` is called, the most recent critical section -is resumed, and its locks reacquired. This means the critical section API -provides weaker guarantees than traditional locks -- they are useful because -their behavior is similar to the :term:`GIL`. +sections, hence, they do not provide exclusive access such as provided by +traditional locks like :c:type:`PyMutex`. When a critical section is started, +the per-object lock for the object is acquired. If the code executed inside the +critical section calls C-API functions then it can suspend the critical section thereby +releasing the per-object lock, so other threads can acquire the per-object lock +for the same object. + +Variants that accept :c:type:`PyMutex` pointers rather than Python objects are also +available. Use these variants to start a critical section in a situation where +there is no :c:type:`PyObject` -- for example, when working with a C type that +does not extend or wrap :c:type:`PyObject` but still needs to call into the C +API in a manner that might lead to deadlocks. The functions and structs used by the macros are exposed for cases where C macros are not available. They should only be used as in the @@ -2476,6 +2633,23 @@ code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. .. versionadded:: 3.13 +.. c:macro:: Py_BEGIN_CRITICAL_SECTION_MUTEX(m) + + Locks the mutex *m* and begins a critical section. + + In the free-threaded build, this macro expands to:: + + { + PyCriticalSection _py_cs; + PyCriticalSection_BeginMutex(&_py_cs, m) + + Note that unlike :c:macro:`Py_BEGIN_CRITICAL_SECTION`, there is no cast for + the argument of the macro - it must be a :c:type:`PyMutex` pointer. + + On the default build, this macro expands to ``{``. + + .. versionadded:: 3.14 + .. c:macro:: Py_END_CRITICAL_SECTION() Ends the critical section and releases the per-object lock. @@ -2505,6 +2679,23 @@ code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. .. versionadded:: 3.13 +.. c:macro:: Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) + + Locks the mutexes *m1* and *m2* and begins a critical section. + + In the free-threaded build, this macro expands to:: + + { + PyCriticalSection2 _py_cs2; + PyCriticalSection2_BeginMutex(&_py_cs2, m1, m2) + + Note that unlike :c:macro:`Py_BEGIN_CRITICAL_SECTION2`, there is no cast for + the arguments of the macro - they must be :c:type:`PyMutex` pointers. + + On the default build, this macro expands to ``{``. + + .. versionadded:: 3.14 + .. c:macro:: Py_END_CRITICAL_SECTION2() Ends the critical section and releases the per-object locks. @@ -2517,3 +2708,220 @@ code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. In the default build, this macro expands to ``}``. .. versionadded:: 3.13 + + +Legacy Locking APIs +------------------- + +These APIs are obsolete since Python 3.13 with the introduction of +:c:type:`PyMutex`. + +.. versionchanged:: 3.15 + These APIs are now a simple wrapper around ``PyMutex``. + + +.. c:type:: PyThread_type_lock + + A pointer to a mutual exclusion lock. + + +.. c:type:: PyLockStatus + + The result of acquiring a lock with a timeout. + + .. c:namespace:: NULL + + .. c:enumerator:: PY_LOCK_FAILURE + + Failed to acquire the lock. + + .. c:enumerator:: PY_LOCK_ACQUIRED + + The lock was successfully acquired. + + .. c:enumerator:: PY_LOCK_INTR + + The lock was interrupted by a signal. + + +.. c:function:: PyThread_type_lock PyThread_allocate_lock(void) + + Allocate a new lock. + + On success, this function returns a lock; on failure, this + function returns ``0`` without an exception set. + + The caller does not need to hold an :term:`attached thread state`. + + .. versionchanged:: 3.15 + This function now always uses :c:type:`PyMutex`. In prior versions, this + would use a lock provided by the operating system. + + +.. c:function:: void PyThread_free_lock(PyThread_type_lock lock) + + Destroy *lock*. The lock should not be held by any thread when calling + this. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: PyLockStatus PyThread_acquire_lock_timed(PyThread_type_lock lock, long long microseconds, int intr_flag) + + Acquire *lock* with a timeout. + + This will wait for *microseconds* microseconds to acquire the lock. If the + timeout expires, this function returns :c:enumerator:`PY_LOCK_FAILURE`. + If *microseconds* is ``-1``, this will wait indefinitely until the lock has + been released. + + If *intr_flag* is ``1``, acquiring the lock may be interrupted by a signal, + in which case this function returns :c:enumerator:`PY_LOCK_INTR`. Upon + interruption, it's generally expected that the caller makes a call to + :c:func:`Py_MakePendingCalls` to propagate an exception to Python code. + + If the lock is successfully acquired, this function returns + :c:enumerator:`PY_LOCK_ACQUIRED`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) + + Acquire *lock*. + + If *waitflag* is ``1`` and another thread currently holds the lock, this + function will wait until the lock can be acquired and will always return + ``1``. + + If *waitflag* is ``0`` and another thread holds the lock, this function will + not wait and instead return ``0``. If the lock is not held by any other + thread, then this function will acquire it and return ``1``. + + Unlike :c:func:`PyThread_acquire_lock_timed`, acquiring the lock cannot be + interrupted by a signal. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: int PyThread_release_lock(PyThread_type_lock lock) + + Release *lock*. If *lock* is not held, then this function issues a + fatal error. + + The caller does not need to hold an :term:`attached thread state`. + + +Operating System Thread APIs +============================ + +.. c:macro:: PYTHREAD_INVALID_THREAD_ID + + Sentinel value for an invalid thread ID. + + This is currently equivalent to ``(unsigned long)-1``. + + +.. c:function:: unsigned long PyThread_start_new_thread(void (*func)(void *), void *arg) + + Start function *func* in a new thread with argument *arg*. + The resulting thread is not intended to be joined. + + *func* must not be ``NULL``, but *arg* may be ``NULL``. + + On success, this function returns the identifier of the new thread; on failure, + this returns :c:macro:`PYTHREAD_INVALID_THREAD_ID`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: unsigned long PyThread_get_thread_ident(void) + + Return the identifier of the current thread, which will never be zero. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_ident` + + +.. c:function:: PyObject *PyThread_GetInfo(void) + + Get general information about the current thread in the form of a + :ref:`struct sequence ` object. This information is + accessible as :py:attr:`sys.thread_info` in Python. + + On success, this returns a new :term:`strong reference` to the thread + information; on failure, this returns ``NULL`` with an exception set. + + The caller must hold an :term:`attached thread state`. + + +.. c:macro:: PY_HAVE_THREAD_NATIVE_ID + + This macro is defined when the system supports native thread IDs. + + +.. c:function:: unsigned long PyThread_get_thread_native_id(void) + + Get the native identifier of the current thread as it was assigned by the operating + system's kernel, which will never be less than zero. + + This function is only available when :c:macro:`PY_HAVE_THREAD_NATIVE_ID` is + defined. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_native_id` + + +.. c:function:: void PyThread_exit_thread(void) + + Terminate the current thread. This function is generally considered unsafe + and should be avoided. It is kept solely for backwards compatibility. + + This function is only safe to call if all functions in the full call + stack are written to safely allow it. + + .. warning:: + + If the current system uses POSIX threads (also known as "pthreads"), + this calls :manpage:`pthread_exit(3)`, which attempts to unwind the stack + and call C++ destructors on some libc implementations. However, if a + ``noexcept`` function is reached, it may terminate the process. + Other systems, such as macOS, do unwinding. + + On Windows, this function calls ``_endthreadex()``, which kills the thread + without calling C++ destructors. + + In any case, there is a risk of corruption on the thread's stack. + + .. deprecated:: 3.14 + + +.. c:function:: void PyThread_init_thread(void) + + Initialize ``PyThread*`` APIs. Python executes this function automatically, + so there's little need to call it from an extension module. + + +.. c:function:: int PyThread_set_stacksize(size_t size) + + Set the stack size of the current thread to *size* bytes. + + This function returns ``0`` on success, ``-1`` if *size* is invalid, or + ``-2`` if the system does not support changing the stack size. This function + does not set exceptions. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: size_t PyThread_get_stacksize(void) + + Return the stack size of the current thread in bytes, or ``0`` if the system's + default stack size is in use. + + The caller does not need to hold an :term:`attached thread state`. diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index e1931655618b1c..424d7de7089152 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -102,7 +102,7 @@ Error Handling * Set *\*err_msg* and return ``1`` if an error is set. * Set *\*err_msg* to ``NULL`` and return ``0`` otherwise. - An error message is an UTF-8 encoded string. + An error message is a UTF-8 encoded string. If *config* has an exit code, format the exit code as an error message. diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index c8c60eb9f48f45..3a0c4c8e96068b 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -111,38 +111,20 @@ Useful macros ============= Several useful macros are defined in the Python header files. Many are -defined closer to where they are useful (e.g. :c:macro:`Py_RETURN_NONE`). +defined closer to where they are useful (for example, :c:macro:`Py_RETURN_NONE`, +:c:macro:`PyMODINIT_FUNC`). Others of a more general utility are defined here. This is not necessarily a complete listing. -.. c:macro:: PyMODINIT_FUNC - - Declare an extension module ``PyInit`` initialization function. The function - return type is :c:expr:`PyObject*`. The macro declares any special linkage - declarations required by the platform, and for C++ declares the function as - ``extern "C"``. - - The initialization function must be named :samp:`PyInit_{name}`, where - *name* is the name of the module, and should be the only non-\ ``static`` - item defined in the module file. Example:: - - static struct PyModuleDef spam_module = { - PyModuleDef_HEAD_INIT, - .m_name = "spam", - ... - }; - - PyMODINIT_FUNC - PyInit_spam(void) - { - return PyModule_Create(&spam_module); - } - .. c:macro:: Py_ABS(x) Return the absolute value of ``x``. + If the result cannot be represented (for example, if ``x`` has + :c:macro:`!INT_MIN` value for :c:expr:`int` type), the behavior is + undefined. + .. versionadded:: 3.3 .. c:macro:: Py_ALWAYS_INLINE @@ -189,6 +171,17 @@ complete listing. Like ``getenv(s)``, but returns ``NULL`` if :option:`-E` was passed on the command line (see :c:member:`PyConfig.use_environment`). +.. c:macro:: Py_LOCAL(type) + + Declare a function returning the specified *type* using a fast-calling + qualifier for functions that are local to the current file. + Semantically, this is equivalent to ``static type``. + +.. c:macro:: Py_LOCAL_INLINE(type) + + Equivalent to :c:macro:`Py_LOCAL` but additionally requests the function + be inlined. + .. c:macro:: Py_MAX(x, y) Return the maximum value between ``x`` and ``y``. @@ -201,6 +194,14 @@ complete listing. .. versionadded:: 3.6 +.. c:macro:: Py_MEMCPY(dest, src, n) + + This is a :term:`soft deprecated` alias to :c:func:`!memcpy`. + Use :c:func:`!memcpy` directly instead. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. + .. c:macro:: Py_MIN(x, y) Return the minimum value between ``x`` and ``y``. @@ -255,9 +256,32 @@ complete listing. .. versionadded:: 3.4 +.. c:macro:: Py_BUILD_ASSERT(cond) + + Asserts a compile-time condition *cond*, as a statement. + The build will fail if the condition is false or cannot be evaluated at compile time. + + For example:: + + Py_BUILD_ASSERT(sizeof(PyTime_t) == sizeof(int64_t)); + + .. versionadded:: 3.3 + +.. c:macro:: Py_BUILD_ASSERT_EXPR(cond) + + Asserts a compile-time condition *cond*, as an expression that evaluates to ``0``. + The build will fail if the condition is false or cannot be evaluated at compile time. + + For example:: + + #define foo_to_char(foo) \ + ((char *)(foo) + Py_BUILD_ASSERT_EXPR(offsetof(struct foo, string) == 0)) + + .. versionadded:: 3.3 + .. c:macro:: PyDoc_STRVAR(name, str) - Creates a variable with name ``name`` that can be used in docstrings. + Creates a variable with name *name* that can be used in docstrings. If Python is built without docstrings, the value will be empty. Use :c:macro:`PyDoc_STRVAR` for docstrings to support building @@ -289,6 +313,28 @@ complete listing. {NULL, NULL} }; +.. c:macro:: PyDoc_VAR(name) + + Declares a static character array variable with the given name *name*. + + For example:: + + PyDoc_VAR(python_doc) = PyDoc_STR("A genus of constricting snakes in the Pythonidae family native " + "to the tropics and subtropics of the Eastern Hemisphere."); + +.. c:macro:: Py_ARRAY_LENGTH(array) + + Compute the length of a statically allocated C array at compile time. + + The *array* argument must be a C array with a size known at compile time. + Passing an array with an unknown size, such as a heap-allocated array, + will result in a compilation error on some compilers, or otherwise produce + incorrect results. + + This is roughly equivalent to:: + + sizeof(array) / sizeof((array)[0]) + .. _api-objects: @@ -844,3 +890,41 @@ after every statement run by the interpreter.) Please refer to :file:`Misc/SpecialBuilds.txt` in the Python source distribution for more detailed information. + + +.. _c-api-tools: + +Recommended third party tools +============================= + +The following third party tools offer both simpler and more sophisticated +approaches to creating C, C++ and Rust extensions for Python: + +* `Cython `_ +* `cffi `_ +* `HPy `_ +* `nanobind `_ (C++) +* `Numba `_ +* `pybind11 `_ (C++) +* `PyO3 `_ (Rust) +* `SWIG `_ + +Using tools such as these can help avoid writing code that is tightly bound to +a particular version of CPython, avoid reference counting errors, and focus +more on your own code than on using the CPython API. In general, new versions +of Python can be supported by updating the tool, and your code will often use +newer and more efficient APIs automatically. Some tools also support compiling +for other implementations of Python from a single set of sources. + +These projects are not supported by the same people who maintain Python, and +issues need to be raised with the projects directly. Remember to check that the +project is still maintained and supported, as the list above may become +outdated. + +.. seealso:: + + `Python Packaging User Guide: Binary Extensions `_ + The Python Packaging User Guide not only covers several available + tools that simplify the creation of binary extensions, but also + discusses the various reasons why creating an extension module may be + desirable in the first place. diff --git a/Doc/c-api/iterator.rst b/Doc/c-api/iterator.rst index 6b7ba8c9979163..bfbfe3c9279980 100644 --- a/Doc/c-api/iterator.rst +++ b/Doc/c-api/iterator.rst @@ -50,3 +50,72 @@ sentinel value is returned. callable object that can be called with no parameters; each call to it should return the next item in the iteration. When *callable* returns a value equal to *sentinel*, the iteration will be terminated. + + +Range Objects +^^^^^^^^^^^^^ + +.. c:var:: PyTypeObject PyRange_Type + + The type object for :class:`range` objects. + + +.. c:function:: int PyRange_Check(PyObject *o) + + Return true if the object *o* is an instance of a :class:`range` object. + This function always succeeds. + + +Builtin Iterator Types +^^^^^^^^^^^^^^^^^^^^^^ + +These are built-in iteration types that are included in Python's C API, but +provide no additional functions. They are here for completeness. + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * C type + * Python type + * * .. c:var:: PyTypeObject PyEnum_Type + * :py:class:`enumerate` + * * .. c:var:: PyTypeObject PyFilter_Type + * :py:class:`filter` + * * .. c:var:: PyTypeObject PyMap_Type + * :py:class:`map` + * * .. c:var:: PyTypeObject PyReversed_Type + * :py:class:`reversed` + * * .. c:var:: PyTypeObject PyZip_Type + * :py:class:`zip` + + +Other Iterator Objects +^^^^^^^^^^^^^^^^^^^^^^ + +.. c:var:: PyTypeObject PyByteArrayIter_Type +.. c:var:: PyTypeObject PyBytesIter_Type +.. c:var:: PyTypeObject PyListIter_Type +.. c:var:: PyTypeObject PyListRevIter_Type +.. c:var:: PyTypeObject PySetIter_Type +.. c:var:: PyTypeObject PyTupleIter_Type +.. c:var:: PyTypeObject PyRangeIter_Type +.. c:var:: PyTypeObject PyLongRangeIter_Type +.. c:var:: PyTypeObject PyDictIterKey_Type +.. c:var:: PyTypeObject PyDictRevIterKey_Type +.. c:var:: PyTypeObject PyDictIterValue_Type +.. c:var:: PyTypeObject PyDictRevIterValue_Type +.. c:var:: PyTypeObject PyDictIterItem_Type +.. c:var:: PyTypeObject PyDictRevIterItem_Type +.. c:var:: PyTypeObject PyODictIter_Type + + Type objects for iterators of various built-in objects. + + Do not create instances of these directly; prefer calling + :c:func:`PyObject_GetIter` instead. + + Note that there is no guarantee that a given built-in type uses a given iterator + type. For example, iterating over :class:`range` will use one of two iterator + types depending on the size of the range. Other types may start using a + similar scheme in the future, without warning. diff --git a/Doc/c-api/lifecycle.dot b/Doc/c-api/lifecycle.dot new file mode 100644 index 00000000000000..dca9f87e9e0aca --- /dev/null +++ b/Doc/c-api/lifecycle.dot @@ -0,0 +1,156 @@ +digraph "Life Events" { + graph [ + fontnames="svg" + fontsize=12.0 + id="life_events_graph" + layout="dot" + margin="0,0" + ranksep=0.25 + stylesheet="lifecycle.dot.css" + ] + node [ + fontname="Courier" + fontsize=12.0 + ] + edge [ + fontname="Times-Italic" + fontsize=12.0 + ] + + "start" [fontname="Times-Italic" shape=plain label=< start > style=invis] + { + rank="same" + "tp_new" [href="typeobj.html#c.PyTypeObject.tp_new" target="_top"] + "tp_alloc" [href="typeobj.html#c.PyTypeObject.tp_alloc" target="_top"] + } + "tp_init" [href="typeobj.html#c.PyTypeObject.tp_init" target="_top"] + "reachable" [fontname="Times-Italic" shape=box] + "tp_traverse" [ + href="typeobj.html#c.PyTypeObject.tp_traverse" + ordering="in" + target="_top" + ] + "finalized?" [ + fontname="Times-Italic" + label=finalized?> + ordering="in" + shape=diamond + tooltip="marked as finalized?" + ] + "tp_finalize" [ + href="typeobj.html#c.PyTypeObject.tp_finalize" + ordering="in" + target="_top" + ] + "tp_clear" [href="typeobj.html#c.PyTypeObject.tp_clear" target="_top"] + "uncollectable" [ + fontname="Times-Italic" + label=(leaked)> + shape=box + tooltip="uncollectable (leaked)" + ] + "tp_dealloc" [ + href="typeobj.html#c.PyTypeObject.tp_dealloc" + ordering="in" + target="_top" + ] + "tp_free" [href="typeobj.html#c.PyTypeObject.tp_free" target="_top"] + + "start" -> "tp_new" [ + label=< type call > + ] + "tp_new" -> "tp_alloc" [ + label=< direct call > arrowhead=empty + labeltooltip="tp_new to tp_alloc: direct call" + tooltip="tp_new to tp_alloc: direct call" + ] + "tp_new" -> "tp_init" [tooltip="tp_new to tp_init"] + "tp_init" -> "reachable" [tooltip="tp_init to reachable"] + "reachable" -> "tp_traverse" [ + dir="back" + label=< not in a
cyclic
isolate > + labeltooltip="tp_traverse to reachable: not in a cyclic isolate" + tooltip="tp_traverse to reachable: not in a cyclic isolate" + ] + "reachable" -> "tp_traverse" [ + label=< periodic
cyclic isolate
detection > + labeltooltip="reachable to tp_traverse: periodic cyclic isolate detection" + tooltip="reachable to tp_traverse: periodic cyclic isolate detection" + ] + "reachable" -> "tp_init" [tooltip="reachable to tp_init"] + "reachable" -> "tp_finalize" [ + dir="back" + label=< resurrected
(maybe remove
finalized mark) > + labeltooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + tooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + ] + "tp_traverse" -> "finalized?" [ + label=< cyclic
isolate > + labeltooltip="tp_traverse to finalized?: cyclic isolate" + tooltip="tp_traverse to finalized?: cyclic isolate" + ] + "reachable" -> "finalized?" [ + label=< no refs > + labeltooltip="reachable to finalized?: no refs" + tooltip="reachable to finalized?: no refs" + ] + "finalized?" -> "tp_finalize" [ + label=< no (mark
as finalized) > + labeltooltip="finalized? to tp_finalize: no (mark as finalized)" + tooltip="finalized? to tp_finalize: no (mark as finalized)" + ] + "finalized?" -> "tp_clear" [ + label=< yes > + labeltooltip="finalized? to tp_clear: yes" + tooltip="finalized? to tp_clear: yes" + ] + "tp_finalize" -> "tp_clear" [ + label=< no refs or
cyclic isolate > + labeltooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + tooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + ] + "tp_finalize" -> "tp_dealloc" [ + arrowtail=empty + dir="back" + href="lifecycle.html#c.PyObject_CallFinalizerFromDealloc" + style=dashed + label=< recommended
call (see
explanation)> + labeltooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + target="_top" + tooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + ] + "tp_finalize" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_finalize to tp_dealloc: no refs" + tooltip="tp_finalize to tp_dealloc: no refs" + ] + "tp_clear" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_clear to tp_dealloc: no refs" + tooltip="tp_clear to tp_dealloc: no refs" + ] + "tp_clear" -> "uncollectable" [ + label=< cyclic
isolate > + labeltooltip="tp_clear to uncollectable: cyclic isolate" + tooltip="tp_clear to uncollectable: cyclic isolate" + ] + "uncollectable" -> "tp_dealloc" [ + style=invis + tooltip="uncollectable to tp_dealloc" + ] + "reachable" -> "uncollectable" [ + label=< cyclic
isolate
(no GC
support) > + labeltooltip="reachable to uncollectable: cyclic isolate (no GC support)" + tooltip="reachable to uncollectable: cyclic isolate (no GC support)" + ] + "reachable" -> "tp_dealloc" [ + label=< no refs> + labeltooltip="reachable to tp_dealloc: no refs" + ] + "tp_dealloc" -> "tp_free" [ + arrowhead=empty + label=< direct call > + labeltooltip="tp_dealloc to tp_free: direct call" + tooltip="tp_dealloc to tp_free: direct call" + ] +} diff --git a/Doc/c-api/lifecycle.dot.css b/Doc/c-api/lifecycle.dot.css new file mode 100644 index 00000000000000..3abf95b74da6ba --- /dev/null +++ b/Doc/c-api/lifecycle.dot.css @@ -0,0 +1,21 @@ +#life_events_graph { + --svg-fgcolor: currentcolor; + --svg-bgcolor: transparent; +} +#life_events_graph a { + color: inherit; +} +#life_events_graph [stroke="black"] { + stroke: var(--svg-fgcolor); +} +#life_events_graph text, +#life_events_graph [fill="black"] { + fill: var(--svg-fgcolor); +} +#life_events_graph [fill="white"] { + fill: var(--svg-bgcolor); +} +#life_events_graph [fill="none"] { + /* On links, setting fill will make the entire shape clickable */ + fill: var(--svg-bgcolor); +} diff --git a/Doc/c-api/lifecycle.dot.pdf b/Doc/c-api/lifecycle.dot.pdf new file mode 100644 index 00000000000000..ed5b5039c83e2c Binary files /dev/null and b/Doc/c-api/lifecycle.dot.pdf differ diff --git a/Doc/c-api/lifecycle.dot.svg b/Doc/c-api/lifecycle.dot.svg new file mode 100644 index 00000000000000..7ace27dfcba113 --- /dev/null +++ b/Doc/c-api/lifecycle.dot.svg @@ -0,0 +1,374 @@ + + + + + + + +Life Events + + + + +tp_new + + +tp_new + + + + + +start->tp_new + + + + + + +    type call   + + + + + +tp_alloc + + +tp_alloc + + + + + +tp_new->tp_alloc + + + + + + +  direct call   + + + + + +tp_init + + +tp_init + + + + + +tp_new->tp_init + + + + + + + + +reachable + +reachable + + + +tp_init->reachable + + + + + + + + +reachable->tp_init + + + + + + + + +tp_traverse + + +tp_traverse + + + + + +reachable->tp_traverse + + + + + + +  not in a   +  cyclic   +  isolate   + + + + + +reachable->tp_traverse + + + + + + +  periodic   +  cyclic isolate    +  detection   + + + + + +finalized? + + +marked as +finalized? + + + + + +reachable->finalized? + + + + + + +  no refs   + + + + + +tp_finalize + + +tp_finalize + + + + + +reachable->tp_finalize + + + + + + +  resurrected   +  (maybe remove   +  finalized mark)   + + + + + +uncollectable + + +uncollectable +(leaked) + + + + + +reachable->uncollectable + + + + + + +  cyclic   +  isolate   +  (no GC   +  support)   + + + + + +tp_dealloc + + +tp_dealloc + + + + + +reachable->tp_dealloc + + + +  no refs + + + + + +tp_traverse->finalized? + + + + + + +  cyclic   +  isolate   + + + + + +finalized?->tp_finalize + + + + + + +  no (mark   +  as finalized)   + + + + + +tp_clear + + +tp_clear + + + + + +finalized?->tp_clear + + + + + + +  yes   + + + + + +tp_finalize->tp_clear + + + + + + +  no refs or    +  cyclic isolate   + + + + + +tp_finalize->tp_dealloc + + + + + + +  recommended +  call (see +  explanation) + + + + + +tp_finalize->tp_dealloc + + + + + + +   no refs   + + + + + +tp_clear->uncollectable + + + + + + +  cyclic   +  isolate   + + + + + +tp_clear->tp_dealloc + + + + + + +  no refs   + + + + + + +tp_free + + +tp_free + + + + + +tp_dealloc->tp_free + + + + + + +    direct call   + + + + + diff --git a/Doc/c-api/lifecycle.rst b/Doc/c-api/lifecycle.rst new file mode 100644 index 00000000000000..5a170862a26f44 --- /dev/null +++ b/Doc/c-api/lifecycle.rst @@ -0,0 +1,271 @@ +.. highlight:: c + +.. _life-cycle: + +Object Life Cycle +================= + +This section explains how a type's slots relate to each other throughout the +life of an object. It is not intended to be a complete canonical reference for +the slots; instead, refer to the slot-specific documentation in +:ref:`type-structs` for details about a particular slot. + + +Life Events +----------- + +The figure below illustrates the order of events that can occur throughout an +object's life. An arrow from *A* to *B* indicates that event *B* can occur +after event *A* has occurred, with the arrow's label indicating the condition +that must be true for *B* to occur after *A*. + +.. only:: html and not epub + + .. raw:: html + + + + .. raw:: html + :file: lifecycle.dot.svg + + .. raw:: html + + + +.. only:: epub or not (html or latex) + + .. image:: lifecycle.dot.svg + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail below. + +.. only:: latex + + .. image:: lifecycle.dot.pdf + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail below. + +.. container:: + :name: life-events-graph-description + + Explanation: + + * When a new object is constructed by calling its type: + + #. :c:member:`~PyTypeObject.tp_new` is called to create a new object. + #. :c:member:`~PyTypeObject.tp_alloc` is directly called by + :c:member:`~PyTypeObject.tp_new` to allocate the memory for the new + object. + #. :c:member:`~PyTypeObject.tp_init` initializes the newly created object. + :c:member:`!tp_init` can be called again to re-initialize an object, if + desired. The :c:member:`!tp_init` call can also be skipped entirely, + for example by Python code calling :py:meth:`~object.__new__`. + + * After :c:member:`!tp_init` completes, the object is ready to use. + * Some time after the last reference to an object is removed: + + #. If an object is not marked as *finalized*, it might be finalized by + marking it as *finalized* and calling its + :c:member:`~PyTypeObject.tp_finalize` function. Python does + *not* finalize an object when the last reference to it is deleted; use + :c:func:`PyObject_CallFinalizerFromDealloc` to ensure that + :c:member:`~PyTypeObject.tp_finalize` is always called. + #. If the object is marked as finalized, + :c:member:`~PyTypeObject.tp_clear` might be called by the garbage collector + to clear references held by the object. It is *not* called when the + object's reference count reaches zero. + #. :c:member:`~PyTypeObject.tp_dealloc` is called to destroy the object. + To avoid code duplication, :c:member:`~PyTypeObject.tp_dealloc` typically + calls into :c:member:`~PyTypeObject.tp_clear` to free up the object's + references. + #. When :c:member:`~PyTypeObject.tp_dealloc` finishes object destruction, + it directly calls :c:member:`~PyTypeObject.tp_free` (usually set to + :c:func:`PyObject_Free` or :c:func:`PyObject_GC_Del` automatically as + appropriate for the type) to deallocate the memory. + + * The :c:member:`~PyTypeObject.tp_finalize` function is permitted to add a + reference to the object if desired. If it does, the object is + *resurrected*, preventing its pending destruction. (Only + :c:member:`!tp_finalize` is allowed to resurrect an object; + :c:member:`~PyTypeObject.tp_clear` and + :c:member:`~PyTypeObject.tp_dealloc` cannot without calling into + :c:member:`!tp_finalize`.) Resurrecting an object may + or may not cause the object's *finalized* mark to be removed. Currently, + Python does not remove the *finalized* mark from a resurrected object if + it supports garbage collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set) but does remove the mark if the object does not support + garbage collection; either or both of these behaviors may change in the + future. + * :c:member:`~PyTypeObject.tp_dealloc` can optionally call + :c:member:`~PyTypeObject.tp_finalize` via + :c:func:`PyObject_CallFinalizerFromDealloc` if it wishes to reuse that + code to help with object destruction. This is recommended because it + guarantees that :c:member:`!tp_finalize` is always called before + destruction. See the :c:member:`~PyTypeObject.tp_dealloc` documentation + for example code. + * If the object is a member of a :term:`cyclic isolate` and either + :c:member:`~PyTypeObject.tp_clear` fails to break the reference cycle or + the cyclic isolate is not detected (perhaps :func:`gc.disable` was called, + or the :c:macro:`Py_TPFLAGS_HAVE_GC` flag was erroneously omitted in one + of the involved types), the objects remain indefinitely uncollectable + (they "leak"). See :data:`gc.garbage`. + + If the object is marked as supporting garbage collection (the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`), the following events are also possible: + + * The garbage collector occasionally calls + :c:member:`~PyTypeObject.tp_traverse` to identify :term:`cyclic isolates + `. + * When the garbage collector discovers a :term:`cyclic isolate`, it + finalizes one of the objects in the group by marking it as *finalized* and + calling its :c:member:`~PyTypeObject.tp_finalize` function, if it has one. + This repeats until the cyclic isolate doesn't exist or all of the objects + have been finalized. + * :c:member:`~PyTypeObject.tp_finalize` is permitted to resurrect the object + by adding a reference from outside the :term:`cyclic isolate`. The new + reference causes the group of objects to no longer form a cyclic isolate + (the reference cycle may still exist, but if it does the objects are no + longer isolated). + * When the garbage collector discovers a :term:`cyclic isolate` and all of + the objects in the group have already been marked as *finalized*, the + garbage collector clears one or more of the uncleared objects in the group + (possibly concurrently) by calling each's + :c:member:`~PyTypeObject.tp_clear` function. This repeats as long as the + cyclic isolate still exists and not all of the objects have been cleared. + + +Cyclic Isolate Destruction +-------------------------- + +Listed below are the stages of life of a hypothetical :term:`cyclic isolate` +that continues to exist after each member object is finalized or cleared. It +is a memory leak if a cyclic isolate progresses through all of these stages; it should +vanish once all objects are cleared, if not sooner. A cyclic isolate can +vanish either because the reference cycle is broken or because the objects are +no longer isolated due to finalizer resurrection (see +:c:member:`~PyTypeObject.tp_finalize`). + +0. **Reachable** (not yet a cyclic isolate): All objects are in their normal, + reachable state. A reference cycle could exist, but an external reference + means the objects are not yet isolated. +#. **Unreachable but consistent:** The final reference from outside the cyclic + group of objects has been removed, causing the objects to become isolated + (thus a cyclic isolate is born). None of the group's objects have been + finalized or cleared yet. The cyclic isolate remains at this stage until + some future run of the garbage collector (not necessarily the next run + because the next run might not scan every object). +#. **Mix of finalized and not finalized:** Objects in a cyclic isolate are + finalized one at a time, which means that there is a period of time when the + cyclic isolate is composed of a mix of finalized and non-finalized objects. + Finalization order is unspecified, so it can appear random. A finalized + object must behave in a sane manner when non-finalized objects interact with + it, and a non-finalized object must be able to tolerate the finalization of + an arbitrary subset of its referents. +#. **All finalized:** All objects in a cyclic isolate are finalized before any + of them are cleared. +#. **Mix of finalized and cleared:** The objects can be cleared serially or + concurrently (but with the :term:`GIL` held); either way, some will finish + before others. A finalized object must be able to tolerate the clearing of + a subset of its referents. :pep:`442` calls this stage "cyclic trash". +#. **Leaked:** If a cyclic isolate still exists after all objects in the group + have been finalized and cleared, then the objects remain indefinitely + uncollectable (see :data:`gc.garbage`). It is a bug if a cyclic isolate + reaches this stage---it means the :c:member:`~PyTypeObject.tp_clear` methods + of the participating objects have failed to break the reference cycle as + required. + +If :c:member:`~PyTypeObject.tp_clear` did not exist, then Python would have no +way to safely break a reference cycle. Simply destroying an object in a cyclic +isolate would result in a dangling pointer, triggering undefined behavior when +an object referencing the destroyed object is itself destroyed. The clearing +step makes object destruction a two-phase process: first +:c:member:`~PyTypeObject.tp_clear` is called to partially destroy the objects +enough to detangle them from each other, then +:c:member:`~PyTypeObject.tp_dealloc` is called to complete the destruction. + +Unlike clearing, finalization is not a phase of destruction. A finalized +object must still behave properly by continuing to fulfill its design +contracts. An object's finalizer is allowed to execute arbitrary Python code, +and is even allowed to prevent the impending destruction by adding a reference. +The finalizer is only related to destruction by call order---if it runs, it runs +before destruction, which starts with :c:member:`~PyTypeObject.tp_clear` (if +called) and concludes with :c:member:`~PyTypeObject.tp_dealloc`. + +The finalization step is not necessary to safely reclaim the objects in a +cyclic isolate, but its existence makes it easier to design types that behave +in a sane manner when objects are cleared. Clearing an object might +necessarily leave it in a broken, partially destroyed state---it might be +unsafe to call any of the cleared object's methods or access any of its +attributes. With finalization, only finalized objects can possibly interact +with cleared objects; non-finalized objects are guaranteed to interact with +only non-cleared (but potentially finalized) objects. + +To summarize the possible interactions: + +* A non-finalized object might have references to or from non-finalized and + finalized objects, but not to or from cleared objects. +* A finalized object might have references to or from non-finalized, finalized, + and cleared objects. +* A cleared object might have references to or from finalized and cleared + objects, but not to or from non-finalized objects. + +Without any reference cycles, an object can be simply destroyed once its last +reference is deleted; the finalization and clearing steps are not necessary to +safely reclaim unused objects. However, it can be useful to automatically call +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` +before destruction anyway because type design is simplified when all objects +always experience the same series of events regardless of whether they +participated in a cyclic isolate. Python currently only calls +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` as +needed to destroy a cyclic isolate; this may change in a future version. + + +Functions +--------- + +To allocate and free memory, see :ref:`allocating-objects`. + + +.. c:function:: void PyObject_CallFinalizer(PyObject *op) + + Finalizes the object as described in :c:member:`~PyTypeObject.tp_finalize`. + Call this function (or :c:func:`PyObject_CallFinalizerFromDealloc`) instead + of calling :c:member:`~PyTypeObject.tp_finalize` directly because this + function may deduplicate multiple calls to :c:member:`!tp_finalize`. + Currently, calls are only deduplicated if the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set); this may + change in the future. + + +.. c:function:: int PyObject_CallFinalizerFromDealloc(PyObject *op) + + Same as :c:func:`PyObject_CallFinalizer` but meant to be called at the + beginning of the object's destructor (:c:member:`~PyTypeObject.tp_dealloc`). + There must not be any references to the object. If the object's finalizer + resurrects the object, this function returns -1; no further destruction + should happen. Otherwise, this function returns 0 and destruction can + continue normally. + + .. seealso:: + + :c:member:`~PyTypeObject.tp_dealloc` for example code. diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 25d9e62e387279..a905d4b23b76b0 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -40,9 +40,11 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Return a new :c:type:`PyLongObject` object from *v*, or ``NULL`` on failure. - The current implementation keeps an array of integer objects for all integers - between ``-5`` and ``256``. When you create an int in that range you actually - just get back a reference to the existing object. + .. impl-detail:: + + CPython keeps an array of integer objects for all integers + between ``-5`` and ``256``. When you create an int in that range + you actually just get back a reference to the existing object. .. c:function:: PyObject* PyLong_FromUnsignedLong(unsigned long v) @@ -159,6 +161,17 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.13 +.. c:macro:: PyLong_FromPid(pid) + + Macro for creating a Python integer from a process identifier. + + This can be defined as an alias to :c:func:`PyLong_FromLong` or + :c:func:`PyLong_FromLongLong`, depending on the size of the system's + PID type. + + .. versionadded:: 3.2 + + .. c:function:: long PyLong_AsLong(PyObject *obj) .. index:: @@ -372,6 +385,10 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Set *\*value* to a signed C :c:expr:`int32_t` or :c:expr:`int64_t` representation of *obj*. + If *obj* is not an instance of :c:type:`PyLongObject`, first call its + :meth:`~object.__index__` method (if present) to convert it to a + :c:type:`PyLongObject`. + If the *obj* value is out of range, raise an :exc:`OverflowError`. Set *\*value* and return ``0`` on success. @@ -439,7 +456,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. All *n_bytes* of the buffer are written: large buffers are padded with zeroes. - If the returned value is greater than than *n_bytes*, the value was + If the returned value is greater than *n_bytes*, the value was truncated: as many of the lowest bits of the value as could fit are written, and the higher bits are ignored. This matches the typical behavior of a C-style downcast. @@ -569,6 +586,17 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.13 +.. c:macro:: PyLong_AsPid(pid) + + Macro for converting a Python integer into a process identifier. + + This can be defined as an alias to :c:func:`PyLong_AsLong`, + :c:func:`PyLong_FromLongLong`, or :c:func:`PyLong_AsInt`, depending on the + size of the system's PID type. + + .. versionadded:: 3.2 + + .. c:function:: int PyLong_GetSign(PyObject *obj, int *sign) Get the sign of the integer object *obj*. diff --git a/Doc/c-api/mapping.rst b/Doc/c-api/mapping.rst index 1f55c0aa955c75..2476ebb9b69dce 100644 --- a/Doc/c-api/mapping.rst +++ b/Doc/c-api/mapping.rst @@ -102,7 +102,7 @@ See also :c:func:`PyObject_GetItem`, :c:func:`PyObject_SetItem` and .. note:: - Exceptions which occur when this calls :meth:`~object.__getitem__` + Exceptions which occur when this calls the :meth:`~object.__getitem__` method are silently ignored. For proper error handling, use :c:func:`PyMapping_HasKeyWithError`, :c:func:`PyMapping_GetOptionalItem` or :c:func:`PyObject_GetItem()` instead. @@ -116,7 +116,7 @@ See also :c:func:`PyObject_GetItem`, :c:func:`PyObject_SetItem` and .. note:: - Exceptions that occur when this calls :meth:`~object.__getitem__` + Exceptions that occur when this calls the :meth:`~object.__getitem__` method or while creating the temporary :class:`str` object are silently ignored. For proper error handling, use :c:func:`PyMapping_HasKeyStringWithError`, diff --git a/Doc/c-api/marshal.rst b/Doc/c-api/marshal.rst index 61218a1bf6f171..668a163b2df5a1 100644 --- a/Doc/c-api/marshal.rst +++ b/Doc/c-api/marshal.rst @@ -82,7 +82,7 @@ The following functions allow marshalled values to be read back in. assumes that no further objects will be read from the file, allowing it to aggressively load file data into memory so that the de-serialization can operate from data in memory rather than reading a byte at a time from the - file. Only use these variant if you are certain that you won't be reading + file. Only use this variant if you are certain that you won't be reading anything else from the file. On error, sets the appropriate exception (:exc:`EOFError`, :exc:`ValueError` diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index 64ae35daa703b8..2395898010214d 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -102,7 +102,7 @@ All allocating functions belong to one of three different "domains" (see also strategies and are optimized for different purposes. The specific details on how every domain allocates memory or what internal functions each domain calls is considered an implementation detail, but for debugging purposes a simplified -table can be found at :ref:`here `. +table can be found at :ref:`default-memory-allocators`. The APIs used to allocate and free a block of memory must be from the same domain. For example, :c:func:`PyMem_Free` must be used to free memory allocated using :c:func:`PyMem_Malloc`. @@ -376,6 +376,24 @@ The :ref:`default object allocator ` uses the If *p* is ``NULL``, no operation is performed. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_GC_New` or + :c:macro:`PyObject_GC_NewVar`; use :c:func:`PyObject_GC_Del` instead. + + .. seealso:: + + * :c:func:`PyObject_GC_Del` is the equivalent of this function for memory + allocated by types that support garbage collection. + * :c:func:`PyObject_Malloc` + * :c:func:`PyObject_Realloc` + * :c:func:`PyObject_Calloc` + * :c:macro:`PyObject_New` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. _default-memory-allocators: @@ -654,6 +672,10 @@ This allocator is disabled if Python is configured with the :option:`--without-pymalloc` option. It can also be disabled at runtime using the :envvar:`PYTHONMALLOC` environment variable (ex: ``PYTHONMALLOC=malloc``). +Typically, it makes sense to disable the pymalloc allocator when building +Python with AddressSanitizer (:option:`--with-address-sanitizer`) which helps +uncover low level bugs within the C code. + Customize pymalloc Arena Allocator ---------------------------------- diff --git a/Doc/c-api/memoryview.rst b/Doc/c-api/memoryview.rst index f6038032805259..e4ac8b57673407 100644 --- a/Doc/c-api/memoryview.rst +++ b/Doc/c-api/memoryview.rst @@ -13,6 +13,12 @@ A :class:`memoryview` object exposes the C level :ref:`buffer interface any other object. +.. c:var:: PyTypeObject PyMemoryView_Type + + This instance of :c:type:`PyTypeObject` represents the Python memoryview + type. This is the same object as :class:`memoryview` in the Python layer. + + .. c:function:: PyObject *PyMemoryView_FromObject(PyObject *obj) Create a memoryview object from an object that provides the buffer interface. diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index f7f4d37d4c721f..11b488e4e52069 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -13,7 +13,7 @@ Module Objects .. index:: single: ModuleType (in module types) This instance of :c:type:`PyTypeObject` represents the Python module type. This - is exposed to Python programs as ``types.ModuleType``. + is exposed to Python programs as :py:class:`types.ModuleType`. .. c:function:: int PyModule_Check(PyObject *p) @@ -71,6 +71,9 @@ Module Objects ``PyObject_*`` functions rather than directly manipulate a module's :attr:`~object.__dict__`. + The returned reference is borrowed from the module; it is valid until + the module is destroyed. + .. c:function:: PyObject* PyModule_GetNameObject(PyObject *module) @@ -90,6 +93,10 @@ Module Objects Similar to :c:func:`PyModule_GetNameObject` but return the name encoded to ``'utf-8'``. + The returned buffer is only valid until the module is renamed or destroyed. + Note that Python code may rename a module by setting its :py:attr:`~module.__name__` + attribute. + .. c:function:: void* PyModule_GetState(PyObject *module) Return the "state" of the module, that is, a pointer to the block of memory @@ -102,6 +109,10 @@ Module Objects Return a pointer to the :c:type:`PyModuleDef` struct from which the module was created, or ``NULL`` if the module wasn't created from a definition. + On error, return ``NULL`` with an exception set. + Use :c:func:`PyErr_Occurred` to tell this case apart from a missing + :c:type:`!PyModuleDef`. + .. c:function:: PyObject* PyModule_GetFilenameObject(PyObject *module) @@ -122,30 +133,44 @@ Module Objects Similar to :c:func:`PyModule_GetFilenameObject` but return the filename encoded to 'utf-8'. + The returned buffer is only valid until the module's :py:attr:`~module.__file__` attribute + is reassigned or the module is destroyed. + .. deprecated:: 3.2 :c:func:`PyModule_GetFilename` raises :exc:`UnicodeEncodeError` on unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead. -.. _initializing-modules: +.. _pymoduledef: -Initializing C modules -^^^^^^^^^^^^^^^^^^^^^^ +Module definitions +------------------ -Modules objects are usually created from extension modules (shared libraries -which export an initialization function), or compiled-in modules -(where the initialization function is added using :c:func:`PyImport_AppendInittab`). -See :ref:`building` or :ref:`extending-with-embedding` for details. +The functions in the previous section work on any module object, including +modules imported from Python code. -The initialization function can either pass a module definition instance -to :c:func:`PyModule_Create`, and return the resulting module object, -or request "multi-phase initialization" by returning the definition struct itself. +Modules defined using the C API typically use a *module definition*, +:c:type:`PyModuleDef` -- a statically allocated, constant “description" of +how a module should be created. + +The definition is usually used to define an extension's “main” module object +(see :ref:`extension-modules` for details). +It is also used to +:ref:`create extension modules dynamically `. + +Unlike :c:func:`PyModule_New`, the definition allows management of +*module state* -- a piece of memory that is allocated and cleared together +with the module object. +Unlike the module's Python attributes, Python code cannot replace or delete +data stored in module state. .. c:type:: PyModuleDef The module definition struct, which holds all information needed to create - a module object. There is usually only one statically initialized variable - of this type for each module. + a module object. + This structure must be statically allocated (or be otherwise guaranteed + to be valid while any modules created from it exist). + Usually, there is only one variable of this type for each extension module. .. c:member:: PyModuleDef_Base m_base @@ -170,13 +195,15 @@ or request "multi-phase initialization" by returning the definition struct itsel and freed when the module object is deallocated, after the :c:member:`~PyModuleDef.m_free` function has been called, if present. - Setting ``m_size`` to ``-1`` means that the module does not support - sub-interpreters, because it has global state. - Setting it to a non-negative value means that the module can be re-initialized and specifies the additional amount of memory it requires - for its state. Non-negative ``m_size`` is required for multi-phase - initialization. + for its state. + + Setting ``m_size`` to ``-1`` means that the module does not support + sub-interpreters, because it has global state. + Negative ``m_size`` is only allowed when using + :ref:`legacy single-phase initialization ` + or when :ref:`creating modules dynamically `. See :PEP:`3121` for more details. @@ -189,7 +216,7 @@ or request "multi-phase initialization" by returning the definition struct itsel An array of slot definitions for multi-phase initialization, terminated by a ``{0, NULL}`` entry. - When using single-phase initialization, *m_slots* must be ``NULL``. + When using legacy single-phase initialization, *m_slots* must be ``NULL``. .. versionchanged:: 3.5 @@ -249,78 +276,9 @@ or request "multi-phase initialization" by returning the definition struct itsel .. versionchanged:: 3.9 No longer called before the module state is allocated. -Single-phase initialization -........................... - -The module initialization function may create and return the module object -directly. This is referred to as "single-phase initialization", and uses one -of the following two module creation functions: -.. c:function:: PyObject* PyModule_Create(PyModuleDef *def) - - Create a new module object, given the definition in *def*. This behaves - like :c:func:`PyModule_Create2` with *module_api_version* set to - :c:macro:`PYTHON_API_VERSION`. - - -.. c:function:: PyObject* PyModule_Create2(PyModuleDef *def, int module_api_version) - - Create a new module object, given the definition in *def*, assuming the - API version *module_api_version*. If that version does not match the version - of the running interpreter, a :exc:`RuntimeWarning` is emitted. - - Return ``NULL`` with an exception set on error. - - .. note:: - - Most uses of this function should be using :c:func:`PyModule_Create` - instead; only use this if you are sure you need it. - -Before it is returned from in the initialization function, the resulting module -object is typically populated using functions like :c:func:`PyModule_AddObjectRef`. - -.. _multi-phase-initialization: - -Multi-phase initialization -.......................... - -An alternate way to specify extensions is to request "multi-phase initialization". -Extension modules created this way behave more like Python modules: the -initialization is split between the *creation phase*, when the module object -is created, and the *execution phase*, when it is populated. -The distinction is similar to the :py:meth:`!__new__` and :py:meth:`!__init__` methods -of classes. - -Unlike modules created using single-phase initialization, these modules are not -singletons: if the *sys.modules* entry is removed and the module is re-imported, -a new module object is created, and the old module is subject to normal garbage -collection -- as with Python modules. -By default, multiple modules created from the same definition should be -independent: changes to one should not affect the others. -This means that all state should be specific to the module object (using e.g. -using :c:func:`PyModule_GetState`), or its contents (such as the module's -:attr:`~object.__dict__` or individual classes created with :c:func:`PyType_FromSpec`). - -All modules created using multi-phase initialization are expected to support -:ref:`sub-interpreters `. Making sure multiple modules -are independent is typically enough to achieve this. - -To request multi-phase initialization, the initialization function -(PyInit_modulename) returns a :c:type:`PyModuleDef` instance with non-empty -:c:member:`~PyModuleDef.m_slots`. Before it is returned, the ``PyModuleDef`` -instance must be initialized with the following function: - -.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) - - Ensures a module definition is a properly initialized Python object that - correctly reports its type and reference count. - - Returns *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. - - .. versionadded:: 3.5 - -The *m_slots* member of the module definition must point to an array of -``PyModuleDef_Slot`` structures: +Module slots +............ .. c:type:: PyModuleDef_Slot @@ -334,8 +292,6 @@ The *m_slots* member of the module definition must point to an array of .. versionadded:: 3.5 -The *m_slots* array must be terminated by a slot with id 0. - The available slot types are: .. c:macro:: Py_mod_create @@ -372,6 +328,8 @@ The available slot types are: ``PyModuleDef`` has non-``NULL`` ``m_traverse``, ``m_clear``, ``m_free``; non-zero ``m_size``; or slots other than ``Py_mod_create``. + .. versionadded:: 3.5 + .. c:macro:: Py_mod_exec Specifies a function that is called to *execute* the module. @@ -386,6 +344,8 @@ The available slot types are: If multiple ``Py_mod_exec`` slots are specified, they are processed in the order they appear in the *m_slots* array. + .. versionadded:: 3.5 + .. c:macro:: Py_mod_multiple_interpreters Specifies one of the following values: @@ -446,21 +406,48 @@ The available slot types are: .. versionadded:: 3.13 -See :PEP:`489` for more details on multi-phase initialization. -Low-level module creation functions -................................... +.. _moduledef-dynamic: + +Creating extension modules dynamically +-------------------------------------- + +The following functions may be used to create a module outside of an +extension's :ref:`initialization function `. +They are also used in +:ref:`single-phase initialization `. + +.. c:function:: PyObject* PyModule_Create(PyModuleDef *def) + + Create a new module object, given the definition in *def*. + This is a macro that calls :c:func:`PyModule_Create2` with + *module_api_version* set to :c:macro:`PYTHON_API_VERSION`, or + to :c:macro:`PYTHON_ABI_VERSION` if using the + :ref:`limited API `. + +.. c:function:: PyObject* PyModule_Create2(PyModuleDef *def, int module_api_version) + + Create a new module object, given the definition in *def*, assuming the + API version *module_api_version*. If that version does not match the version + of the running interpreter, a :exc:`RuntimeWarning` is emitted. + + Return ``NULL`` with an exception set on error. + + This function does not support slots. + The :c:member:`~PyModuleDef.m_slots` member of *def* must be ``NULL``. + + + .. note:: -The following functions are called under the hood when using multi-phase -initialization. They can be used directly, for example when creating module -objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and -``PyModule_ExecDef`` must be called to fully initialize a module. + Most uses of this function should be using :c:func:`PyModule_Create` + instead; only use this if you are sure you need it. .. c:function:: PyObject * PyModule_FromDefAndSpec(PyModuleDef *def, PyObject *spec) - Create a new module object, given the definition in *def* and the - ModuleSpec *spec*. This behaves like :c:func:`PyModule_FromDefAndSpec2` - with *module_api_version* set to :c:macro:`PYTHON_API_VERSION`. + This macro calls :c:func:`PyModule_FromDefAndSpec2` with + *module_api_version* set to :c:macro:`PYTHON_API_VERSION`, or + to :c:macro:`PYTHON_ABI_VERSION` if using the + :ref:`limited API `. .. versionadded:: 3.5 @@ -473,6 +460,10 @@ objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and Return ``NULL`` with an exception set on error. + Note that this does not process execution slots (:c:data:`Py_mod_exec`). + Both ``PyModule_FromDefAndSpec`` and ``PyModule_ExecDef`` must be called + to fully initialize a module. + .. note:: Most uses of this function should be using :c:func:`PyModule_FromDefAndSpec` @@ -486,35 +477,29 @@ objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and .. versionadded:: 3.5 -.. c:function:: int PyModule_SetDocString(PyObject *module, const char *docstring) +.. c:macro:: PYTHON_API_VERSION - Set the docstring for *module* to *docstring*. - This function is called automatically when creating a module from - ``PyModuleDef``, using either ``PyModule_Create`` or - ``PyModule_FromDefAndSpec``. + The C API version. Defined for backwards compatibility. - .. versionadded:: 3.5 + Currently, this constant is not updated in new Python versions, and is not + useful for versioning. This may change in the future. -.. c:function:: int PyModule_AddFunctions(PyObject *module, PyMethodDef *functions) +.. c:macro:: PYTHON_ABI_VERSION - Add the functions from the ``NULL`` terminated *functions* array to *module*. - Refer to the :c:type:`PyMethodDef` documentation for details on individual - entries (due to the lack of a shared module namespace, module level - "functions" implemented in C typically receive the module as their first - parameter, making them similar to instance methods on Python classes). - This function is called automatically when creating a module from - ``PyModuleDef``, using either ``PyModule_Create`` or - ``PyModule_FromDefAndSpec``. + Defined as ``3`` for backwards compatibility. + + Currently, this constant is not updated in new Python versions, and is not + useful for versioning. This may change in the future. - .. versionadded:: 3.5 Support functions -................. +----------------- -The module initialization function (if using single phase initialization) or -a function called from a module execution slot (if using multi-phase -initialization), can use the following functions to help initialize the module -state: +The following functions are provided to help initialize a module +state. +They are intended for a module's execution slots (:c:data:`Py_mod_exec`), +the initialization function for legacy :ref:`single-phase initialization `, +or code that creates modules dynamically. .. c:function:: int PyModule_AddObjectRef(PyObject *module, const char *name, PyObject *value) @@ -663,12 +648,42 @@ state: .. versionadded:: 3.9 +.. c:function:: int PyModule_AddFunctions(PyObject *module, PyMethodDef *functions) + + Add the functions from the ``NULL`` terminated *functions* array to *module*. + Refer to the :c:type:`PyMethodDef` documentation for details on individual + entries (due to the lack of a shared module namespace, module level + "functions" implemented in C typically receive the module as their first + parameter, making them similar to instance methods on Python classes). + + This function is called automatically when creating a module from + ``PyModuleDef`` (such as when using :ref:`multi-phase-initialization`, + ``PyModule_Create``, or ``PyModule_FromDefAndSpec``). + Some module authors may prefer defining functions in multiple + :c:type:`PyMethodDef` arrays; in that case they should call this function + directly. + + The *functions* array must be statically allocated (or otherwise guaranteed + to outlive the module object). + + .. versionadded:: 3.5 + +.. c:function:: int PyModule_SetDocString(PyObject *module, const char *docstring) + + Set the docstring for *module* to *docstring*. + This function is called automatically when creating a module from + ``PyModuleDef`` (such as when using :ref:`multi-phase-initialization`, + ``PyModule_Create``, or ``PyModule_FromDefAndSpec``). + + .. versionadded:: 3.5 + .. c:function:: int PyUnstable_Module_SetGIL(PyObject *module, void *gil) Indicate that *module* does or does not support running without the global interpreter lock (GIL), using one of the values from :c:macro:`Py_mod_gil`. It must be called during *module*'s initialization - function. If this function is not called during module initialization, the + function when using :ref:`single-phase-initialization`. + If this function is not called during module initialization, the import machinery assumes the module does not support running without the GIL. This function is only available in Python builds configured with :option:`--disable-gil`. @@ -677,10 +692,11 @@ state: .. versionadded:: 3.13 -Module lookup -^^^^^^^^^^^^^ +Module lookup (single-phase initialization) +........................................... -Single-phase initialization creates singleton modules that can be looked up +The legacy :ref:`single-phase initialization ` +initialization scheme creates singleton modules that can be looked up in the context of the current interpreter. This allows the module object to be retrieved later with only a reference to the module definition. @@ -701,7 +717,8 @@ since multiple such modules can be created from a single definition. Only effective on modules created using single-phase initialization. - Python calls ``PyState_AddModule`` automatically after importing a module, + Python calls ``PyState_AddModule`` automatically after importing a module + that uses :ref:`single-phase initialization `, so it is unnecessary (but harmless) to call it from module initialization code. An explicit call is needed only if the module's own init code subsequently calls ``PyState_FindModule``. @@ -709,6 +726,9 @@ since multiple such modules can be created from a single definition. mechanisms (either by calling it directly, or by referring to its implementation for details of the required state updates). + If a module was attached previously using the same *def*, it is replaced + by the new *module*. + The caller must have an :term:`attached thread state`. Return ``-1`` with an exception set on error, ``0`` on success. diff --git a/Doc/c-api/monitoring.rst b/Doc/c-api/monitoring.rst index 7926148302af0b..b0227c2f4faf15 100644 --- a/Doc/c-api/monitoring.rst +++ b/Doc/c-api/monitoring.rst @@ -136,7 +136,7 @@ Managing the Monitoring State ----------------------------- Monitoring states can be managed with the help of monitoring scopes. A scope -would typically correspond to a python function. +would typically correspond to a Python function. .. c:function:: int PyMonitoring_EnterScope(PyMonitoringState *state_array, uint64_t *version, const uint8_t *event_types, Py_ssize_t length) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 0fd159f1eb87f8..e215027330fb43 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -73,7 +73,7 @@ Object Protocol Flag to be used with multiple functions that print the object (like :c:func:`PyObject_Print` and :c:func:`PyFile_WriteObject`). - If passed, these function would use the :func:`str` of the object + If passed, these functions use the :func:`str` of the object instead of the :func:`repr`. @@ -585,7 +585,7 @@ Object Protocol Clear the managed dictionary of *obj*. - This function must only be called in a traverse function of the type which + This function must only be called in a clear function of the type which has the :c:macro:`Py_TPFLAGS_MANAGED_DICT` flag set. .. versionadded:: 3.13 @@ -596,12 +596,13 @@ Object Protocol if supported by the runtime. In the :term:`free-threaded ` build, this allows the interpreter to avoid reference count adjustments to *obj*, which may improve multi-threaded performance. The tradeoff is - that *obj* will only be deallocated by the tracing garbage collector. + that *obj* will only be deallocated by the tracing garbage collector, and + not when the interpreter no longer has any references to it. - This function returns ``1`` if deferred reference counting is enabled on *obj* - (including when it was enabled before the call), + This function returns ``1`` if deferred reference counting is enabled on *obj*, and ``0`` if deferred reference counting is not supported or if the hint was - ignored by the runtime. This function is thread-safe, and cannot fail. + ignored by the interpreter, such as when deferred reference counting is already + enabled on *obj*. This function is thread-safe, and cannot fail. This function does nothing on builds with the :term:`GIL` enabled, which do not support deferred reference counting. This also does nothing if *obj* is not @@ -609,7 +610,8 @@ Object Protocol :c:func:`PyObject_GC_IsTracked`). This function is intended to be used soon after *obj* is created, - by the code that creates it. + by the code that creates it, such as in the object's :c:member:`~PyTypeObject.tp_new` + slot. .. versionadded:: 3.14 diff --git a/Doc/c-api/objimpl.rst b/Doc/c-api/objimpl.rst index 8bd8c107c98bdf..83de4248039949 100644 --- a/Doc/c-api/objimpl.rst +++ b/Doc/c-api/objimpl.rst @@ -12,6 +12,7 @@ object types. .. toctree:: allocation.rst + lifecycle.rst structures.rst typeobj.rst gcsupport.rst diff --git a/Doc/c-api/picklebuffer.rst b/Doc/c-api/picklebuffer.rst new file mode 100644 index 00000000000000..9e2d92341b0f93 --- /dev/null +++ b/Doc/c-api/picklebuffer.rst @@ -0,0 +1,59 @@ +.. highlight:: c + +.. _picklebuffer-objects: + +.. index:: + pair: object; PickleBuffer + +Pickle buffer objects +--------------------- + +.. versionadded:: 3.8 + +A :class:`pickle.PickleBuffer` object wraps a :ref:`buffer-providing object +` for out-of-band data transfer with the :mod:`pickle` module. + + +.. c:var:: PyTypeObject PyPickleBuffer_Type + + This instance of :c:type:`PyTypeObject` represents the Python pickle buffer type. + This is the same object as :class:`pickle.PickleBuffer` in the Python layer. + + +.. c:function:: int PyPickleBuffer_Check(PyObject *op) + + Return true if *op* is a pickle buffer instance. + This function always succeeds. + + +.. c:function:: PyObject *PyPickleBuffer_FromObject(PyObject *obj) + + Create a pickle buffer from the object *obj*. + + This function will fail if *obj* doesn't support the :ref:`buffer protocol `. + + On success, return a new pickle buffer instance. + On failure, set an exception and return ``NULL``. + + Analogous to calling :class:`pickle.PickleBuffer` with *obj* in Python. + + +.. c:function:: const Py_buffer *PyPickleBuffer_GetBuffer(PyObject *picklebuf) + + Get a pointer to the underlying :c:type:`Py_buffer` that the pickle buffer wraps. + + The returned pointer is valid as long as *picklebuf* is alive and has not been + released. The caller must not modify or free the returned :c:type:`Py_buffer`. + If the pickle buffer has been released, raise :exc:`ValueError`. + + On success, return a pointer to the buffer view. + On failure, set an exception and return ``NULL``. + + +.. c:function:: int PyPickleBuffer_Release(PyObject *picklebuf) + + Release the underlying buffer held by the pickle buffer. + + Return ``0`` on success. On failure, set an exception and return ``-1``. + + Analogous to calling :meth:`pickle.PickleBuffer.release` in Python. diff --git a/Doc/c-api/refcounting.rst b/Doc/c-api/refcounting.rst index b23f016f9b0a06..57a0728d4e9af4 100644 --- a/Doc/c-api/refcounting.rst +++ b/Doc/c-api/refcounting.rst @@ -210,7 +210,7 @@ of Python objects. Py_SETREF(dst, src); - That arranges to set *dst* to *src* _before_ releasing the reference + That arranges to set *dst* to *src* *before* releasing the reference to the old value of *dst*, so that any code triggered as a side-effect of *dst* getting torn down no longer believes *dst* points to a valid object. diff --git a/Doc/c-api/set.rst b/Doc/c-api/set.rst index cba823aa027bd6..09c0fb6b9c5f23 100644 --- a/Doc/c-api/set.rst +++ b/Doc/c-api/set.rst @@ -147,7 +147,7 @@ subtypes but not for instances of :class:`frozenset` or its subtypes. Return ``1`` if found and removed, ``0`` if not found (no action taken), and ``-1`` if an error is encountered. Does not raise :exc:`KeyError` for missing keys. Raise a - :exc:`TypeError` if the *key* is unhashable. Unlike the Python :meth:`~frozenset.discard` + :exc:`TypeError` if the *key* is unhashable. Unlike the Python :meth:`~set.discard` method, this function does not automatically convert unhashable sets into temporary frozensets. Raise :exc:`SystemError` if *set* is not an instance of :class:`set` or its subtype. diff --git a/Doc/c-api/stable.rst b/Doc/c-api/stable.rst index 124e58cf950b7a..9b65e0b8d23d93 100644 --- a/Doc/c-api/stable.rst +++ b/Doc/c-api/stable.rst @@ -51,6 +51,7 @@ It is generally intended for specialized, low-level tools like debuggers. Projects that use this API are expected to follow CPython development and spend extra effort adjusting to changes. +.. _stable-application-binary-interface: Stable Application Binary Interface =================================== diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 987bc167c68d6c..b4e7cb1d77e1a3 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -28,18 +28,52 @@ under :ref:`reference counting `. object. In a normal "release" build, it contains only the object's reference count and a pointer to the corresponding type object. Nothing is actually declared to be a :c:type:`PyObject`, but every pointer - to a Python object can be cast to a :c:expr:`PyObject*`. Access to the - members must be done by using the macros :c:macro:`Py_REFCNT` and - :c:macro:`Py_TYPE`. + to a Python object can be cast to a :c:expr:`PyObject*`. + + The members must not be accessed directly; instead use macros such as + :c:macro:`Py_REFCNT` and :c:macro:`Py_TYPE`. + + .. c:member:: Py_ssize_t ob_refcnt + + The object's reference count, as returned by :c:macro:`Py_REFCNT`. + Do not use this field directly; instead use functions and macros such as + :c:macro:`!Py_REFCNT`, :c:func:`Py_INCREF` and :c:func:`Py_DecRef`. + + The field type may be different from ``Py_ssize_t``, depending on + build configuration and platform. + + .. c:member:: PyTypeObject* ob_type + + The object's type. + Do not use this field directly; use :c:macro:`Py_TYPE` and + :c:func:`Py_SET_TYPE` instead. .. c:type:: PyVarObject - This is an extension of :c:type:`PyObject` that adds the :c:member:`~PyVarObject.ob_size` - field. This is only used for objects that have some notion of *length*. - This type does not often appear in the Python/C API. - Access to the members must be done by using the macros - :c:macro:`Py_REFCNT`, :c:macro:`Py_TYPE`, and :c:macro:`Py_SIZE`. + An extension of :c:type:`PyObject` that adds the + :c:member:`~PyVarObject.ob_size` field. + This is intended for objects that have some notion of *length*. + + As with :c:type:`!PyObject`, the members must not be accessed directly; + instead use macros such as :c:macro:`Py_SIZE`, :c:macro:`Py_REFCNT` and + :c:macro:`Py_TYPE`. + + .. c:member:: Py_ssize_t ob_size + + A size field, whose contents should be considered an object's internal + implementation detail. + + Do not use this field directly; use :c:macro:`Py_SIZE` instead. + + Object creation functions such as :c:func:`PyObject_NewVar` will + generally set this field to the requested size (number of items). + After creation, arbitrary values can be stored in :c:member:`!ob_size` + using :c:macro:`Py_SET_SIZE`. + + To get an object's publicly exposed length, as returned by + the Python function :py:func:`len`, use :c:func:`PyObject_Length` + instead. .. c:macro:: PyObject_HEAD @@ -103,9 +137,8 @@ under :ref:`reference counting `. Get the type of the Python object *o*. - Return a :term:`borrowed reference`. - - Use the :c:func:`Py_SET_TYPE` function to set an object type. + The returned reference is :term:`borrowed ` from *o*. + Do not release it with :c:func:`Py_DECREF` or similar. .. versionchanged:: 3.11 :c:func:`Py_TYPE()` is changed to an inline static function. @@ -122,16 +155,26 @@ under :ref:`reference counting `. .. c:function:: void Py_SET_TYPE(PyObject *o, PyTypeObject *type) - Set the object *o* type to *type*. + Set the type of object *o* to *type*, without any checking or reference + counting. + + This is a very low-level operation. + Consider instead setting the Python attribute :attr:`~object.__class__` + using :c:func:`PyObject_SetAttrString` or similar. + + Note that assigning an incompatible type can lead to undefined behavior. + + If *type* is a :ref:`heap type `, the caller must create a + new reference to it. + Similarly, if the old type of *o* is a heap type, the caller must release + a reference to that type. .. versionadded:: 3.9 .. c:function:: Py_ssize_t Py_SIZE(PyVarObject *o) - Get the size of the Python object *o*. - - Use the :c:func:`Py_SET_SIZE` function to set an object size. + Get the :c:member:`~PyVarObject.ob_size` field of *o*. .. versionchanged:: 3.11 :c:func:`Py_SIZE()` is changed to an inline static function. @@ -140,7 +183,7 @@ under :ref:`reference counting `. .. c:function:: void Py_SET_SIZE(PyVarObject *o, Py_ssize_t size) - Set the object *o* size to *size*. + Set the :c:member:`~PyVarObject.ob_size` field of *o* to *size*. .. versionadded:: 3.9 @@ -404,6 +447,25 @@ definition with the same method name. slot. This is helpful because calls to PyCFunctions are optimized more than wrapper object calls. + +.. c:var:: PyTypeObject PyCMethod_Type + + The type object corresponding to Python C method objects. This is + available as :class:`types.BuiltinMethodType` in the Python layer. + + +.. c:function:: int PyCMethod_Check(PyObject *op) + + Return true if *op* is an instance of the :c:type:`PyCMethod_Type` type + or a subtype of it. This function always succeeds. + + +.. c:function:: int PyCMethod_CheckExact(PyObject *op) + + This is the same as :c:func:`PyCMethod_Check`, but does not account for + subtypes. + + .. c:function:: PyObject * PyCMethod_New(PyMethodDef *ml, PyObject *self, PyObject *module, PyTypeObject *cls) Turn *ml* into a Python :term:`callable` object. @@ -429,6 +491,24 @@ definition with the same method name. .. versionadded:: 3.9 +.. c:var:: PyTypeObject PyCFunction_Type + + The type object corresponding to Python C function objects. This is + available as :class:`types.BuiltinFunctionType` in the Python layer. + + +.. c:function:: int PyCFunction_Check(PyObject *op) + + Return true if *op* is an instance of the :c:type:`PyCFunction_Type` type + or a subtype of it. This function always succeeds. + + +.. c:function:: int PyCFunction_CheckExact(PyObject *op) + + This is the same as :c:func:`PyCFunction_Check`, but does not account for + subtypes. + + .. c:function:: PyObject * PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module) Equivalent to ``PyCMethod_New(ml, self, module, NULL)``. @@ -439,6 +519,62 @@ definition with the same method name. Equivalent to ``PyCMethod_New(ml, self, NULL, NULL)``. +.. c:function:: int PyCFunction_GetFlags(PyObject *func) + + Get the function's flags on *func* as they were passed to + :c:member:`~PyMethodDef.ml_flags`. + + If *func* is not a C function object, this fails with an exception. + *func* must not be ``NULL``. + + This function returns the function's flags on success, and ``-1`` with an + exception set on failure. + + +.. c:function:: int PyCFunction_GET_FLAGS(PyObject *func) + + This is the same as :c:func:`PyCFunction_GetFlags`, but without error + or type checking. + + +.. c:function:: PyCFunction PyCFunction_GetFunction(PyObject *func) + + Get the function pointer on *func* as it was passed to + :c:member:`~PyMethodDef.ml_meth`. + + If *func* is not a C function object, this fails with an exception. + *func* must not be ``NULL``. + + This function returns the function pointer on success, and ``NULL`` with an + exception set on failure. + + +.. c:function:: int PyCFunction_GET_FUNCTION(PyObject *func) + + This is the same as :c:func:`PyCFunction_GetFunction`, but without error + or type checking. + + +.. c:function:: PyObject *PyCFunction_GetSelf(PyObject *func) + + Get the "self" object on *func*. This is the object that would be passed + to the first argument of a :c:type:`PyCFunction`. For C function objects + created through a :c:type:`PyMethodDef` on a :c:type:`PyModuleDef`, this + is the resulting module object. + + If *func* is not a C function object, this fails with an exception. + *func* must not be ``NULL``. + + This function returns a :term:`borrowed reference` to the "self" object + on success, and ``NULL`` with an exception set on failure. + + +.. c:function:: PyObject *PyCFunction_GET_SELF(PyObject *func) + + This is the same as :c:func:`PyCFunction_GetSelf`, but without error or + type checking. + + Accessing attributes of extension types --------------------------------------- @@ -562,14 +698,12 @@ The following flags can be used with :c:member:`PyMemberDef.flags`: entry indicates an offset from the subclass-specific data, rather than from ``PyObject``. - Can only be used as part of :c:member:`Py_tp_members ` + Can only be used as part of the :c:data:`Py_tp_members` :c:type:`slot ` when creating a class using negative :c:member:`~PyType_Spec.basicsize`. It is mandatory in that case. - - This flag is only used in :c:type:`PyType_Slot`. - When setting :c:member:`~PyTypeObject.tp_members` during - class creation, Python clears it and sets + When setting :c:member:`~PyTypeObject.tp_members` from the slot during + class creation, Python clears the flag and sets :c:member:`PyMemberDef.offset` to the offset from the ``PyObject`` struct. .. index:: diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index b3c89800e386ff..3a8cfd95b03513 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -123,6 +123,24 @@ Operating System Utilities This is a thin wrapper around either :c:func:`!sigaction` or :c:func:`!signal`. Do not call those functions directly! + +.. c:function:: int PyOS_InterruptOccurred(void) + + Check if a :c:macro:`!SIGINT` signal has been received. + + Returns ``1`` if a :c:macro:`!SIGINT` has occurred and clears the signal flag, + or ``0`` otherwise. + + In most cases, you should prefer :c:func:`PyErr_CheckSignals` over this function. + :c:func:`!PyErr_CheckSignals` invokes the appropriate signal handlers + for all pending signals, allowing Python code to handle the signal properly. + This function only detects :c:macro:`!SIGINT` and does not invoke any Python + signal handlers. + + This function is async-signal-safe and this function cannot fail. + The caller must hold an :term:`attached thread state`. + + .. c:function:: wchar_t* Py_DecodeLocale(const char* arg, size_t *size) .. warning:: diff --git a/Doc/c-api/tuple.rst b/Doc/c-api/tuple.rst index 815afddad19df1..be960243f76366 100644 --- a/Doc/c-api/tuple.rst +++ b/Doc/c-api/tuple.rst @@ -48,7 +48,7 @@ Tuple Objects .. c:function:: Py_ssize_t PyTuple_Size(PyObject *p) Take a pointer to a tuple object, and return the size of that tuple. - On error, return ``-1`` and with an exception set. + On error, return ``-1`` with an exception set. .. c:function:: Py_ssize_t PyTuple_GET_SIZE(PyObject *p) diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index ec2867b0ce09ba..2f2060d0582251 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -116,6 +116,20 @@ Type Objects .. versionadded:: 3.12 +.. c:function:: int PyType_Unwatch(int watcher_id, PyObject *type) + + Mark *type* as not watched. This undoes a previous call to + :c:func:`PyType_Watch`. *type* must not be ``NULL``. + + An extension should never call this function with a *watcher_id* that was + not returned to it by a previous call to :c:func:`PyType_AddWatcher`. + + On success, this function returns ``0``. On failure, this function returns + ``-1`` with an exception set. + + .. versionadded:: 3.12 + + .. c:type:: int (*PyType_WatchCallback)(PyObject *type) Type of a type-watcher callback function. @@ -133,6 +147,18 @@ Type Objects Type features are denoted by single bit flags. +.. c:function:: int PyType_FastSubclass(PyTypeObject *type, int flag) + + Return non-zero if the type object *type* sets the subclass flag *flag*. + Subclass flags are denoted by + :c:macro:`Py_TPFLAGS_*_SUBCLASS `. + This function is used by many ``_Check`` functions for common types. + + .. seealso:: + :c:func:`PyObject_TypeCheck`, which is used as a slower alternative in + ``_Check`` functions for types that don't come with subclass flags. + + .. c:function:: int PyType_IS_GC(PyTypeObject *o) Return true if the type object includes support for the cycle detector; this @@ -151,14 +177,31 @@ Type Objects .. c:function:: PyObject* PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) - Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type object. Use - Python's default memory allocation mechanism to allocate a new instance and - initialize all its contents to ``NULL``. + Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type + object. Uses Python's default memory allocation mechanism to allocate memory + for a new instance, zeros the memory, then initializes the memory as if by + calling :c:func:`PyObject_Init` or :c:func:`PyObject_InitVar`. + + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + For types that support garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set), this function behaves like + :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar` (except the + memory is guaranteed to be zeroed before initialization), and should be + paired with :c:func:`PyObject_GC_Del` in :c:member:`~PyTypeObject.tp_free`. + Otherwise, it behaves like :c:macro:`PyObject_New` or + :c:macro:`PyObject_NewVar` (except the memory is guaranteed to be zeroed + before initialization) and should be paired with :c:func:`PyObject_Free` in + :c:member:`~PyTypeObject.tp_free`. + .. c:function:: PyObject* PyType_GenericNew(PyTypeObject *type, PyObject *args, PyObject *kwds) - Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type object. Create a - new instance using the type's :c:member:`~PyTypeObject.tp_alloc` slot. + Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type + object. Creates a new instance using the type's + :c:member:`~PyTypeObject.tp_alloc` slot and returns the resulting object. + .. c:function:: int PyType_Ready(PyTypeObject *type) @@ -176,6 +219,7 @@ Type Objects GC protocol itself by at least implementing the :c:member:`~PyTypeObject.tp_traverse` handle. + .. c:function:: PyObject* PyType_GetName(PyTypeObject *type) Return the type's name. Equivalent to getting the type's @@ -183,6 +227,7 @@ Type Objects .. versionadded:: 3.11 + .. c:function:: PyObject* PyType_GetQualName(PyTypeObject *type) Return the type's qualified name. Equivalent to getting the @@ -198,6 +243,7 @@ Type Objects .. versionadded:: 3.13 + .. c:function:: PyObject* PyType_GetModuleName(PyTypeObject *type) Return the type's module name. Equivalent to getting the @@ -205,6 +251,7 @@ Type Objects .. versionadded:: 3.13 + .. c:function:: void* PyType_GetSlot(PyTypeObject *type, int slot) Return the function pointer stored in the given slot. If the @@ -221,6 +268,7 @@ Type Objects :c:func:`PyType_GetSlot` can now accept all types. Previously, it was limited to :ref:`heap types `. + .. c:function:: PyObject* PyType_GetModule(PyTypeObject *type) Return the module object associated with the given type when the type was @@ -240,6 +288,7 @@ Type Objects .. versionadded:: 3.9 + .. c:function:: void* PyType_GetModuleState(PyTypeObject *type) Return the state of the module object associated with the given type. @@ -254,6 +303,7 @@ Type Objects .. versionadded:: 3.9 + .. c:function:: PyObject* PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) Find the first superclass whose module was created from @@ -267,8 +317,13 @@ Type Objects and other places where a method's defining class cannot be passed using the :c:type:`PyCMethod` calling convention. + The returned reference is :term:`borrowed ` from *type*, + and will be valid as long as you hold a reference to *type*. + Do not release it with :c:func:`Py_DECREF` or similar. + .. versionadded:: 3.11 + .. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) Find the first superclass in *type*'s :term:`method resolution order` whose @@ -287,6 +342,7 @@ Type Objects .. versionadded:: 3.14 + .. c:function:: int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) Attempt to assign a version tag to the given type. @@ -297,6 +353,16 @@ Type Objects .. versionadded:: 3.12 +.. c:function:: int PyType_SUPPORTS_WEAKREFS(PyTypeObject *type) + + Return true if instances of *type* support creating weak references, false + otherwise. This function always succeeds. *type* must not be ``NULL``. + + .. seealso:: + * :ref:`weakrefobjects` + * :py:mod:`weakref` + + Creating Heap-Allocated Types ............................. @@ -317,8 +383,8 @@ The following functions and structs are used to create The *bases* argument can be used to specify base classes; it can either be only one class or a tuple of classes. - If *bases* is ``NULL``, the *Py_tp_bases* slot is used instead. - If that also is ``NULL``, the *Py_tp_base* slot is used instead. + If *bases* is ``NULL``, the :c:data:`Py_tp_bases` slot is used instead. + If that also is ``NULL``, the :c:data:`Py_tp_base` slot is used instead. If that also is ``NULL``, the new type derives from :class:`object`. The *module* argument can be used to record the module in which the new @@ -345,6 +411,7 @@ The following functions and structs are used to create .. versionadded:: 3.12 + .. c:function:: PyObject* PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) Equivalent to ``PyType_FromMetaclass(NULL, module, spec, bases)``. @@ -371,6 +438,7 @@ The following functions and structs are used to create Creating classes whose metaclass overrides :c:member:`~PyTypeObject.tp_new` is no longer allowed. + .. c:function:: PyObject* PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases) Equivalent to ``PyType_FromMetaclass(NULL, NULL, spec, bases)``. @@ -392,6 +460,7 @@ The following functions and structs are used to create Creating classes whose metaclass overrides :c:member:`~PyTypeObject.tp_new` is no longer allowed. + .. c:function:: PyObject* PyType_FromSpec(PyType_Spec *spec) Equivalent to ``PyType_FromMetaclass(NULL, NULL, spec, NULL)``. @@ -412,6 +481,7 @@ The following functions and structs are used to create Creating classes whose metaclass overrides :c:member:`~PyTypeObject.tp_new` is no longer allowed. + .. c:function:: int PyType_Freeze(PyTypeObject *type) Make a type immutable: set the :c:macro:`Py_TPFLAGS_IMMUTABLETYPE` flag. @@ -520,9 +590,9 @@ The following functions and structs are used to create :c:type:`PyAsyncMethods` with an added ``Py_`` prefix. For example, use: - * ``Py_tp_dealloc`` to set :c:member:`PyTypeObject.tp_dealloc` - * ``Py_nb_add`` to set :c:member:`PyNumberMethods.nb_add` - * ``Py_sq_length`` to set :c:member:`PySequenceMethods.sq_length` + * :c:data:`Py_tp_dealloc` to set :c:member:`PyTypeObject.tp_dealloc` + * :c:data:`Py_nb_add` to set :c:member:`PyNumberMethods.nb_add` + * :c:data:`Py_sq_length` to set :c:member:`PySequenceMethods.sq_length` An additional slot is supported that does not correspond to a :c:type:`!PyTypeObject` struct field: @@ -541,7 +611,7 @@ The following functions and structs are used to create If it is not possible to switch to a ``MANAGED`` flag (for example, for vectorcall or to support Python older than 3.12), specify the - offset in :c:member:`Py_tp_members `. + offset in :c:data:`Py_tp_members`. See :ref:`PyMemberDef documentation ` for details. @@ -568,8 +638,8 @@ The following functions and structs are used to create under the :ref:`limited API `. .. versionchanged:: 3.14 - The field :c:member:`~PyTypeObject.tp_vectorcall` can now set - using ``Py_tp_vectorcall``. See the field's documentation + The field :c:member:`~PyTypeObject.tp_vectorcall` can now be set + using :c:data:`Py_tp_vectorcall`. See the field's documentation for details. .. c:member:: void *pfunc @@ -579,10 +649,11 @@ The following functions and structs are used to create *pfunc* values may not be ``NULL``, except for the following slots: - * ``Py_tp_doc`` + * :c:data:`Py_tp_doc` * :c:data:`Py_tp_token` (for clarity, prefer :c:data:`Py_TP_USE_SPEC` rather than ``NULL``) + .. c:macro:: Py_tp_token A :c:member:`~PyType_Slot.slot` that records a static memory layout ID diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 3b9f07778d5ace..42bfbeccff1429 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -79,7 +79,7 @@ Quick Reference | :c:member:`~PyTypeObject.tp_setattro` | :c:type:`setattrofunc` | __setattr__, | X | X | | G | | | | __delattr__ | | | | | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ - | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | | | | | % | + | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | :ref:`sub-slots` | | | | % | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ | :c:member:`~PyTypeObject.tp_flags` | unsigned long | | X | X | | ? | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ @@ -325,9 +325,10 @@ sub-slots +---------------------------------------------------------+-----------------------------------+---------------+ | | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | | + | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | __buffer__ | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | | + | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | __release_\ | + | | | buffer\__ | +---------------------------------------------------------+-----------------------------------+---------------+ .. _slot-typedefs-table: @@ -491,9 +492,9 @@ metatype) initializes :c:member:`~PyTypeObject.tp_itemsize`, which means that it type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. -.. c:member:: Py_ssize_t PyObject.ob_refcnt +:c:member:`PyObject.ob_refcnt` - This is the type object's reference count, initialized to ``1`` by the + The type object's reference count is initialized to ``1`` by the ``PyObject_HEAD_INIT`` macro. Note that for :ref:`statically allocated type objects `, the type's instances (objects whose :c:member:`~PyObject.ob_type` points back to the type) do *not* count as references. But for @@ -505,7 +506,7 @@ type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. This field is not inherited by subtypes. -.. c:member:: PyTypeObject* PyObject.ob_type +:c:member:`PyObject.ob_type` This is the type's type, in other words its metatype. It is initialized by the argument to the ``PyObject_HEAD_INIT`` macro, and its value should normally be @@ -531,14 +532,13 @@ type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. PyVarObject Slots ----------------- -.. c:member:: Py_ssize_t PyVarObject.ob_size +:c:member:`PyVarObject.ob_size` For :ref:`statically allocated type objects `, this should be initialized to zero. For :ref:`dynamically allocated type objects `, this field has a special internal meaning. - This field should be accessed using the :c:func:`Py_SIZE()` and - :c:func:`Py_SET_SIZE()` macros. + This field should be accessed using the :c:func:`Py_SIZE()` macro. **Inheritance:** @@ -676,77 +676,144 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_dealloc - A pointer to the instance destructor function. This function must be defined - unless the type guarantees that its instances will never be deallocated (as is - the case for the singletons ``None`` and ``Ellipsis``). The function signature is:: + .. corresponding-type-slot:: Py_tp_dealloc + + A pointer to the instance destructor function. The function signature is:: void tp_dealloc(PyObject *self); - The destructor function is called by the :c:func:`Py_DECREF` and - :c:func:`Py_XDECREF` macros when the new reference count is zero. At this point, - the instance is still in existence, but there are no references to it. The - destructor function should free all references which the instance owns, free all - memory buffers owned by the instance (using the freeing function corresponding - to the allocation function used to allocate the buffer), and call the type's - :c:member:`~PyTypeObject.tp_free` function. If the type is not subtypable - (doesn't have the :c:macro:`Py_TPFLAGS_BASETYPE` flag bit set), it is - permissible to call the object deallocator directly instead of via - :c:member:`~PyTypeObject.tp_free`. The object deallocator should be the one used to allocate the - instance; this is normally :c:func:`PyObject_Free` if the instance was allocated - using :c:macro:`PyObject_New` or :c:macro:`PyObject_NewVar`, or - :c:func:`PyObject_GC_Del` if the instance was allocated using - :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. - - If the type supports garbage collection (has the :c:macro:`Py_TPFLAGS_HAVE_GC` - flag bit set), the destructor should call :c:func:`PyObject_GC_UnTrack` - before clearing any member fields. + The destructor function should remove all references which the instance owns + (e.g., call :c:func:`Py_CLEAR`), free all memory buffers owned by the + instance, and call the type's :c:member:`~PyTypeObject.tp_free` function to + free the object itself. + + If you may call functions that may set the error indicator, you must use + :c:func:`PyErr_GetRaisedException` and :c:func:`PyErr_SetRaisedException` + to ensure you don't clobber a preexisting error indicator (the deallocation + could have occurred while processing a different error): .. code-block:: c static void - foo_dealloc(PyObject *op) + foo_dealloc(foo_object *self) { + PyObject *et, *ev, *etb; + PyObject *exc = PyErr_GetRaisedException(); + ... + PyErr_SetRaisedException(exc); + } + + The dealloc handler itself must not raise an exception; if it hits an error + case it should call :c:func:`PyErr_FormatUnraisable` to log (and clear) an + unraisable exception. + + No guarantees are made about when an object is destroyed, except: + + * Python will destroy an object immediately or some time after the final + reference to the object is deleted, unless its finalizer + (:c:member:`~PyTypeObject.tp_finalize`) subsequently resurrects the + object. + * An object will not be destroyed while it is being automatically finalized + (:c:member:`~PyTypeObject.tp_finalize`) or automatically cleared + (:c:member:`~PyTypeObject.tp_clear`). + + CPython currently destroys an object immediately from :c:func:`Py_DECREF` + when the new reference count is zero, but this may change in a future + version. + + It is recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is always + finalized before destruction. + + If the type supports garbage collection (the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set), the destructor should call :c:func:`PyObject_GC_UnTrack` + before clearing any member fields. + + It is permissible to call :c:member:`~PyTypeObject.tp_clear` from + :c:member:`!tp_dealloc` to reduce code duplication and to guarantee that the + object is always cleared before destruction. Beware that + :c:member:`!tp_clear` might have already been called. + + If the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the + deallocator should release the owned reference to its type object (via + :c:func:`Py_DECREF`) after calling the type deallocator. See the example + code below.:: + + static void + foo_dealloc(PyObject *op) + { foo_object *self = (foo_object *) op; PyObject_GC_UnTrack(self); Py_CLEAR(self->ref); Py_TYPE(self)->tp_free(self); - } + } - Finally, if the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the - deallocator should release the owned reference to its type object - (via :c:func:`Py_DECREF`) after - calling the type deallocator. In order to avoid dangling pointers, the - recommended way to achieve this is: + :c:member:`!tp_dealloc` must leave the exception status unchanged. If it + needs to call something that might raise an exception, the exception state + must be backed up first and restored later (after logging any exceptions + with :c:func:`PyErr_WriteUnraisable`). - .. code-block:: c + Example:: - static void - foo_dealloc(PyObject *op) - { - PyTypeObject *tp = Py_TYPE(op); - // free references and buffers here - tp->tp_free(op); - Py_DECREF(tp); - } + static void + foo_dealloc(PyObject *self) + { + PyObject *exc = PyErr_GetRaisedException(); - .. warning:: + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + // self was resurrected. + goto done; + } + + PyTypeObject *tp = Py_TYPE(self); + + if (tp->tp_flags & Py_TPFLAGS_HAVE_GC) { + PyObject_GC_UnTrack(self); + } - In a garbage collected Python, :c:member:`!tp_dealloc` may be called from - any Python thread, not just the thread which created the object (if the - object becomes part of a refcount cycle, that cycle might be collected by - a garbage collection on any thread). This is not a problem for Python - API calls, since the thread on which :c:member:`!tp_dealloc` is called - with an :term:`attached thread state`. However, if the object being - destroyed in turn destroys objects from some other C or C++ library, care - should be taken to ensure that destroying those objects on the thread - which called :c:member:`!tp_dealloc` will not violate any assumptions of - the library. + // Optional, but convenient to avoid code duplication. + if (tp->tp_clear && tp->tp_clear(self) < 0) { + PyErr_WriteUnraisable(self); + } + + // Any additional destruction goes here. + + tp->tp_free(self); + self = NULL; // In case PyErr_WriteUnraisable() is called below. + + if (tp->tp_flags & Py_TPFLAGS_HEAPTYPE) { + Py_CLEAR(tp); + } + + done: + // Optional, if something was called that might have raised an + // exception. + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(self); + } + PyErr_SetRaisedException(exc); + } + + :c:member:`!tp_dealloc` may be called from + any Python thread, not just the thread which created the object (if the + object becomes part of a refcount cycle, that cycle might be collected by + a garbage collection on any thread). This is not a problem for Python + API calls, since the thread on which :c:member:`!tp_dealloc` is called + with an :term:`attached thread state`. However, if the object being + destroyed in turn destroys objects from some other C library, care + should be taken to ensure that destroying those objects on the thread + which called :c:member:`!tp_dealloc` will not violate any assumptions of + the library. **Inheritance:** This field is inherited by subtypes. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: Py_ssize_t PyTypeObject.tp_vectorcall_offset @@ -795,6 +862,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: getattrfunc PyTypeObject.tp_getattr + .. corresponding-type-slot:: Py_tp_getattr + An optional pointer to the get-attribute-string function. This field is deprecated. When it is defined, it should point to a function @@ -812,6 +881,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: setattrfunc PyTypeObject.tp_setattr + .. corresponding-type-slot:: Py_tp_setattr + An optional pointer to the function for setting and deleting attributes. This field is deprecated. When it is defined, it should point to a function @@ -844,6 +915,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: reprfunc PyTypeObject.tp_repr + .. corresponding-type-slot:: Py_tp_repr + .. index:: pair: built-in function; repr An optional pointer to a function that implements the built-in function @@ -909,6 +982,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: hashfunc PyTypeObject.tp_hash + .. corresponding-type-slot:: Py_tp_hash + .. index:: pair: built-in function; hash An optional pointer to a function that implements the built-in function @@ -950,6 +1025,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: ternaryfunc PyTypeObject.tp_call + .. corresponding-type-slot:: Py_tp_call + An optional pointer to a function that implements calling the object. This should be ``NULL`` if the object is not callable. The signature is the same as for :c:func:`PyObject_Call`:: @@ -963,6 +1040,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: reprfunc PyTypeObject.tp_str + .. corresponding-type-slot:: Py_tp_str + An optional pointer to a function that implements the built-in operation :func:`str`. (Note that :class:`str` is a type now, and :func:`str` calls the constructor for that type. This constructor calls :c:func:`PyObject_Str` to do @@ -988,6 +1067,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: getattrofunc PyTypeObject.tp_getattro + .. corresponding-type-slot:: Py_tp_getattro + An optional pointer to the get-attribute function. The signature is the same as for :c:func:`PyObject_GetAttr`:: @@ -1012,6 +1093,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: setattrofunc PyTypeObject.tp_setattro + .. corresponding-type-slot:: Py_tp_setattro + An optional pointer to the function for setting and deleting attributes. The signature is the same as for :c:func:`PyObject_SetAttr`:: @@ -1137,11 +1220,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_HAVE_GC This bit is set when the object supports garbage collection. If this bit - is set, instances must be created using :c:macro:`PyObject_GC_New` and - destroyed using :c:func:`PyObject_GC_Del`. More information in section - :ref:`supporting-cycle-detection`. This bit also implies that the - GC-related fields :c:member:`~PyTypeObject.tp_traverse` and :c:member:`~PyTypeObject.tp_clear` are present in - the type object. + is set, memory for new instances (see :c:member:`~PyTypeObject.tp_alloc`) + must be allocated using :c:macro:`PyObject_GC_New` or + :c:func:`PyType_GenericAlloc` and deallocated (see + :c:member:`~PyTypeObject.tp_free`) using :c:func:`PyObject_GC_Del`. More + information in section :ref:`supporting-cycle-detection`. **Inheritance:** @@ -1192,7 +1275,7 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_MANAGED_DICT - This bit indicates that instances of the class have a `~object.__dict__` + This bit indicates that instances of the class have a :attr:`~object.__dict__` attribute, and that the space for the dictionary is managed by the VM. If this flag is set, :c:macro:`Py_TPFLAGS_HAVE_GC` should also be set. @@ -1253,8 +1336,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_BASE_EXC_SUBCLASS .. c:macro:: Py_TPFLAGS_TYPE_SUBCLASS - These flags are used by functions such as - :c:func:`PyLong_Check` to quickly determine if a type is a subclass + Functions such as :c:func:`PyLong_Check` will call :c:func:`PyType_FastSubclass` + with one of these flags to quickly determine if a type is a subclass of a built-in type; such specific checks are faster than a generic check, like :c:func:`PyObject_IsInstance`. Custom types that inherit from built-ins should have their :c:member:`~PyTypeObject.tp_flags` @@ -1395,6 +1478,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: const char* PyTypeObject.tp_doc + .. corresponding-type-slot:: Py_tp_doc + An optional pointer to a NUL-terminated C string giving the docstring for this type object. This is exposed as the :attr:`~type.__doc__` attribute on the type and instances of the type. @@ -1406,6 +1491,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: traverseproc PyTypeObject.tp_traverse + .. corresponding-type-slot:: Py_tp_traverse + An optional pointer to a traversal function for the garbage collector. This is only used if the :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit is set. The signature is:: @@ -1467,6 +1554,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) but the instance has no strong reference to the elements inside it, as they are allowed to be removed even if the instance is still alive). + .. warning:: + The traversal function must not have any side effects. It must not + modify the reference counts of any Python objects nor create or destroy + any Python objects. + Note that :c:func:`Py_VISIT` requires the *visit* and *arg* parameters to :c:func:`!local_traverse` to have these specific names; don't name them just anything. @@ -1478,6 +1570,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) heap-allocated superclass). If they do not, the type object may not be garbage-collected. + .. note:: + + The :c:member:`~PyTypeObject.tp_traverse` function can be called from any + thread. + .. versionchanged:: 3.9 Heap-allocated types are expected to visit ``Py_TYPE(self)`` in @@ -1497,20 +1594,103 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: inquiry PyTypeObject.tp_clear - An optional pointer to a clear function for the garbage collector. This is only - used if the :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit is set. The signature is:: + .. corresponding-type-slot:: Py_tp_clear + + An optional pointer to a clear function. The signature is:: int tp_clear(PyObject *); - The :c:member:`~PyTypeObject.tp_clear` member function is used to break reference cycles in cyclic - garbage detected by the garbage collector. Taken together, all :c:member:`~PyTypeObject.tp_clear` - functions in the system must combine to break all reference cycles. This is - subtle, and if in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. For example, - the tuple type does not implement a :c:member:`~PyTypeObject.tp_clear` function, because it's - possible to prove that no reference cycle can be composed entirely of tuples. - Therefore the :c:member:`~PyTypeObject.tp_clear` functions of other types must be sufficient to - break any cycle containing a tuple. This isn't immediately obvious, and there's - rarely a good reason to avoid implementing :c:member:`~PyTypeObject.tp_clear`. + The purpose of this function is to break reference cycles that are causing a + :term:`cyclic isolate` so that the objects can be safely destroyed. A + cleared object is a partially destroyed object; the object is not obligated + to satisfy design invariants held during normal use. + + :c:member:`!tp_clear` does not need to delete references to objects that + can't participate in reference cycles, such as Python strings or Python + integers. However, it may be convenient to clear all references, and write + the type's :c:member:`~PyTypeObject.tp_dealloc` function to invoke + :c:member:`!tp_clear` to avoid code duplication. (Beware that + :c:member:`!tp_clear` might have already been called. Prefer calling + idempotent functions like :c:func:`Py_CLEAR`.) + + Any non-trivial cleanup should be performed in + :c:member:`~PyTypeObject.tp_finalize` instead of :c:member:`!tp_clear`. + + .. note:: + + If :c:member:`!tp_clear` fails to break a reference cycle then the + objects in the :term:`cyclic isolate` may remain indefinitely + uncollectable ("leak"). See :data:`gc.garbage`. + + .. note:: + + Referents (direct and indirect) might have already been cleared; they are + not guaranteed to be in a consistent state. + + .. note:: + + The :c:member:`~PyTypeObject.tp_clear` function can be called from any + thread. + + .. note:: + + An object is not guaranteed to be automatically cleared before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. + + This function differs from the destructor + (:c:member:`~PyTypeObject.tp_dealloc`) in the following ways: + + * The purpose of clearing an object is to remove references to other objects + that might participate in a reference cycle. The purpose of the + destructor, on the other hand, is a superset: it must release *all* + resources it owns, including references to objects that cannot participate + in a reference cycle (e.g., integers) as well as the object's own memory + (by calling :c:member:`~PyTypeObject.tp_free`). + * When :c:member:`!tp_clear` is called, other objects might still hold + references to the object being cleared. Because of this, + :c:member:`!tp_clear` must not deallocate the object's own memory + (:c:member:`~PyTypeObject.tp_free`). The destructor, on the other hand, + is only called when no (strong) references exist, and as such, must + safely destroy the object itself by deallocating it. + * :c:member:`!tp_clear` might never be automatically called. An object's + destructor, on the other hand, will be automatically called some time + after the object becomes unreachable (i.e., either there are no references + to the object or the object is a member of a :term:`cyclic isolate`). + + No guarantees are made about when, if, or how often Python automatically + clears an object, except: + + * Python will not automatically clear an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically clear an object if it has not been + automatically finalized (see :c:member:`~PyTypeObject.tp_finalize`). (If + the finalizer resurrected the object, the object may or may not be + automatically finalized again before it is cleared.) + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically clear it if any member of the cyclic isolate has not yet + been automatically finalized (:c:member:`~PyTypeObject.tp_finalize`). + * Python will not destroy an object until after any automatic calls to its + :c:member:`!tp_clear` function have returned. This ensures that the act + of breaking a reference cycle does not invalidate the ``self`` pointer + while :c:member:`!tp_clear` is still executing. + * Python will not automatically call :c:member:`!tp_clear` multiple times + concurrently. + + CPython currently only automatically clears objects as needed to break + reference cycles in a :term:`cyclic isolate`, but future versions might + clear objects regularly before their destruction. + + Taken together, all :c:member:`~PyTypeObject.tp_clear` functions in the + system must combine to break all reference cycles. This is subtle, and if + in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. For + example, the tuple type does not implement a + :c:member:`~PyTypeObject.tp_clear` function, because it's possible to prove + that no reference cycle can be composed entirely of tuples. Therefore the + :c:member:`~PyTypeObject.tp_clear` functions of other types are responsible + for breaking any cycle containing a tuple. This isn't immediately obvious, + and there's rarely a good reason to avoid implementing + :c:member:`~PyTypeObject.tp_clear`. Implementations of :c:member:`~PyTypeObject.tp_clear` should drop the instance's references to those of its members that may be Python objects, and set its pointers to those @@ -1540,23 +1720,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:func:`Py_CLEAR` macro performs the operations in a safe order. If the :c:macro:`Py_TPFLAGS_MANAGED_DICT` bit is set in the - :c:member:`~PyTypeObject.tp_flags` field, the traverse function must call + :c:member:`~PyTypeObject.tp_flags` field, the clear function must call :c:func:`PyObject_ClearManagedDict` like this:: PyObject_ClearManagedDict((PyObject*)self); - Note that :c:member:`~PyTypeObject.tp_clear` is not *always* called - before an instance is deallocated. For example, when reference counting - is enough to determine that an object is no longer used, the cyclic garbage - collector is not involved and :c:member:`~PyTypeObject.tp_dealloc` is - called directly. - - Because the goal of :c:member:`~PyTypeObject.tp_clear` functions is to break reference cycles, - it's not necessary to clear contained objects like Python strings or Python - integers, which can't participate in reference cycles. On the other hand, it may - be convenient to clear all contained Python objects, and write the type's - :c:member:`~PyTypeObject.tp_dealloc` function to invoke :c:member:`~PyTypeObject.tp_clear`. - More information about Python's garbage collection scheme can be found in section :ref:`supporting-cycle-detection`. @@ -1569,9 +1737,15 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:member:`~PyTypeObject.tp_clear` are all inherited from the base type if they are all zero in the subtype. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: richcmpfunc PyTypeObject.tp_richcompare + .. corresponding-type-slot:: Py_tp_richcompare + An optional pointer to the rich comparison function, whose signature is:: PyObject *tp_richcompare(PyObject *self, PyObject *other, int op); @@ -1674,6 +1848,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: getiterfunc PyTypeObject.tp_iter + .. corresponding-type-slot:: Py_tp_iter + An optional pointer to a function that returns an :term:`iterator` for the object. Its presence normally signals that the instances of this type are :term:`iterable` (although sequences may be iterable without this function). @@ -1689,6 +1865,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: iternextfunc PyTypeObject.tp_iternext + .. corresponding-type-slot:: Py_tp_iternext + An optional pointer to a function that returns the next item in an :term:`iterator`. The signature is:: @@ -1712,6 +1890,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: struct PyMethodDef* PyTypeObject.tp_methods + .. corresponding-type-slot:: Py_tp_methods + An optional pointer to a static ``NULL``-terminated array of :c:type:`PyMethodDef` structures, declaring regular methods of this type. @@ -1726,6 +1906,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: struct PyMemberDef* PyTypeObject.tp_members + .. corresponding-type-slot:: Py_tp_members + An optional pointer to a static ``NULL``-terminated array of :c:type:`PyMemberDef` structures, declaring regular data members (fields or slots) of instances of this type. @@ -1741,6 +1923,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: struct PyGetSetDef* PyTypeObject.tp_getset + .. corresponding-type-slot:: Py_tp_getset + An optional pointer to a static ``NULL``-terminated array of :c:type:`PyGetSetDef` structures, declaring computed attributes of instances of this type. @@ -1755,6 +1939,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: PyTypeObject* PyTypeObject.tp_base + .. corresponding-type-slot:: Py_tp_base + An optional pointer to a base type from which type properties are inherited. At this level, only single inheritance is supported; multiple inheritance require dynamically creating a type object by calling the metatype. @@ -1827,6 +2013,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: descrgetfunc PyTypeObject.tp_descr_get + .. corresponding-type-slot:: Py_tp_descr_get + An optional pointer to a "descriptor get" function. The function signature is:: @@ -1842,6 +2030,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: descrsetfunc PyTypeObject.tp_descr_set + .. corresponding-type-slot:: Py_tp_descr_set + An optional pointer to a function for setting and deleting a descriptor's value. @@ -1902,6 +2092,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: initproc PyTypeObject.tp_init + .. corresponding-type-slot:: Py_tp_init + An optional pointer to an instance initialization function. This function corresponds to the :meth:`~object.__init__` method of classes. Like @@ -1937,6 +2129,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: allocfunc PyTypeObject.tp_alloc + .. corresponding-type-slot:: Py_tp_alloc + An optional pointer to an instance allocation function. The function signature is:: @@ -1945,22 +2139,23 @@ and :c:data:`PyType_Type` effectively act as defaults.) **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement). + Static subtypes inherit this slot, which will be + :c:func:`PyType_GenericAlloc` if inherited from :class:`object`. + + :ref:`Heap subtypes ` do not inherit this slot. **Default:** - For dynamic subtypes, this field is always set to - :c:func:`PyType_GenericAlloc`, to force a standard heap - allocation strategy. + For heap subtypes, this field is always set to + :c:func:`PyType_GenericAlloc`. - For static subtypes, :c:data:`PyBaseObject_Type` uses - :c:func:`PyType_GenericAlloc`. That is the recommended value - for all statically defined types. + For static subtypes, this slot is inherited (see above). .. c:member:: newfunc PyTypeObject.tp_new + .. corresponding-type-slot:: Py_tp_new + An optional pointer to an instance creation function. The function signature is:: @@ -2000,28 +2195,39 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: freefunc PyTypeObject.tp_free + .. corresponding-type-slot:: Py_tp_free + An optional pointer to an instance deallocation function. Its signature is:: void tp_free(void *self); - An initializer that is compatible with this signature is :c:func:`PyObject_Free`. + This function must free the memory allocated by + :c:member:`~PyTypeObject.tp_alloc`. **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement) + Static subtypes inherit this slot, which will be :c:func:`PyObject_Free` if + inherited from :class:`object`. Exception: If the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and it would inherit + :c:func:`PyObject_Free`, then this slot is not inherited but instead defaults + to :c:func:`PyObject_GC_Del`. + + :ref:`Heap subtypes ` do not inherit this slot. **Default:** - In dynamic subtypes, this field is set to a deallocator suitable to - match :c:func:`PyType_GenericAlloc` and the value of the - :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit. + For :ref:`heap subtypes `, this slot defaults to a deallocator suitable to match + :c:func:`PyType_GenericAlloc` and the value of the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag. - For static subtypes, :c:data:`PyBaseObject_Type` uses :c:func:`PyObject_Free`. + For static subtypes, this slot is inherited (see above). .. c:member:: inquiry PyTypeObject.tp_is_gc + .. corresponding-type-slot:: Py_tp_is_gc + An optional pointer to a function called by the garbage collector. The garbage collector needs to know whether a particular object is collectible @@ -2050,12 +2256,14 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: PyObject* PyTypeObject.tp_bases + .. corresponding-type-slot:: Py_tp_bases + Tuple of base types. This field should be set to ``NULL`` and treated as read-only. Python will fill it in when the type is :c:func:`initialized `. - For dynamically created classes, the ``Py_tp_bases`` + For dynamically created classes, the :c:data:`Py_tp_bases` :c:type:`slot ` can be used instead of the *bases* argument of :c:func:`PyType_FromSpecWithBases`. The argument form is preferred. @@ -2130,6 +2338,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_del + .. corresponding-type-slot:: Py_tp_del + This field is deprecated. Use :c:member:`~PyTypeObject.tp_finalize` instead. @@ -2144,29 +2354,140 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_finalize - An optional pointer to an instance finalization function. Its signature is:: + .. corresponding-type-slot:: Py_tp_finalize + + An optional pointer to an instance finalization function. This is the C + implementation of the :meth:`~object.__del__` special method. Its signature + is:: void tp_finalize(PyObject *self); - If :c:member:`~PyTypeObject.tp_finalize` is set, the interpreter calls it once when - finalizing an instance. It is called either from the garbage - collector (if the instance is part of an isolated reference cycle) or - just before the object is deallocated. Either way, it is guaranteed - to be called before attempting to break reference cycles, ensuring - that it finds the object in a sane state. + The primary purpose of finalization is to perform any non-trivial cleanup + that must be performed before the object is destroyed, while the object and + any other objects it directly or indirectly references are still in a + consistent state. The finalizer is allowed to execute + arbitrary Python code. - :c:member:`~PyTypeObject.tp_finalize` should not mutate the current exception status; - therefore, a recommended way to write a non-trivial finalizer is:: + Before Python automatically finalizes an object, some of the object's direct + or indirect referents might have themselves been automatically finalized. + However, none of the referents will have been automatically cleared + (:c:member:`~PyTypeObject.tp_clear`) yet. + + Other non-finalized objects might still be using a finalized object, so the + finalizer must leave the object in a sane state (e.g., invariants are still + met). + + .. note:: + + After Python automatically finalizes an object, Python might start + automatically clearing (:c:member:`~PyTypeObject.tp_clear`) the object + and its referents (direct and indirect). Cleared objects are not + guaranteed to be in a consistent state; a finalized object must be able + to tolerate cleared referents. + + .. note:: + + An object is not guaranteed to be automatically finalized before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. It is + recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is + always finalized before destruction. + + .. note:: + + The :c:member:`~PyTypeObject.tp_finalize` function can be called from any + thread, although the :term:`GIL` will be held. + + .. note:: + + The :c:member:`!tp_finalize` function can be called during shutdown, + after some global variables have been deleted. See the documentation of + the :meth:`~object.__del__` method for details. + + When Python finalizes an object, it behaves like the following algorithm: + + #. Python might mark the object as *finalized*. Currently, Python always + marks objects whose type supports garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and never marks other types of + objects; this might change in a future version. + #. If the object is not marked as *finalized* and its + :c:member:`!tp_finalize` finalizer function is non-``NULL``, the + finalizer function is called. + #. If the finalizer function was called and the finalizer made the object + reachable (i.e., there is a reference to the object and it is not a + member of a :term:`cyclic isolate`), then the finalizer is said to have + *resurrected* the object. It is unspecified whether the finalizer can + also resurrect the object by adding a new reference to the object that + does not make it reachable, i.e., the object is (still) a member of a + cyclic isolate. + #. If the finalizer resurrected the object, the object's pending destruction + is canceled and the object's *finalized* mark might be removed if + present. Currently, Python never removes the *finalized* mark; this + might change in a future version. + + *Automatic finalization* refers to any finalization performed by Python + except via calls to :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. No guarantees are made about + when, if, or how often an object is automatically finalized, except: + + * Python will not automatically finalize an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically finalize an object if finalizing it would + not mark the object as *finalized*. Currently, this applies to objects + whose type does not support garbage collection, i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is not set. Such objects can still be + manually finalized by calling :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. + * Python will not automatically finalize any two members of a :term:`cyclic + isolate` concurrently. + * Python will not automatically finalize an object after it has + automatically cleared (:c:member:`~PyTypeObject.tp_clear`) the object. + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically finalize it after automatically clearing (see + :c:member:`~PyTypeObject.tp_clear`) any other member. + * Python will automatically finalize every member of a :term:`cyclic + isolate` before it automatically clears (see + :c:member:`~PyTypeObject.tp_clear`) any of them. + * If Python is going to automatically clear an object + (:c:member:`~PyTypeObject.tp_clear`), it will automatically finalize the + object first. + + Python currently only automatically finalizes objects that are members of a + :term:`cyclic isolate`, but future versions might finalize objects regularly + before their destruction. + + To manually finalize an object, do not call this function directly; call + :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc` instead. + + :c:member:`~PyTypeObject.tp_finalize` should leave the current exception + status unchanged. The recommended way to write a non-trivial finalizer is + to back up the exception at the beginning by calling + :c:func:`PyErr_GetRaisedException` and restore the exception at the end by + calling :c:func:`PyErr_SetRaisedException`. If an exception is encountered + in the middle of the finalizer, log and clear it with + :c:func:`PyErr_WriteUnraisable` or :c:func:`PyErr_FormatUnraisable`. For + example:: static void - local_finalize(PyObject *self) + foo_finalize(PyObject *self) { - /* Save the current exception, if any. */ + // Save the current exception, if any. PyObject *exc = PyErr_GetRaisedException(); - /* ... */ + // ... - /* Restore the saved exception. */ + if (do_something_that_might_raise() != success_indicator) { + PyErr_WriteUnraisable(self); + goto done; + } + + done: + // Restore the saved exception. This silently discards any exception + // raised above, so be sure to call PyErr_WriteUnraisable first if + // necessary. PyErr_SetRaisedException(exc); } @@ -2182,11 +2503,19 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:macro:`Py_TPFLAGS_HAVE_FINALIZE` flags bit in order for this field to be used. This is no longer required. - .. seealso:: "Safe object finalization" (:pep:`442`) + .. seealso:: + + * :pep:`442`: "Safe object finalization" + * :ref:`life-cycle` for details about how this slot relates to other + slots. + * :c:func:`PyObject_CallFinalizer` + * :c:func:`PyObject_CallFinalizerFromDealloc` .. c:member:: vectorcallfunc PyTypeObject.tp_vectorcall + .. corresponding-type-slot:: Py_tp_vectorcall + A :ref:`vectorcall function ` to use for calls of this type object (rather than instances). In other words, ``tp_vectorcall`` can be used to optimize ``type.__call__``, @@ -2352,42 +2681,148 @@ Number Object Structures Python 3.0.1. .. c:member:: binaryfunc PyNumberMethods.nb_add + + .. corresponding-type-slot:: Py_nb_add + .. c:member:: binaryfunc PyNumberMethods.nb_subtract + + .. corresponding-type-slot:: Py_nb_subtract + .. c:member:: binaryfunc PyNumberMethods.nb_multiply + + .. corresponding-type-slot:: Py_nb_multiply + .. c:member:: binaryfunc PyNumberMethods.nb_remainder + + .. corresponding-type-slot:: Py_nb_remainder + .. c:member:: binaryfunc PyNumberMethods.nb_divmod + + .. corresponding-type-slot:: Py_nb_divmod + .. c:member:: ternaryfunc PyNumberMethods.nb_power + + .. corresponding-type-slot:: Py_nb_power + .. c:member:: unaryfunc PyNumberMethods.nb_negative + + .. corresponding-type-slot:: Py_nb_negative + .. c:member:: unaryfunc PyNumberMethods.nb_positive + + .. corresponding-type-slot:: Py_nb_positive + .. c:member:: unaryfunc PyNumberMethods.nb_absolute + + .. corresponding-type-slot:: Py_nb_absolute + .. c:member:: inquiry PyNumberMethods.nb_bool + + .. corresponding-type-slot:: Py_nb_bool + .. c:member:: unaryfunc PyNumberMethods.nb_invert + + .. corresponding-type-slot:: Py_nb_invert + .. c:member:: binaryfunc PyNumberMethods.nb_lshift + + .. corresponding-type-slot:: Py_nb_lshift + .. c:member:: binaryfunc PyNumberMethods.nb_rshift + + .. corresponding-type-slot:: Py_nb_rshift + .. c:member:: binaryfunc PyNumberMethods.nb_and + + .. corresponding-type-slot:: Py_nb_and + .. c:member:: binaryfunc PyNumberMethods.nb_xor + + .. corresponding-type-slot:: Py_nb_xor + .. c:member:: binaryfunc PyNumberMethods.nb_or + + .. corresponding-type-slot:: Py_nb_or + .. c:member:: unaryfunc PyNumberMethods.nb_int + + .. corresponding-type-slot:: Py_nb_int + .. c:member:: void *PyNumberMethods.nb_reserved + .. c:member:: unaryfunc PyNumberMethods.nb_float + + .. corresponding-type-slot:: Py_nb_float + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_add + + .. corresponding-type-slot:: Py_nb_inplace_add + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_subtract + + .. corresponding-type-slot:: Py_nb_inplace_subtract + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_multiply + + .. corresponding-type-slot:: Py_nb_inplace_multiply + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_remainder + + .. corresponding-type-slot:: Py_nb_inplace_remainder + .. c:member:: ternaryfunc PyNumberMethods.nb_inplace_power + + .. corresponding-type-slot:: Py_nb_inplace_power + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_lshift + + .. corresponding-type-slot:: Py_nb_inplace_lshift + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_rshift + + .. corresponding-type-slot:: Py_nb_inplace_rshift + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_and + + .. corresponding-type-slot:: Py_nb_inplace_and + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_xor + + .. corresponding-type-slot:: Py_nb_inplace_xor + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_or + + .. corresponding-type-slot:: Py_nb_inplace_or + .. c:member:: binaryfunc PyNumberMethods.nb_floor_divide + + .. corresponding-type-slot:: Py_nb_floor_divide + .. c:member:: binaryfunc PyNumberMethods.nb_true_divide + + .. corresponding-type-slot:: Py_nb_true_divide + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_floor_divide + + .. corresponding-type-slot:: Py_nb_inplace_floor_divide + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_true_divide + + .. corresponding-type-slot:: Py_nb_inplace_true_divide + .. c:member:: unaryfunc PyNumberMethods.nb_index + + .. corresponding-type-slot:: Py_nb_index + .. c:member:: binaryfunc PyNumberMethods.nb_matrix_multiply + + .. corresponding-type-slot:: Py_nb_matrix_multiply + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_matrix_multiply + .. corresponding-type-slot:: Py_nb_inplace_matrix_multiply + + .. _mapping-structs: @@ -2404,12 +2839,16 @@ Mapping Object Structures .. c:member:: lenfunc PyMappingMethods.mp_length + .. corresponding-type-slot:: Py_mp_length + This function is used by :c:func:`PyMapping_Size` and :c:func:`PyObject_Size`, and has the same signature. This slot may be set to ``NULL`` if the object has no defined length. .. c:member:: binaryfunc PyMappingMethods.mp_subscript + .. corresponding-type-slot:: Py_mp_subscript + This function is used by :c:func:`PyObject_GetItem` and :c:func:`PySequence_GetSlice`, and has the same signature as :c:func:`!PyObject_GetItem`. This slot must be filled for the @@ -2418,6 +2857,8 @@ Mapping Object Structures .. c:member:: objobjargproc PyMappingMethods.mp_ass_subscript + .. corresponding-type-slot:: Py_mp_ass_subscript + This function is used by :c:func:`PyObject_SetItem`, :c:func:`PyObject_DelItem`, :c:func:`PySequence_SetSlice` and :c:func:`PySequence_DelSlice`. It has the same signature as @@ -2441,6 +2882,8 @@ Sequence Object Structures .. c:member:: lenfunc PySequenceMethods.sq_length + .. corresponding-type-slot:: Py_sq_length + This function is used by :c:func:`PySequence_Size` and :c:func:`PyObject_Size`, and has the same signature. It is also used for handling negative indices via the :c:member:`~PySequenceMethods.sq_item` @@ -2448,18 +2891,24 @@ Sequence Object Structures .. c:member:: binaryfunc PySequenceMethods.sq_concat + .. corresponding-type-slot:: Py_sq_concat + This function is used by :c:func:`PySequence_Concat` and has the same signature. It is also used by the ``+`` operator, after trying the numeric addition via the :c:member:`~PyNumberMethods.nb_add` slot. .. c:member:: ssizeargfunc PySequenceMethods.sq_repeat + .. corresponding-type-slot:: Py_sq_repeat + This function is used by :c:func:`PySequence_Repeat` and has the same signature. It is also used by the ``*`` operator, after trying numeric multiplication via the :c:member:`~PyNumberMethods.nb_multiply` slot. .. c:member:: ssizeargfunc PySequenceMethods.sq_item + .. corresponding-type-slot:: Py_sq_item + This function is used by :c:func:`PySequence_GetItem` and has the same signature. It is also used by :c:func:`PyObject_GetItem`, after trying the subscription via the :c:member:`~PyMappingMethods.mp_subscript` slot. @@ -2473,6 +2922,8 @@ Sequence Object Structures .. c:member:: ssizeobjargproc PySequenceMethods.sq_ass_item + .. corresponding-type-slot:: Py_sq_ass_item + This function is used by :c:func:`PySequence_SetItem` and has the same signature. It is also used by :c:func:`PyObject_SetItem` and :c:func:`PyObject_DelItem`, after trying the item assignment and deletion @@ -2482,6 +2933,8 @@ Sequence Object Structures .. c:member:: objobjproc PySequenceMethods.sq_contains + .. corresponding-type-slot:: Py_sq_contains + This function may be used by :c:func:`PySequence_Contains` and has the same signature. This slot may be left to ``NULL``, in this case :c:func:`!PySequence_Contains` simply traverses the sequence until it @@ -2489,6 +2942,8 @@ Sequence Object Structures .. c:member:: binaryfunc PySequenceMethods.sq_inplace_concat + .. corresponding-type-slot:: Py_sq_inplace_concat + This function is used by :c:func:`PySequence_InPlaceConcat` and has the same signature. It should modify its first operand, and return it. This slot may be left to ``NULL``, in this case :c:func:`!PySequence_InPlaceConcat` @@ -2498,6 +2953,8 @@ Sequence Object Structures .. c:member:: ssizeargfunc PySequenceMethods.sq_inplace_repeat + .. corresponding-type-slot:: Py_sq_inplace_repeat + This function is used by :c:func:`PySequence_InPlaceRepeat` and has the same signature. It should modify its first operand, and return it. This slot may be left to ``NULL``, in this case :c:func:`!PySequence_InPlaceRepeat` @@ -2523,6 +2980,8 @@ Buffer Object Structures .. c:member:: getbufferproc PyBufferProcs.bf_getbuffer + .. corresponding-type-slot:: Py_bf_getbuffer + The signature of this function is:: int (PyObject *exporter, Py_buffer *view, int flags); @@ -2572,6 +3031,8 @@ Buffer Object Structures .. c:member:: releasebufferproc PyBufferProcs.bf_releasebuffer + .. corresponding-type-slot:: Py_bf_releasebuffer + The signature of this function is:: void (PyObject *exporter, Py_buffer *view); @@ -2626,6 +3087,8 @@ Async Object Structures .. c:member:: unaryfunc PyAsyncMethods.am_await + .. corresponding-type-slot:: Py_am_await + The signature of this function is:: PyObject *am_await(PyObject *self); @@ -2637,6 +3100,8 @@ Async Object Structures .. c:member:: unaryfunc PyAsyncMethods.am_aiter + .. corresponding-type-slot:: Py_am_aiter + The signature of this function is:: PyObject *am_aiter(PyObject *self); @@ -2649,6 +3114,8 @@ Async Object Structures .. c:member:: unaryfunc PyAsyncMethods.am_anext + .. corresponding-type-slot:: Py_am_anext + The signature of this function is:: PyObject *am_anext(PyObject *self); @@ -2659,6 +3126,8 @@ Async Object Structures .. c:member:: sendfunc PyAsyncMethods.am_send + .. corresponding-type-slot:: Py_am_send + The signature of this function is:: PySendResult am_send(PyObject *self, PyObject *arg, PyObject **result); diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 95987e872ce639..9fa88915c3c023 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -305,12 +305,22 @@ These APIs can be used to work with surrogates: Check if *ch* is a low surrogate (``0xDC00 <= ch <= 0xDFFF``). +.. c:function:: Py_UCS4 Py_UNICODE_HIGH_SURROGATE(Py_UCS4 ch) + + Return the high UTF-16 surrogate (``0xD800`` to ``0xDBFF``) for a Unicode + code point in the range ``[0x10000; 0x10FFFF]``. + +.. c:function:: Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) + + Return the low UTF-16 surrogate (``0xDC00`` to ``0xDFFF``) for a Unicode + code point in the range ``[0x10000; 0x10FFFF]``. + .. c:function:: Py_UCS4 Py_UNICODE_JOIN_SURROGATES(Py_UCS4 high, Py_UCS4 low) Join two surrogate code points and return a single :c:type:`Py_UCS4` value. *high* and *low* are respectively the leading and trailing surrogates in a - surrogate pair. *high* must be in the range [0xD800; 0xDBFF] and *low* must - be in the range [0xDC00; 0xDFFF]. + surrogate pair. *high* must be in the range ``[0xD800; 0xDBFF]`` and *low* must + be in the range ``[0xDC00; 0xDFFF]``. Creating and accessing Unicode strings @@ -645,6 +655,17 @@ APIs: difference being that it decrements the reference count of *right* by one. +.. c:function:: PyObject* PyUnicode_BuildEncodingMap(PyObject* string) + + Return a mapping suitable for decoding a custom single-byte encoding. + Given a Unicode string *string* of up to 256 characters representing an encoding + table, returns either a compact internal mapping object or a dictionary + mapping character ordinals to byte values. Raises a :exc:`TypeError` and + return ``NULL`` on invalid input. + + .. versionadded:: 3.2 + + .. c:function:: const char* PyUnicode_GetDefaultEncoding(void) Return the name of the default string encoding, ``"utf-8"``. @@ -720,7 +741,7 @@ APIs: Return ``0`` on success, ``-1`` on error with an exception set. This function checks that *unicode* is a Unicode object, that the index is - not out of bounds, and that the object's reference count is one). + not out of bounds, and that the object's reference count is one. See :c:func:`PyUnicode_WRITE` for a version that skips these checks, making them your responsibility. @@ -1450,10 +1471,6 @@ the user settings on the machine running the codec. .. versionadded:: 3.3 -Methods & Slots -""""""""""""""" - - .. _unicodemethodsandslots: Methods and Slot Functions @@ -1715,10 +1732,6 @@ They all return ``NULL`` or ``-1`` if an exception occurs. from user input, prefer calling :c:func:`PyUnicode_FromString` and :c:func:`PyUnicode_InternInPlace` directly. - .. impl-detail:: - - Strings interned this way are made :term:`immortal`. - .. c:function:: unsigned int PyUnicode_CHECK_INTERNED(PyObject *str) @@ -1795,9 +1808,24 @@ object. See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`. +.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size) + + Write the ASCII string *str* into *writer*. + + *size* is the string length in bytes. If *size* is equal to ``-1``, call + ``strlen(str)`` to get the string length. + + *str* must only contain ASCII characters. The behavior is undefined if + *str* contains non-ASCII characters. + + On success, return ``0``. + On error, set an exception, leave the writer unchanged, and return ``-1``. + + .. versionadded:: 3.14 + .. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size) - Writer the wide string *str* into *writer*. + Write the wide string *str* into *writer*. *size* is a number of wide characters. If *size* is equal to ``-1``, call ``wcslen(str)`` to get the string length. diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index 1ef4181d52eb10..7eb9f0b54abd4e 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -13,8 +13,9 @@ the interpreter. Several of these functions accept a start symbol from the grammar as a parameter. The available start symbols are :c:data:`Py_eval_input`, -:c:data:`Py_file_input`, and :c:data:`Py_single_input`. These are described -following the functions which accept them as parameters. +:c:data:`Py_file_input`, :c:data:`Py_single_input`, and +:c:data:`Py_func_type_input`. These are described following the functions +which accept them as parameters. Note also that several of these functions take :c:expr:`FILE*` parameters. One particular issue which needs to be handled carefully is that the :c:type:`FILE` @@ -99,18 +100,12 @@ the same library that the Python runtime is using. Otherwise, Python may not handle script file with LF line ending correctly. -.. c:function:: int PyRun_InteractiveOne(FILE *fp, const char *filename) - - This is a simplified interface to :c:func:`PyRun_InteractiveOneFlags` below, - leaving *flags* set to ``NULL``. - - -.. c:function:: int PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) +.. c:function:: int PyRun_InteractiveOneObject(FILE *fp, PyObject *filename, PyCompilerFlags *flags) Read and execute a single statement from a file associated with an interactive device according to the *flags* argument. The user will be - prompted using ``sys.ps1`` and ``sys.ps2``. *filename* is decoded from the - :term:`filesystem encoding and error handler`. + prompted using ``sys.ps1`` and ``sys.ps2``. *filename* must be a Python + :class:`str` object. Returns ``0`` when the input was executed successfully, ``-1`` if there was an exception, or an error code @@ -119,6 +114,19 @@ the same library that the Python runtime is using. :file:`Python.h`, so must be included specifically if needed.) +.. c:function:: int PyRun_InteractiveOne(FILE *fp, const char *filename) + + This is a simplified interface to :c:func:`PyRun_InteractiveOneFlags` below, + leaving *flags* set to ``NULL``. + + +.. c:function:: int PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) + + Similar to :c:func:`PyRun_InteractiveOneObject`, but *filename* is a + :c:expr:`const char*`, which is decoded from the + :term:`filesystem encoding and error handler`. + + .. c:function:: int PyRun_InteractiveLoop(FILE *fp, const char *filename) This is a simplified interface to :c:func:`PyRun_InteractiveLoopFlags` below, @@ -140,7 +148,7 @@ the same library that the Python runtime is using. interpreter prompt is about to become idle and wait for user input from the terminal. The return value is ignored. Overriding this hook can be used to integrate the interpreter's prompt with other - event loops, as done in the :file:`Modules/_tkinter.c` in the + event loops, as done in :file:`Modules/_tkinter.c` in the Python source code. .. versionchanged:: 3.12 @@ -183,7 +191,7 @@ the same library that the Python runtime is using. objects *globals* and *locals* with the compiler flags specified by *flags*. *globals* must be a dictionary; *locals* can be any object that implements the mapping protocol. The parameter *start* specifies - the start token that should be used to parse the source code. + the start symbol and must one of the :ref:`available start symbols `. Returns the result of executing the code as a Python object, or ``NULL`` if an exception was raised. @@ -231,9 +239,9 @@ the same library that the Python runtime is using. .. c:function:: PyObject* Py_CompileStringObject(const char *str, PyObject *filename, int start, PyCompilerFlags *flags, int optimize) Parse and compile the Python source code in *str*, returning the resulting code - object. The start token is given by *start*; this can be used to constrain the - code which can be compiled and should be :c:data:`Py_eval_input`, - :c:data:`Py_file_input`, or :c:data:`Py_single_input`. The filename specified by + object. The start symbol is given by *start*; this can be used to constrain the + code which can be compiled and should be :ref:`available start symbols + `. The filename specified by *filename* is used to construct the code object and may appear in tracebacks or :exc:`SyntaxError` exception messages. This returns ``NULL`` if the code cannot be parsed or compiled. @@ -296,32 +304,6 @@ the same library that the Python runtime is using. true on success, false on failure. -.. c:var:: int Py_eval_input - - .. index:: single: Py_CompileString (C function) - - The start symbol from the Python grammar for isolated expressions; for use with - :c:func:`Py_CompileString`. - - -.. c:var:: int Py_file_input - - .. index:: single: Py_CompileString (C function) - - The start symbol from the Python grammar for sequences of statements as read - from a file or other source; for use with :c:func:`Py_CompileString`. This is - the symbol to use when compiling arbitrarily long Python source code. - - -.. c:var:: int Py_single_input - - .. index:: single: Py_CompileString (C function) - - The start symbol from the Python grammar for a single statement; for use with - :c:func:`Py_CompileString`. This is the symbol used for the interactive - interpreter loop. - - .. c:struct:: PyCompilerFlags This is the structure used to hold compiler flags. In cases where code is only @@ -361,7 +343,96 @@ the same library that the Python runtime is using. :py:mod:`!ast` Python module, which exports these constants under the same names. - .. c:var:: int CO_FUTURE_DIVISION + The "``PyCF``" flags above can be combined with "``CO_FUTURE``" flags such + as :c:macro:`CO_FUTURE_ANNOTATIONS` to enable features normally + selectable using :ref:`future statements `. + See :ref:`c_codeobject_flags` for a complete list. + + +.. _start-symbols: + +Available start symbols +^^^^^^^^^^^^^^^^^^^^^^^ + + +.. c:var:: int Py_eval_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for isolated expressions; for use with + :c:func:`Py_CompileString`. + + +.. c:var:: int Py_file_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for sequences of statements as read + from a file or other source; for use with :c:func:`Py_CompileString`. This is + the symbol to use when compiling arbitrarily long Python source code. + + +.. c:var:: int Py_single_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for a single statement; for use with + :c:func:`Py_CompileString`. This is the symbol used for the interactive + interpreter loop. + + +.. c:var:: int Py_func_type_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for a function type; for use with + :c:func:`Py_CompileString`. This is used to parse "signature type comments" + from :pep:`484`. + + This requires the :c:macro:`PyCF_ONLY_AST` flag to be set. + + .. seealso:: + * :py:class:`ast.FunctionType` + * :pep:`484` + + .. versionadded:: 3.8 + + +Stack Effects +^^^^^^^^^^^^^ + +.. seealso:: + :py:func:`dis.stack_effect` + + +.. c:macro:: PY_INVALID_STACK_EFFECT + + Sentinel value representing an invalid stack effect. + + This is currently equivalent to ``INT_MAX``. + + .. versionadded:: 3.8 + + +.. c:function:: int PyCompile_OpcodeStackEffect(int opcode, int oparg) + + Compute the stack effect of *opcode* with argument *oparg*. + + On success, this function returns the stack effect; on failure, this + returns :c:macro:`PY_INVALID_STACK_EFFECT`. + + .. versionadded:: 3.4 + + +.. c:function:: int PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump) + + Similar to :c:func:`PyCompile_OpcodeStackEffect`, but don't include the + stack effect of jumping if *jump* is zero. + + If *jump* is ``0``, this will not include the stack effect of jumping, but + if *jump* is ``1`` or ``-1``, this will include it. + + On success, this function returns the stack effect; on failure, this + returns :c:macro:`PY_INVALID_STACK_EFFECT`. - This bit can be set in *flags* to cause division operator ``/`` to be - interpreted as "true division" according to :pep:`238`. + .. versionadded:: 3.8 diff --git a/Doc/c-api/weakref.rst b/Doc/c-api/weakref.rst index c3c6cf413dcef5..45d6af1bb5a1ea 100644 --- a/Doc/c-api/weakref.rst +++ b/Doc/c-api/weakref.rst @@ -19,7 +19,14 @@ as much as it can. .. c:function:: int PyWeakref_CheckRef(PyObject *ob) - Return non-zero if *ob* is a reference object. This function always succeeds. + Return non-zero if *ob* is a reference object or a subclass of the reference + type. This function always succeeds. + + +.. c:function:: int PyWeakref_CheckRefExact(PyObject *ob) + + Return non-zero if *ob* is a reference object, but not a subclass of the + reference type. This function always succeeds. .. c:function:: int PyWeakref_CheckProxy(PyObject *ob) @@ -38,6 +45,10 @@ as much as it can. weakly referenceable object, or if *callback* is not callable, ``None``, or ``NULL``, this will return ``NULL`` and raise :exc:`TypeError`. + .. seealso:: + :c:func:`PyType_SUPPORTS_WEAKREFS` for checking if *ob* is weakly + referenceable. + .. c:function:: PyObject* PyWeakref_NewProxy(PyObject *ob, PyObject *callback) @@ -50,6 +61,10 @@ as much as it can. is not a weakly referenceable object, or if *callback* is not callable, ``None``, or ``NULL``, this will return ``NULL`` and raise :exc:`TypeError`. + .. seealso:: + :c:func:`PyType_SUPPORTS_WEAKREFS` for checking if *ob* is weakly + referenceable. + .. c:function:: int PyWeakref_GetRef(PyObject *ref, PyObject **pobj) diff --git a/Doc/conf.py b/Doc/conf.py index 467961dd5e2bff..a4275835059efa 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -79,6 +79,10 @@ rst_epilog = f""" .. |python_version_literal| replace:: ``Python {version}`` .. |python_x_dot_y_literal| replace:: ``python{version}`` +.. |python_x_dot_y_t_literal| replace:: ``python{version}t`` +.. |python_x_dot_y_t_literal_config| replace:: ``python{version}t-config`` +.. |x_dot_y_b2_literal| replace:: ``{version}.0b2`` +.. |applications_python_version_literal| replace:: ``/Applications/Python {version}/`` .. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` .. Apparently this how you hack together a formatted link: @@ -217,99 +221,18 @@ ('envvar', 'USER'), ('envvar', 'USERNAME'), ('envvar', 'USERPROFILE'), - # Deprecated function that was never documented: - ('py:func', 'getargspec'), - ('py:func', 'inspect.getargspec'), - # Undocumented modules that users shouldn't have to worry about - # (implementation details of `os.path`): - ('py:mod', 'ntpath'), - ('py:mod', 'posixpath'), ] # Temporary undocumented names. # In future this list must be empty. nitpick_ignore += [ - # C API: Standard Python exception classes - ('c:data', 'PyExc_ArithmeticError'), - ('c:data', 'PyExc_AssertionError'), - ('c:data', 'PyExc_AttributeError'), - ('c:data', 'PyExc_BaseException'), - ('c:data', 'PyExc_BlockingIOError'), - ('c:data', 'PyExc_BrokenPipeError'), - ('c:data', 'PyExc_BufferError'), - ('c:data', 'PyExc_ChildProcessError'), - ('c:data', 'PyExc_ConnectionAbortedError'), - ('c:data', 'PyExc_ConnectionError'), - ('c:data', 'PyExc_ConnectionRefusedError'), - ('c:data', 'PyExc_ConnectionResetError'), - ('c:data', 'PyExc_EOFError'), - ('c:data', 'PyExc_Exception'), - ('c:data', 'PyExc_FileExistsError'), - ('c:data', 'PyExc_FileNotFoundError'), - ('c:data', 'PyExc_FloatingPointError'), - ('c:data', 'PyExc_GeneratorExit'), - ('c:data', 'PyExc_ImportError'), - ('c:data', 'PyExc_IndentationError'), - ('c:data', 'PyExc_IndexError'), - ('c:data', 'PyExc_InterruptedError'), - ('c:data', 'PyExc_IsADirectoryError'), - ('c:data', 'PyExc_KeyboardInterrupt'), - ('c:data', 'PyExc_KeyError'), - ('c:data', 'PyExc_LookupError'), - ('c:data', 'PyExc_MemoryError'), - ('c:data', 'PyExc_ModuleNotFoundError'), - ('c:data', 'PyExc_NameError'), - ('c:data', 'PyExc_NotADirectoryError'), - ('c:data', 'PyExc_NotImplementedError'), - ('c:data', 'PyExc_OSError'), - ('c:data', 'PyExc_OverflowError'), - ('c:data', 'PyExc_PermissionError'), - ('c:data', 'PyExc_ProcessLookupError'), - ('c:data', 'PyExc_PythonFinalizationError'), - ('c:data', 'PyExc_RecursionError'), - ('c:data', 'PyExc_ReferenceError'), - ('c:data', 'PyExc_RuntimeError'), - ('c:data', 'PyExc_StopAsyncIteration'), - ('c:data', 'PyExc_StopIteration'), - ('c:data', 'PyExc_SyntaxError'), - ('c:data', 'PyExc_SystemError'), - ('c:data', 'PyExc_SystemExit'), - ('c:data', 'PyExc_TabError'), - ('c:data', 'PyExc_TimeoutError'), - ('c:data', 'PyExc_TypeError'), - ('c:data', 'PyExc_UnboundLocalError'), - ('c:data', 'PyExc_UnicodeDecodeError'), - ('c:data', 'PyExc_UnicodeEncodeError'), - ('c:data', 'PyExc_UnicodeError'), - ('c:data', 'PyExc_UnicodeTranslateError'), - ('c:data', 'PyExc_ValueError'), - ('c:data', 'PyExc_ZeroDivisionError'), - # C API: Standard Python warning classes - ('c:data', 'PyExc_BytesWarning'), - ('c:data', 'PyExc_DeprecationWarning'), - ('c:data', 'PyExc_FutureWarning'), - ('c:data', 'PyExc_ImportWarning'), - ('c:data', 'PyExc_PendingDeprecationWarning'), - ('c:data', 'PyExc_ResourceWarning'), - ('c:data', 'PyExc_RuntimeWarning'), - ('c:data', 'PyExc_SyntaxWarning'), - ('c:data', 'PyExc_UnicodeWarning'), - ('c:data', 'PyExc_UserWarning'), - ('c:data', 'PyExc_Warning'), - # Undocumented public C macros - ('c:macro', 'Py_BUILD_ASSERT'), - ('c:macro', 'Py_BUILD_ASSERT_EXPR'), # Do not error nit-picky mode builds when _SubParsersAction.add_parser cannot # be resolved, as the method is currently undocumented. For context, see # https://github.com/python/cpython/pull/103289. ('py:meth', '_SubParsersAction.add_parser'), # Attributes/methods/etc. that definitely should be documented better, # but are deferred for now: - ('py:attr', '__annotations__'), - ('py:meth', '__missing__'), ('py:attr', '__wrapped__'), - ('py:attr', 'decimal.Context.clamp'), - ('py:meth', 'index'), # list.index, tuple.index, etc. ] # gh-106948: Copy standard C types declared in the "c:type" domain and C @@ -433,11 +356,12 @@ 'papersize': 'a4paper', # The font size ('10pt', '11pt' or '12pt'). 'pointsize': '10pt', + 'maxlistdepth': '8', # See https://github.com/python/cpython/issues/139588 } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). -_stdauthor = 'Guido van Rossum and the Python development team' +_stdauthor = 'The Python development team' latex_documents = [ ('c-api/index', 'c-api.tex', 'The Python/C API', _stdauthor, 'manual'), ( @@ -512,11 +436,31 @@ epub_author = 'Python Documentation Authors' epub_publisher = 'Python Software Foundation' +epub_exclude_files = ('index.xhtml', 'download.xhtml') # index pages are not valid xhtml # https://github.com/sphinx-doc/sphinx/issues/12359 epub_use_index = False +# translation tag +# --------------- + +language_code = None +for arg in sys.argv: + if arg.startswith('language='): + language_code = arg.split('=', 1)[1] + +if language_code: + tags.add('translation') # noqa: F821 + + rst_epilog += f"""\ +.. _TRANSLATION_REPO: https://github.com/python/python-docs-{language_code.replace("_", "-").lower()} +""" # noqa: F821 +else: + rst_epilog += """\ +.. _TRANSLATION_REPO: https://github.com/python +""" + # Options for the coverage checker # -------------------------------- @@ -630,13 +574,11 @@ 'image': '_static/og-image.png', 'line_color': '#3776ab', } -ogp_custom_meta_tags = [ - '', -] +ogp_custom_meta_tags = ('',) if 'create-social-cards' not in tags: # noqa: F821 # Define a static preview image when not creating social cards ogp_image = '_static/og-image.png' - ogp_custom_meta_tags += [ + ogp_custom_meta_tags += ( '', '', - ] + ) diff --git a/Doc/data/python3.14.abi b/Doc/data/python3.14.abi new file mode 100644 index 00000000000000..372c35f3713e2b --- /dev/null +++ b/Doc/data/python3.14.abi @@ -0,0 +1,31645 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index ca99b9e6d37141..1cc1b44a5b8e3a 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -963,21 +963,45 @@ PyFunction_Check:PyObject*:o:0: PyFunction_GetAnnotations:PyObject*::0: PyFunction_GetAnnotations:PyObject*:op:0: +PyFunction_GET_ANNOTATIONS:PyObject*::0: +PyFunction_GET_ANNOTATIONS:PyObject*:op:0: + PyFunction_GetClosure:PyObject*::0: PyFunction_GetClosure:PyObject*:op:0: +PyFunction_GET_CLOSURE:PyObject*::0: +PyFunction_GET_CLOSURE:PyObject*:op:0: + PyFunction_GetCode:PyObject*::0: PyFunction_GetCode:PyObject*:op:0: +PyFunction_GET_CODE:PyObject*::0: +PyFunction_GET_CODE:PyObject*:op:0: + PyFunction_GetDefaults:PyObject*::0: PyFunction_GetDefaults:PyObject*:op:0: +PyFunction_GET_DEFAULTS:PyObject*::0: +PyFunction_GET_DEFAULTS:PyObject*:op:0: + +PyFunction_GetKwDefaults:PyObject*::0: +PyFunction_GetKwDefaults:PyObject*:op:0: + +PyFunction_GET_KW_DEFAULTS:PyObject*::0: +PyFunction_GET_KW_DEFAULTS:PyObject*:op:0: + PyFunction_GetGlobals:PyObject*::0: PyFunction_GetGlobals:PyObject*:op:0: +PyFunction_GET_GLOBALS:PyObject*::0: +PyFunction_GET_GLOBALS:PyObject*:op:0: + PyFunction_GetModule:PyObject*::0: PyFunction_GetModule:PyObject*:op:0: +PyFunction_GET_MODULE:PyObject*::0: +PyFunction_GET_MODULE:PyObject*:op:0: + PyFunction_New:PyObject*::+1: PyFunction_New:PyObject*:code:+1: PyFunction_New:PyObject*:globals:+1: @@ -1120,6 +1144,9 @@ PyInterpreterState_Clear:PyInterpreterState*:interp:: PyInterpreterState_Delete:void::: PyInterpreterState_Delete:PyInterpreterState*:interp:: +PyInterpreterState_GetDict:PyObject*::0: +PyInterpreterState_GetDict:PyInterpreterState*:interp:: + PyInterpreterState_GetID:int64_t::: PyInterpreterState_GetID:PyInterpreterState*:interp:: @@ -1492,9 +1519,6 @@ PyModule_SetDocString:int::: PyModule_SetDocString:PyObject*:module:0: PyModule_SetDocString:const char*:docstring:: -PyModuleDef_Init:PyObject*::0: -PyModuleDef_Init:PyModuleDef*:def:: - PyNumber_Absolute:PyObject*::+1: PyNumber_Absolute:PyObject*:o:0: @@ -2391,6 +2415,10 @@ PyType_GetFlags:PyTypeObject*:type:0: PyType_GetName:PyObject*::+1: PyType_GetName:PyTypeObject*:type:0: +PyType_GetModuleByDef:PyObject*::0: +PyType_GetModuleByDef:PyTypeObject*:type:0: +PyType_GetModuleByDef:PyModuleDef*:def:: + PyType_GetQualName:PyObject*::+1: PyType_GetQualName:PyTypeObject*:type:0: @@ -2781,6 +2809,9 @@ PyUnicode_AppendAndDel:void::: PyUnicode_AppendAndDel:PyObject**:p_left:0: PyUnicode_AppendAndDel:PyObject*:right:-1: +PyUnicode_BuildEncodingMap:PyObject*::+1: +PyUnicode_BuildEncodingMap:PyObject*:string::: + PyUnicode_GetDefaultEncoding:const char*::: PyUnicode_GetDefaultEncoding::void:: diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 3d68487d07baf2..b69799f5a4235a 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -1,4 +1,12 @@ role,name,added,ifdef_note,struct_abi_kind +macro,METH_CLASS,3.2,, +macro,METH_COEXIST,3.2,, +macro,METH_FASTCALL,3.7,, +macro,METH_METHOD,3.7,, +macro,METH_NOARGS,3.2,, +macro,METH_O,3.2,, +macro,METH_STATIC,3.2,, +macro,METH_VARARGS,3.2,, macro,PY_VECTORCALL_ARGUMENTS_OFFSET,3.12,, func,PyAIter_Check,3.10,, func,PyArg_Parse,3.2,, @@ -8,6 +16,26 @@ func,PyArg_UnpackTuple,3.2,, func,PyArg_VaParse,3.2,, func,PyArg_VaParseTupleAndKeywords,3.2,, func,PyArg_ValidateKeywordArguments,3.2,, +macro,PyBUF_ANY_CONTIGUOUS,3.11,, +macro,PyBUF_CONTIG,3.11,, +macro,PyBUF_CONTIG_RO,3.11,, +macro,PyBUF_C_CONTIGUOUS,3.11,, +macro,PyBUF_FORMAT,3.11,, +macro,PyBUF_FULL,3.11,, +macro,PyBUF_FULL_RO,3.11,, +macro,PyBUF_F_CONTIGUOUS,3.11,, +macro,PyBUF_INDIRECT,3.11,, +macro,PyBUF_MAX_NDIM,3.11,, +macro,PyBUF_ND,3.11,, +macro,PyBUF_READ,3.11,, +macro,PyBUF_RECORDS,3.11,, +macro,PyBUF_RECORDS_RO,3.11,, +macro,PyBUF_SIMPLE,3.11,, +macro,PyBUF_STRIDED,3.11,, +macro,PyBUF_STRIDED_RO,3.11,, +macro,PyBUF_STRIDES,3.11,, +macro,PyBUF_WRITABLE,3.11,, +macro,PyBUF_WRITE,3.11,, data,PyBaseObject_Type,3.2,, func,PyBool_FromLong,3.2,, data,PyBool_Type,3.2,, @@ -426,6 +454,7 @@ data,PyMethodDescr_Type,3.2,, type,PyModuleDef,3.2,,full-abi type,PyModuleDef_Base,3.2,,full-abi func,PyModuleDef_Init,3.5,, +type,PyModuleDef_Slot,3.5,,full-abi data,PyModuleDef_Type,3.5,, func,PyModule_Add,3.13,, func,PyModule_AddFunctions,3.7,, @@ -836,6 +865,14 @@ func,PyWeakref_NewRef,3.2,, data,PyWrapperDescr_Type,3.2,, func,PyWrapper_New,3.2,, data,PyZip_Type,3.2,, +macro,Py_ASNATIVEBYTES_ALLOW_INDEX,3.14,, +macro,Py_ASNATIVEBYTES_BIG_ENDIAN,3.14,, +macro,Py_ASNATIVEBYTES_DEFAULTS,3.14,, +macro,Py_ASNATIVEBYTES_LITTLE_ENDIAN,3.14,, +macro,Py_ASNATIVEBYTES_NATIVE_ENDIAN,3.14,, +macro,Py_ASNATIVEBYTES_REJECT_NEGATIVE,3.14,, +macro,Py_ASNATIVEBYTES_UNSIGNED_BUFFER,3.14,, +macro,Py_AUDIT_READ,3.12,, func,Py_AddPendingCall,3.2,, func,Py_AtExit,3.2,, macro,Py_BEGIN_ALLOW_THREADS,3.2,, @@ -888,22 +925,136 @@ func,Py_NewInterpreter,3.2,, func,Py_NewRef,3.10,, func,Py_PACK_FULL_VERSION,3.14,, func,Py_PACK_VERSION,3.14,, +macro,Py_READONLY,3.12,, func,Py_REFCNT,3.14,, +macro,Py_RELATIVE_OFFSET,3.12,, func,Py_ReprEnter,3.2,, func,Py_ReprLeave,3.2,, func,Py_SetProgramName,3.2,, func,Py_SetPythonHome,3.2,, func,Py_SetRecursionLimit,3.2,, +macro,Py_TPFLAGS_BASETYPE,3.2,, +macro,Py_TPFLAGS_DEFAULT,3.2,, +macro,Py_TPFLAGS_HAVE_GC,3.2,, +macro,Py_TPFLAGS_HAVE_VECTORCALL,3.12,, +macro,Py_TPFLAGS_ITEMS_AT_END,3.12,, +macro,Py_TPFLAGS_METHOD_DESCRIPTOR,3.8,, +macro,Py_TP_USE_SPEC,3.14,, func,Py_TYPE,3.14,, +macro,Py_T_BOOL,3.12,, +macro,Py_T_BYTE,3.12,, +macro,Py_T_CHAR,3.12,, +macro,Py_T_DOUBLE,3.12,, +macro,Py_T_FLOAT,3.12,, +macro,Py_T_INT,3.12,, +macro,Py_T_LONG,3.12,, +macro,Py_T_LONGLONG,3.12,, +macro,Py_T_OBJECT_EX,3.12,, +macro,Py_T_PYSSIZET,3.12,, +macro,Py_T_SHORT,3.12,, +macro,Py_T_STRING,3.12,, +macro,Py_T_STRING_INPLACE,3.12,, +macro,Py_T_UBYTE,3.12,, +macro,Py_T_UINT,3.12,, +macro,Py_T_ULONG,3.12,, +macro,Py_T_ULONGLONG,3.12,, +macro,Py_T_USHORT,3.12,, type,Py_UCS4,3.2,, macro,Py_UNBLOCK_THREADS,3.2,, data,Py_UTF8Mode,3.8,, func,Py_VaBuildValue,3.2,, data,Py_Version,3.11,, func,Py_XNewRef,3.10,, +macro,Py_am_aiter,3.5,, +macro,Py_am_anext,3.5,, +macro,Py_am_await,3.5,, +macro,Py_am_send,3.10,, +macro,Py_bf_getbuffer,3.11,, +macro,Py_bf_releasebuffer,3.11,, type,Py_buffer,3.11,,full-abi type,Py_intptr_t,3.2,, +macro,Py_mod_create,3.5,, +macro,Py_mod_exec,3.5,, +macro,Py_mod_gil,3.13,, +macro,Py_mod_multiple_interpreters,3.12,, +macro,Py_mp_ass_subscript,3.2,, +macro,Py_mp_length,3.2,, +macro,Py_mp_subscript,3.2,, +macro,Py_nb_absolute,3.2,, +macro,Py_nb_add,3.2,, +macro,Py_nb_and,3.2,, +macro,Py_nb_bool,3.2,, +macro,Py_nb_divmod,3.2,, +macro,Py_nb_float,3.2,, +macro,Py_nb_floor_divide,3.2,, +macro,Py_nb_index,3.2,, +macro,Py_nb_inplace_add,3.2,, +macro,Py_nb_inplace_and,3.2,, +macro,Py_nb_inplace_floor_divide,3.2,, +macro,Py_nb_inplace_lshift,3.2,, +macro,Py_nb_inplace_matrix_multiply,3.5,, +macro,Py_nb_inplace_multiply,3.2,, +macro,Py_nb_inplace_or,3.2,, +macro,Py_nb_inplace_power,3.2,, +macro,Py_nb_inplace_remainder,3.2,, +macro,Py_nb_inplace_rshift,3.2,, +macro,Py_nb_inplace_subtract,3.2,, +macro,Py_nb_inplace_true_divide,3.2,, +macro,Py_nb_inplace_xor,3.2,, +macro,Py_nb_int,3.2,, +macro,Py_nb_invert,3.2,, +macro,Py_nb_lshift,3.2,, +macro,Py_nb_matrix_multiply,3.5,, +macro,Py_nb_multiply,3.2,, +macro,Py_nb_negative,3.2,, +macro,Py_nb_or,3.2,, +macro,Py_nb_positive,3.2,, +macro,Py_nb_power,3.2,, +macro,Py_nb_remainder,3.2,, +macro,Py_nb_rshift,3.2,, +macro,Py_nb_subtract,3.2,, +macro,Py_nb_true_divide,3.2,, +macro,Py_nb_xor,3.2,, +macro,Py_sq_ass_item,3.2,, +macro,Py_sq_concat,3.2,, +macro,Py_sq_contains,3.2,, +macro,Py_sq_inplace_concat,3.2,, +macro,Py_sq_inplace_repeat,3.2,, +macro,Py_sq_item,3.2,, +macro,Py_sq_length,3.2,, +macro,Py_sq_repeat,3.2,, type,Py_ssize_t,3.2,, +macro,Py_tp_alloc,3.2,, +macro,Py_tp_base,3.2,, +macro,Py_tp_bases,3.2,, +macro,Py_tp_call,3.2,, +macro,Py_tp_clear,3.2,, +macro,Py_tp_dealloc,3.2,, +macro,Py_tp_del,3.2,, +macro,Py_tp_descr_get,3.2,, +macro,Py_tp_descr_set,3.2,, +macro,Py_tp_doc,3.2,, +macro,Py_tp_finalize,3.5,, +macro,Py_tp_free,3.2,, +macro,Py_tp_getattr,3.2,, +macro,Py_tp_getattro,3.2,, +macro,Py_tp_getset,3.2,, +macro,Py_tp_hash,3.2,, +macro,Py_tp_init,3.2,, +macro,Py_tp_is_gc,3.2,, +macro,Py_tp_iter,3.2,, +macro,Py_tp_iternext,3.2,, +macro,Py_tp_members,3.2,, +macro,Py_tp_methods,3.2,, +macro,Py_tp_new,3.2,, +macro,Py_tp_repr,3.2,, +macro,Py_tp_richcompare,3.2,, +macro,Py_tp_setattr,3.2,, +macro,Py_tp_setattro,3.2,, +macro,Py_tp_str,3.2,, +macro,Py_tp_token,3.14,, +macro,Py_tp_traverse,3.2,, +macro,Py_tp_vectorcall,3.14,, type,Py_uintptr_t,3.2,, type,allocfunc,3.2,, type,binaryfunc,3.2,, diff --git a/Doc/deprecations/c-api-pending-removal-in-3.15.rst b/Doc/deprecations/c-api-pending-removal-in-3.15.rst index a5cc8f1d5b3475..bac80289cdfb3d 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.15.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.15.rst @@ -1,7 +1,6 @@ Pending removal in Python 3.15 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* The bundled copy of ``libmpdecimal``. * The :c:func:`PyImport_ImportModuleNoBlock`: Use :c:func:`PyImport_ImportModule` instead. * :c:func:`PyWeakref_GetObject` and :c:func:`PyWeakref_GET_OBJECT`: diff --git a/Doc/deprecations/c-api-pending-removal-in-3.16.rst b/Doc/deprecations/c-api-pending-removal-in-3.16.rst new file mode 100644 index 00000000000000..9453f83799c43d --- /dev/null +++ b/Doc/deprecations/c-api-pending-removal-in-3.16.rst @@ -0,0 +1,4 @@ +Pending removal in Python 3.16 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* The bundled copy of ``libmpdec``. diff --git a/Doc/deprecations/c-api-pending-removal-in-3.18.rst b/Doc/deprecations/c-api-pending-removal-in-3.18.rst index 564cfb79a0b513..022aee93aa70c4 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.18.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.18.rst @@ -1,11 +1,12 @@ Pending removal in Python 3.18 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* Deprecated private functions (:gh:`128863`): +* The following private functions are deprecated + and planned for removal in Python 3.18: * :c:func:`!_PyBytes_Join`: use :c:func:`PyBytes_Join`. * :c:func:`!_PyDict_GetItemStringWithError`: use :c:func:`PyDict_GetItemStringRef`. - * :c:func:`!_PyDict_Pop()`: :c:func:`PyDict_Pop`. + * :c:func:`!_PyDict_Pop()`: use :c:func:`PyDict_Pop`. * :c:func:`!_PyLong_Sign()`: use :c:func:`PyLong_GetSign`. * :c:func:`!_PyLong_FromDigits` and :c:func:`!_PyLong_New`: use :c:func:`PyLongWriter_Create`. @@ -31,7 +32,7 @@ Pending removal in Python 3.18 :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) `. * :c:func:`!_PyUnicodeWriter_WriteASCIIString`: replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with - :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. + :c:func:`PyUnicodeWriter_WriteASCII(writer, str) `. * :c:func:`!_PyUnicodeWriter_WriteLatin1String`: replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. @@ -41,5 +42,6 @@ Pending removal in Python 3.18 * :c:func:`!_Py_fopen_obj`: use :c:func:`Py_fopen`. The `pythoncapi-compat project - `__ can be used to get these - new public functions on Python 3.13 and older. + `__ can be used to get + these new public functions on Python 3.13 and older. + (Contributed by Victor Stinner in :gh:`128863`.) diff --git a/Doc/deprecations/pending-removal-in-3.13.rst b/Doc/deprecations/pending-removal-in-3.13.rst index 2fd2f12cc6a2c4..d5b8c80e8f9aa0 100644 --- a/Doc/deprecations/pending-removal-in-3.13.rst +++ b/Doc/deprecations/pending-removal-in-3.13.rst @@ -38,15 +38,3 @@ APIs: * :meth:`!unittest.TestProgram.usageExit` (:gh:`67048`) * :class:`!webbrowser.MacOSX` (:gh:`86421`) * :class:`classmethod` descriptor chaining (:gh:`89519`) -* :mod:`importlib.resources` deprecated methods: - - * ``contents()`` - * ``is_resource()`` - * ``open_binary()`` - * ``open_text()`` - * ``path()`` - * ``read_binary()`` - * ``read_text()`` - - Use :func:`importlib.resources.files` instead. Refer to `importlib-resources: Migrating from Legacy - `_ (:gh:`106531`) diff --git a/Doc/deprecations/pending-removal-in-3.14.rst b/Doc/deprecations/pending-removal-in-3.14.rst index 6159fa48848285..171758156ab117 100644 --- a/Doc/deprecations/pending-removal-in-3.14.rst +++ b/Doc/deprecations/pending-removal-in-3.14.rst @@ -38,12 +38,6 @@ Pending removal in Python 3.14 is no current event loop set and it decides to create one. (Contributed by Serhiy Storchaka and Guido van Rossum in :gh:`100160`.) -* :mod:`collections.abc`: Deprecated :class:`!collections.abc.ByteString`. - Prefer :class:`!Sequence` or :class:`~collections.abc.Buffer`. - For use in typing, prefer a union, like ``bytes | bytearray``, - or :class:`collections.abc.Buffer`. - (Contributed by Shantanu Jain in :gh:`91896`.) - * :mod:`email`: Deprecated the *isdst* parameter in :func:`email.utils.localtime`. (Contributed by Alan Williams in :gh:`72346`.) @@ -78,7 +72,7 @@ Pending removal in Python 3.14 :meth:`~pathlib.PurePath.relative_to`: passing additional arguments is deprecated. -* :mod:`pkgutil`: :func:`!pkgutil.find_loader` and :func:!pkgutil.get_loader` +* :mod:`pkgutil`: :func:`!pkgutil.find_loader` and :func:`!pkgutil.get_loader` now raise :exc:`DeprecationWarning`; use :func:`importlib.util.find_spec` instead. (Contributed by Nikita Sobolev in :gh:`97850`.) @@ -96,9 +90,6 @@ Pending removal in Python 3.14 if :ref:`named placeholders ` are used and *parameters* is a sequence instead of a :class:`dict`. -* :mod:`typing`: :class:`!typing.ByteString`, deprecated since Python 3.9, - now causes a :exc:`DeprecationWarning` to be emitted when it is used. - * :mod:`urllib`: :class:`!urllib.parse.Quoter` is deprecated: it was not intended to be a public API. diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index 7b32275ad86760..c80588b27b635a 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -85,6 +85,13 @@ Pending removal in Python 3.15 has been deprecated since Python 3.13. Use the class-based syntax or the functional syntax instead. + * When using the functional syntax of :class:`~typing.TypedDict`\s, failing + to pass a value to the *fields* parameter (``TD = TypedDict("TD")``) or + passing ``None`` (``TD = TypedDict("TD", None)``) has been deprecated + since Python 3.13. + Use ``class TD(TypedDict): pass`` or ``TD = TypedDict("TD", {})`` + to create a TypedDict with zero field. + * The :func:`typing.no_type_check_decorator` decorator function has been deprecated since Python 3.13. After eight years in the :mod:`typing` module, diff --git a/Doc/deprecations/pending-removal-in-3.17.rst b/Doc/deprecations/pending-removal-in-3.17.rst index 370b98307e5228..0a1c2f08cab3bd 100644 --- a/Doc/deprecations/pending-removal-in-3.17.rst +++ b/Doc/deprecations/pending-removal-in-3.17.rst @@ -1,6 +1,28 @@ Pending removal in Python 3.17 ------------------------------ +* :mod:`collections.abc`: + + - :class:`collections.abc.ByteString` is scheduled for removal in Python 3.17. + + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the :ref:`buffer protocol ` at runtime. For use + in type annotations, either use :class:`~collections.abc.Buffer` or a union + that explicitly specifies the types your code supports (e.g., + ``bytes | bytearray | memoryview``). + + :class:`!ByteString` was originally intended to be an abstract class that + would serve as a supertype of both :class:`bytes` and :class:`bytearray`. + However, since the ABC never had any methods, knowing that an object was an + instance of :class:`!ByteString` never actually told you anything useful + about the object. Other common buffer types such as :class:`memoryview` + were also never understood as subtypes of :class:`!ByteString` (either at + runtime or by static type checkers). + + See :pep:`PEP 688 <688#current-options>` for more details. + (Contributed by Shantanu Jain in :gh:`91896`.) + + * :mod:`typing`: - Before Python 3.14, old-style unions were implemented using the private class @@ -8,3 +30,22 @@ Pending removal in Python 3.17 but it has been retained for backward compatibility, with removal scheduled for Python 3.17. Users should use documented introspection helpers like :func:`typing.get_origin` and :func:`typing.get_args` instead of relying on private implementation details. + - :class:`typing.ByteString`, deprecated since Python 3.9, is scheduled for removal in + Python 3.17. + + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the :ref:`buffer protocol ` at runtime. For use + in type annotations, either use :class:`~collections.abc.Buffer` or a union + that explicitly specifies the types your code supports (e.g., + ``bytes | bytearray | memoryview``). + + :class:`!ByteString` was originally intended to be an abstract class that + would serve as a supertype of both :class:`bytes` and :class:`bytearray`. + However, since the ABC never had any methods, knowing that an object was an + instance of :class:`!ByteString` never actually told you anything useful + about the object. Other common buffer types such as :class:`memoryview` + were also never understood as subtypes of :class:`!ByteString` (either at + runtime or by static type checkers). + + See :pep:`PEP 688 <688#current-options>` for more details. + (Contributed by Shantanu Jain in :gh:`91896`.) diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 4c4a368baca955..43e06da6c282fe 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -15,7 +15,6 @@ although there is currently no date scheduled for their removal. * :mod:`builtins`: - * ``bool(NotImplemented)``. * Generators: ``throw(type, exc, tb)`` and ``athrow(type, exc, tb)`` signature is deprecated: use ``throw(exc)`` and ``athrow(exc)`` instead, the single argument signature. diff --git a/Doc/extending/building.rst b/Doc/extending/building.rst index ddde567f6f3efa..098dde39ea597e 100644 --- a/Doc/extending/building.rst +++ b/Doc/extending/building.rst @@ -6,41 +6,10 @@ Building C and C++ Extensions ***************************** -A C extension for CPython is a shared library (e.g. a ``.so`` file on Linux, -``.pyd`` on Windows), which exports an *initialization function*. +A C extension for CPython is a shared library (for example, a ``.so`` file on +Linux, ``.pyd`` on Windows), which exports an *initialization function*. -To be importable, the shared library must be available on :envvar:`PYTHONPATH`, -and must be named after the module name, with an appropriate extension. -When using setuptools, the correct filename is generated automatically. - -The initialization function has the signature: - -.. c:function:: PyObject* PyInit_modulename(void) - -It returns either a fully initialized module, or a :c:type:`PyModuleDef` -instance. See :ref:`initializing-modules` for details. - -.. highlight:: python - -For modules with ASCII-only names, the function must be named -``PyInit_``, with ```` replaced by the name of the -module. When using :ref:`multi-phase-initialization`, non-ASCII module names -are allowed. In this case, the initialization function name is -``PyInitU_``, with ```` encoded using Python's -*punycode* encoding with hyphens replaced by underscores. In Python:: - - def initfunc_name(name): - try: - suffix = b'_' + name.encode('ascii') - except UnicodeEncodeError: - suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') - return b'PyInit' + suffix - -It is possible to export multiple modules from a single shared library by -defining multiple initialization functions. However, importing them requires -using symbolic links or a custom importer, because by default only the -function corresponding to the filename is found. -See the *"Multiple modules in one library"* section in :pep:`489` for details. +See :ref:`extension-modules` for details. .. highlight:: c @@ -51,7 +20,11 @@ See the *"Multiple modules in one library"* section in :pep:`489` for details. Building C and C++ Extensions with setuptools ============================================= -Python 3.12 and newer no longer come with distutils. Please refer to the -``setuptools`` documentation at -https://setuptools.readthedocs.io/en/latest/setuptools.html -to learn more about how build and distribute C/C++ extensions with setuptools. + +Building, packaging and distributing extension modules is best done with +third-party tools, and is out of scope of this document. +One suitable tool is Setuptools, whose documentation can be found at +https://setuptools.pypa.io/en/latest/setuptools.html. + +The :mod:`distutils` module, which was included in the standard library +until Python 3.12, is now maintained as part of Setuptools. diff --git a/Doc/extending/embedding.rst b/Doc/extending/embedding.rst index b777862da79f14..cb41889437c8b0 100644 --- a/Doc/extending/embedding.rst +++ b/Doc/extending/embedding.rst @@ -245,21 +245,23 @@ Python extension. For example:: return PyLong_FromLong(numargs); } - static PyMethodDef EmbMethods[] = { + static PyMethodDef emb_module_methods[] = { {"numargs", emb_numargs, METH_VARARGS, "Return the number of arguments received by the process."}, {NULL, NULL, 0, NULL} }; - static PyModuleDef EmbModule = { - PyModuleDef_HEAD_INIT, "emb", NULL, -1, EmbMethods, - NULL, NULL, NULL, NULL + static struct PyModuleDef emb_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "emb", + .m_size = 0, + .m_methods = emb_module_methods, }; static PyObject* PyInit_emb(void) { - return PyModule_Create(&EmbModule); + return PyModuleDef_Init(&emb_module); } Insert the above code just above the :c:func:`main` function. Also, insert the diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index b0493bed75b151..f9b65643dfe888 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -75,12 +75,37 @@ the module and a copyright notice if you like). See :ref:`arg-parsing-string-and-buffers` for a description of this macro. All user-visible symbols defined by :file:`Python.h` have a prefix of ``Py`` or -``PY``, except those defined in standard header files. For convenience, and -since they are used extensively by the Python interpreter, ``"Python.h"`` -includes a few standard header files: ````, ````, -````, and ````. If the latter header file does not exist on -your system, it declares the functions :c:func:`malloc`, :c:func:`free` and -:c:func:`realloc` directly. +``PY``, except those defined in standard header files. + +.. tip:: + + For backward compatibility, :file:`Python.h` includes several standard header files. + C extensions should include the standard headers that they use, + and should not rely on these implicit includes. + If using the limited C API version 3.13 or newer, the implicit includes are: + + * ```` + * ```` (on Windows) + * ```` + * ```` + * ```` + * ```` + * ```` + * ```` (if present) + + If :c:macro:`Py_LIMITED_API` is not defined, or is set to version 3.12 or older, + the headers below are also included: + + * ```` + * ```` (on POSIX) + + If :c:macro:`Py_LIMITED_API` is not defined, or is set to version 3.10 or older, + the headers below are also included: + + * ```` + * ```` + * ```` + * ```` The next thing we add to our module file is the C function that will be called when the Python expression ``spam.system(string)`` is evaluated (we'll see @@ -203,31 +228,57 @@ function usually raises :c:data:`PyExc_TypeError`. If you have an argument whos value must be in a particular range or must satisfy other conditions, :c:data:`PyExc_ValueError` is appropriate. -You can also define a new exception that is unique to your module. For this, you -usually declare a static object variable at the beginning of your file:: +You can also define a new exception that is unique to your module. +The simplest way to do this is to declare a static global object variable at +the beginning of the file:: - static PyObject *SpamError; + static PyObject *SpamError = NULL; -and initialize it in your module's initialization function (:c:func:`!PyInit_spam`) -with an exception object:: +and initialize it by calling :c:func:`PyErr_NewException` in the module's +:c:data:`Py_mod_exec` function (:c:func:`!spam_module_exec`):: - PyMODINIT_FUNC - PyInit_spam(void) - { - PyObject *m; + SpamError = PyErr_NewException("spam.error", NULL, NULL); - m = PyModule_Create(&spammodule); - if (m == NULL) - return NULL; +Since :c:data:`!SpamError` is a global variable, it will be overwritten every time +the module is reinitialized, when the :c:data:`Py_mod_exec` function is called. + +For now, let's avoid the issue: we will block repeated initialization by raising an +:py:exc:`ImportError`:: + static PyObject *SpamError = NULL; + + static int + spam_module_exec(PyObject *m) + { + if (SpamError != NULL) { + PyErr_SetString(PyExc_ImportError, + "cannot initialize spam module more than once"); + return -1; + } SpamError = PyErr_NewException("spam.error", NULL, NULL); - if (PyModule_AddObjectRef(m, "error", SpamError) < 0) { - Py_CLEAR(SpamError); - Py_DECREF(m); - return NULL; + if (PyModule_AddObjectRef(m, "SpamError", SpamError) < 0) { + return -1; } - return m; + return 0; + } + + static PyModuleDef_Slot spam_module_slots[] = { + {Py_mod_exec, spam_module_exec}, + {0, NULL} + }; + + static struct PyModuleDef spam_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "spam", + .m_size = 0, // non-negative + .m_slots = spam_module_slots, + }; + + PyMODINIT_FUNC + PyInit_spam(void) + { + return PyModuleDef_Init(&spam_module); } Note that the Python name for the exception object is :exc:`!spam.error`. The @@ -242,6 +293,11 @@ needed to ensure that it will not be discarded, causing :c:data:`!SpamError` to become a dangling pointer. Should it become a dangling pointer, C code which raises the exception could cause a core dump or other unintended side effects. +For now, the :c:func:`Py_DECREF` call to remove this reference is missing. +Even when the Python interpreter shuts down, the global :c:data:`!SpamError` +variable will not be garbage-collected. It will "leak". +We did, however, ensure that this will happen at most once per process. + We discuss the use of :c:macro:`PyMODINIT_FUNC` as a function return type later in this sample. @@ -318,7 +374,7 @@ The Module's Method Table and Initialization Function I promised to show how :c:func:`!spam_system` is called from Python programs. First, we need to list its name and address in a "method table":: - static PyMethodDef SpamMethods[] = { + static PyMethodDef spam_methods[] = { ... {"system", spam_system, METH_VARARGS, "Execute a shell command."}, @@ -343,13 +399,10 @@ function. The method table must be referenced in the module definition structure:: - static struct PyModuleDef spammodule = { - PyModuleDef_HEAD_INIT, - "spam", /* name of module */ - spam_doc, /* module documentation, may be NULL */ - -1, /* size of per-interpreter state of the module, - or -1 if the module keeps state in global variables. */ - SpamMethods + static struct PyModuleDef spam_module = { + ... + .m_methods = spam_methods, + ... }; This structure, in turn, must be passed to the interpreter in the module's @@ -360,23 +413,17 @@ only non-\ ``static`` item defined in the module file:: PyMODINIT_FUNC PyInit_spam(void) { - return PyModule_Create(&spammodule); + return PyModuleDef_Init(&spam_module); } Note that :c:macro:`PyMODINIT_FUNC` declares the function as ``PyObject *`` return type, declares any special linkage declarations required by the platform, and for C++ declares the function as ``extern "C"``. -When the Python program imports module :mod:`!spam` for the first time, -:c:func:`!PyInit_spam` is called. (See below for comments about embedding Python.) -It calls :c:func:`PyModule_Create`, which returns a module object, and -inserts built-in function objects into the newly created module based upon the -table (an array of :c:type:`PyMethodDef` structures) found in the module definition. -:c:func:`PyModule_Create` returns a pointer to the module object -that it creates. It may abort with a fatal error for -certain errors, or return ``NULL`` if the module could not be initialized -satisfactorily. The init function must return the module object to its caller, -so that it then gets inserted into ``sys.modules``. +:c:func:`!PyInit_spam` is called when each interpreter imports its module +:mod:`!spam` for the first time. (See below for comments about embedding Python.) +A pointer to the module definition must be returned via :c:func:`PyModuleDef_Init`, +so that the import machinery can create the module and store it in ``sys.modules``. When embedding Python, the :c:func:`!PyInit_spam` function is not called automatically unless there's an entry in the :c:data:`PyImport_Inittab` table. @@ -433,23 +480,19 @@ optionally followed by an import of the module:: .. note:: - Removing entries from ``sys.modules`` or importing compiled modules into - multiple interpreters within a process (or following a :c:func:`fork` without an - intervening :c:func:`exec`) can create problems for some extension modules. - Extension module authors should exercise caution when initializing internal data - structures. + If you declare a global variable or a local static one, the module may + experience unintended side-effects on re-initialisation, for example when + removing entries from ``sys.modules`` or importing compiled modules into + multiple interpreters within a process + (or following a :c:func:`fork` without an intervening :c:func:`exec`). + If module state is not yet fully :ref:`isolated `, + authors should consider marking the module as having no support for subinterpreters + (via :c:macro:`Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED`). A more substantial example module is included in the Python source distribution -as :file:`Modules/xxmodule.c`. This file may be used as a template or simply +as :file:`Modules/xxlimited.c`. This file may be used as a template or simply read as an example. -.. note:: - - Unlike our ``spam`` example, ``xxmodule`` uses *multi-phase initialization* - (new in Python 3.5), where a PyModuleDef structure is returned from - ``PyInit_spam``, and creation of the module is left to the import machinery. - For details on multi-phase initialization, see :PEP:`489`. - .. _compilation: @@ -790,18 +833,17 @@ Philbrick (philbrick@hks.com):: {NULL, NULL, 0, NULL} /* sentinel */ }; - static struct PyModuleDef keywdargmodule = { - PyModuleDef_HEAD_INIT, - "keywdarg", - NULL, - -1, - keywdarg_methods + static struct PyModuleDef keywdarg_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "keywdarg", + .m_size = 0, + .m_methods = keywdarg_methods, }; PyMODINIT_FUNC PyInit_keywdarg(void) { - return PyModule_Create(&keywdargmodule); + return PyModuleDef_Init(&keywdarg_module); } @@ -1042,7 +1084,14 @@ references to all its items, so when item 1 is replaced, it has to dispose of the original item 1. Now let's suppose the original item 1 was an instance of a user-defined class, and let's further suppose that the class defined a :meth:`!__del__` method. If this class instance has a reference count of 1, -disposing of it will call its :meth:`!__del__` method. +disposing of it will call its :meth:`!__del__` method. Internally, +:c:func:`PyList_SetItem` calls :c:func:`Py_DECREF` on the replaced item, +which invokes replaced item's corresponding +:c:member:`~PyTypeObject.tp_dealloc` function. During +deallocation, :c:member:`~PyTypeObject.tp_dealloc` calls +:c:member:`~PyTypeObject.tp_finalize`, which is mapped to the +:meth:`!__del__` method for class instances (see :pep:`442`). This entire +sequence happens synchronously within the :c:func:`PyList_SetItem` call. Since it is written in Python, the :meth:`!__del__` method can execute arbitrary Python code. Could it perhaps do something to invalidate the reference to @@ -1072,8 +1121,9 @@ why his :meth:`!__del__` methods would fail... The second case of problems with a borrowed reference is a variant involving threads. Normally, multiple threads in the Python interpreter can't get in each -other's way, because there is a global lock protecting Python's entire object -space. However, it is possible to temporarily release this lock using the macro +other's way, because there is a :term:`global lock ` +protecting Python's entire object space. +However, it is possible to temporarily release this lock using the macro :c:macro:`Py_BEGIN_ALLOW_THREADS`, and to re-acquire it using :c:macro:`Py_END_ALLOW_THREADS`. This is common around blocking I/O calls, to let other threads use the processor while waiting for the I/O to complete. @@ -1259,20 +1309,15 @@ two more lines must be added:: #include "spammodule.h" The ``#define`` is used to tell the header file that it is being included in the -exporting module, not a client module. Finally, the module's initialization -function must take care of initializing the C API pointer array:: +exporting module, not a client module. Finally, the module's :c:data:`mod_exec +` function must take care of initializing the C API pointer array:: - PyMODINIT_FUNC - PyInit_spam(void) + static int + spam_module_exec(PyObject *m) { - PyObject *m; static void *PySpam_API[PySpam_API_pointers]; PyObject *c_api_object; - m = PyModule_Create(&spammodule); - if (m == NULL) - return NULL; - /* Initialize the C API pointer array */ PySpam_API[PySpam_System_NUM] = (void *)PySpam_System; @@ -1280,11 +1325,10 @@ function must take care of initializing the C API pointer array:: c_api_object = PyCapsule_New((void *)PySpam_API, "spam._C_API", NULL); if (PyModule_Add(m, "_C_API", c_api_object) < 0) { - Py_DECREF(m); - return NULL; + return -1; } - return m; + return 0; } Note that ``PySpam_API`` is declared ``static``; otherwise the pointer @@ -1343,20 +1387,16 @@ like this:: All that a client module must do in order to have access to the function :c:func:`!PySpam_System` is to call the function (or rather macro) -:c:func:`!import_spam` in its initialization function:: +:c:func:`!import_spam` in its :c:data:`mod_exec ` function:: - PyMODINIT_FUNC - PyInit_client(void) + static int + client_module_exec(PyObject *m) { - PyObject *m; - - m = PyModule_Create(&clientmodule); - if (m == NULL) - return NULL; - if (import_spam() < 0) - return NULL; + if (import_spam() < 0) { + return -1; + } /* additional initialization can happen here */ - return m; + return 0; } The main disadvantage of this approach is that the file :file:`spammodule.h` is diff --git a/Doc/extending/index.rst b/Doc/extending/index.rst index 01b4df6d44acff..4cc2c96d8d5b47 100644 --- a/Doc/extending/index.rst +++ b/Doc/extending/index.rst @@ -26,19 +26,9 @@ Recommended third party tools ============================= This guide only covers the basic tools for creating extensions provided -as part of this version of CPython. Third party tools like -`Cython `_, `cffi `_, -`SWIG `_ and `Numba `_ -offer both simpler and more sophisticated approaches to creating C and C++ -extensions for Python. - -.. seealso:: - - `Python Packaging User Guide: Binary Extensions `_ - The Python Packaging User Guide not only covers several available - tools that simplify the creation of binary extensions, but also - discusses the various reasons why creating an extension module may be - desirable in the first place. +as part of this version of CPython. Some :ref:`third party tools +` offer both simpler and more sophisticated approaches to creating +C and C++ extensions for Python. Creating extensions without third party tools @@ -49,6 +39,10 @@ assistance from third party tools. It is intended primarily for creators of those tools, rather than being a recommended way to create your own C extensions. +.. seealso:: + + :pep:`489` -- Multi-phase extension module initialization + .. toctree:: :maxdepth: 2 :numbered: diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst index 3fc91841416d71..3bbee33bd50698 100644 --- a/Doc/extending/newtypes_tutorial.rst +++ b/Doc/extending/newtypes_tutorial.rst @@ -55,8 +55,10 @@ from the previous chapter. This file defines three things: #. How the :class:`!Custom` **type** behaves: this is the ``CustomType`` struct, which defines a set of flags and function pointers that the interpreter inspects when specific operations are requested. -#. How to initialize the :mod:`!custom` module: this is the ``PyInit_custom`` - function and the associated ``custommodule`` struct. +#. How to define and execute the :mod:`!custom` module: this is the + ``PyInit_custom`` function and the associated ``custom_module`` struct for + defining the module, and the ``custom_module_exec`` function to set up + a fresh module object. The first bit is:: @@ -171,18 +173,18 @@ implementation provided by the API function :c:func:`PyType_GenericNew`. :: .tp_new = PyType_GenericNew, Everything else in the file should be familiar, except for some code in -:c:func:`!PyInit_custom`:: +:c:func:`!custom_module_exec`:: - if (PyType_Ready(&CustomType) < 0) - return; + if (PyType_Ready(&CustomType) < 0) { + return -1; + } This initializes the :class:`!Custom` type, filling in a number of members to the appropriate default values, including :c:member:`~PyObject.ob_type` that we initially set to ``NULL``. :: if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; + return -1; } This adds the type to the module dictionary. This allows us to create @@ -275,7 +277,7 @@ be an instance of a subclass. The explicit cast to ``CustomObject *`` above is needed because we defined ``Custom_dealloc`` to take a ``PyObject *`` argument, as the ``tp_dealloc`` function pointer expects to receive a ``PyObject *`` argument. - By assigning to the the ``tp_dealloc`` slot of a type, we declare + By assigning to the ``tp_dealloc`` slot of a type, we declare that it can only be called with instances of our ``CustomObject`` class, so the cast to ``(CustomObject *)`` is safe. This is object-oriented polymorphism, in C! @@ -875,27 +877,22 @@ but let the base class handle it by calling its own :c:member:`~PyTypeObject.tp_ The :c:type:`PyTypeObject` struct supports a :c:member:`~PyTypeObject.tp_base` specifying the type's concrete base class. Due to cross-platform compiler issues, you can't fill that field directly with a reference to -:c:type:`PyList_Type`; it should be done later in the module initialization +:c:type:`PyList_Type`; it should be done in the :c:data:`Py_mod_exec` function:: - PyMODINIT_FUNC - PyInit_sublist(void) + static int + sublist_module_exec(PyObject *m) { - PyObject* m; SubListType.tp_base = &PyList_Type; - if (PyType_Ready(&SubListType) < 0) - return NULL; - - m = PyModule_Create(&sublistmodule); - if (m == NULL) - return NULL; + if (PyType_Ready(&SubListType) < 0) { + return -1; + } if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { - Py_DECREF(m); - return NULL; + return -1; } - return m; + return 0; } Before calling :c:func:`PyType_Ready`, the type structure must have the diff --git a/Doc/extending/windows.rst b/Doc/extending/windows.rst index 56aa44e4e58c83..a97c6182553c30 100644 --- a/Doc/extending/windows.rst +++ b/Doc/extending/windows.rst @@ -121,7 +121,7 @@ When creating DLLs in Windows, you can use the CPython library in two ways: :file:`Python.h` triggers an implicit, configure-aware link with the library. The header file chooses :file:`pythonXY_d.lib` for Debug, :file:`pythonXY.lib` for Release, and :file:`pythonX.lib` for Release with - the `Limited API `_ enabled. + the :ref:`Limited API ` enabled. To build two DLLs, spam and ni (which uses C functions found in spam), you could use these commands:: diff --git a/Doc/faq/design.rst b/Doc/faq/design.rst index e2710fab9cf800..73c670b0a138c2 100644 --- a/Doc/faq/design.rst +++ b/Doc/faq/design.rst @@ -589,9 +589,9 @@ exhaustive test suites that exercise every line of code in a module. An appropriate testing discipline can help build large complex applications in Python as well as having interface specifications would. In fact, it can be better because an interface specification cannot test certain properties of a -program. For example, the :meth:`!list.append` method is expected to add new elements +program. For example, the :meth:`list.append` method is expected to add new elements to the end of some internal list; an interface specification cannot test that -your :meth:`!list.append` implementation will actually do this correctly, but it's +your :meth:`list.append` implementation will actually do this correctly, but it's trivial to check this property in a test suite. Writing test suites is very helpful, and you might want to design your code to diff --git a/Doc/faq/extending.rst b/Doc/faq/extending.rst index 3147fda7c37124..1d5abed2317b0c 100644 --- a/Doc/faq/extending.rst +++ b/Doc/faq/extending.rst @@ -37,24 +37,9 @@ Writing C is hard; are there any alternatives? ---------------------------------------------- There are a number of alternatives to writing your own C extensions, depending -on what you're trying to do. - -.. XXX make sure these all work - -`Cython `_ and its relative `Pyrex -`_ are compilers -that accept a slightly modified form of Python and generate the corresponding -C code. Cython and Pyrex make it possible to write an extension without having -to learn Python's C API. - -If you need to interface to some C or C++ library for which no Python extension -currently exists, you can try wrapping the library's data types and functions -with a tool such as `SWIG `_. `SIP -`__, `CXX -`_ `Boost -`_, or `Weave -`_ are also -alternatives for wrapping C++ libraries. +on what you're trying to do. :ref:`Recommended third party tools ` +offer both simpler and more sophisticated approaches to creating C and C++ +extensions for Python. How can I execute arbitrary Python statements from C? diff --git a/Doc/faq/general.rst b/Doc/faq/general.rst index 578777d7f23621..698f2117ff5c64 100644 --- a/Doc/faq/general.rst +++ b/Doc/faq/general.rst @@ -186,7 +186,7 @@ How do I get documentation on Python? ------------------------------------- The standard documentation for the current stable version of Python is available -at https://docs.python.org/3/. PDF, plain text, and downloadable HTML versions are +at https://docs.python.org/3/. EPUB, plain text, and downloadable HTML versions are also available at https://docs.python.org/3/download.html. The documentation is written in reStructuredText and processed by `the Sphinx diff --git a/Doc/faq/programming.rst b/Doc/faq/programming.rst index 9f9e4fab685b19..6f9dfa8616ed44 100644 --- a/Doc/faq/programming.rst +++ b/Doc/faq/programming.rst @@ -454,7 +454,7 @@ There are two factors that produce this result: (the list), and both ``x`` and ``y`` refer to it. 2) Lists are :term:`mutable`, which means that you can change their content. -After the call to :meth:`!append`, the content of the mutable object has +After the call to :meth:`~sequence.append`, the content of the mutable object has changed from ``[]`` to ``[10]``. Since both the variables refer to the same object, using either name accesses the modified value ``[10]``. @@ -1397,9 +1397,9 @@ To see why this happens, you need to know that (a) if an object implements an :meth:`~object.__iadd__` magic method, it gets called when the ``+=`` augmented assignment is executed, and its return value is what gets used in the assignment statement; -and (b) for lists, :meth:`!__iadd__` is equivalent to calling :meth:`!extend` on the list -and returning the list. That's why we say that for lists, ``+=`` is a -"shorthand" for :meth:`!list.extend`:: +and (b) for lists, :meth:`!__iadd__` is equivalent to calling +:meth:`~sequence.extend` on the list and returning the list. +That's why we say that for lists, ``+=`` is a "shorthand" for :meth:`list.extend`:: >>> a_list = [] >>> a_list += [1] diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 0b26e18efd7f1b..a4066d42927f64 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -21,7 +21,9 @@ Glossary right delimiters (parentheses, square brackets, curly braces or triple quotes), or after specifying a decorator. - * The :const:`Ellipsis` built-in constant. + .. index:: single: ...; ellipsis literal + + * The three dots form of the :ref:`Ellipsis ` object. abstract base class Abstract base classes complement :term:`duck-typing` by @@ -107,7 +109,7 @@ Glossary statements. asynchronous generator iterator - An object created by a :term:`asynchronous generator` function. + An object created by an :term:`asynchronous generator` function. This is an :term:`asynchronous iterator` which when called using the :meth:`~object.__anext__` method returns an awaitable object which will execute @@ -355,6 +357,12 @@ Glossary tasks (see :mod:`asyncio`) associate each task with a context which becomes the current context whenever the task starts or resumes execution. + cyclic isolate + A subgroup of one or more objects that reference each other in a reference + cycle, but are not referenced by objects outside the group. The goal of + the :term:`cyclic garbage collector ` is to identify these groups and break the reference + cycles so that the memory can be reclaimed. + decorator A function returning another function, usually applied as a function transformation using the ``@wrapper`` syntax. Common examples for @@ -429,6 +437,11 @@ Glossary with :term:`abstract base classes `.) Instead, it typically employs :func:`hasattr` tests or :term:`EAFP` programming. + dunder + An informal short-hand for "double underscore", used when talking about a + :term:`special method`. For example, ``__init__`` is often pronounced + "dunder init". + EAFP Easier to ask for forgiveness than permission. This common Python coding style assumes the existence of valid keys or attributes and catches @@ -456,7 +469,8 @@ Glossary core and with user code. f-string - String literals prefixed with ``'f'`` or ``'F'`` are commonly called + f-strings + String literals prefixed with ``f`` or ``F`` are commonly called "f-strings" which is short for :ref:`formatted string literals `. See also :pep:`498`. @@ -1011,6 +1025,15 @@ Glossary applied to all scopes, only those relying on a known set of local and nonlocal variable names are restricted to optimized scopes. + optional module + An :term:`extension module` that is part of the :term:`standard library`, + but may be absent in some builds of :term:`CPython`, + usually due to missing third-party libraries or because the module + is not available for a given platform. + + See :ref:`optional-module-requirements` for a list of optional modules + that require third-party libraries. + package A Python :term:`module` which can contain submodules or recursively, subpackages. Technically, a package is a Python module with a @@ -1202,6 +1225,11 @@ Glossary :func:`sys.getrefcount` function to return the reference count for a particular object. + In :term:`CPython`, reference counts are not considered to be stable + or well-defined values; the number of references to an object, and how + that number is affected by Python code, may be different between + versions. + regular package A traditional :term:`package`, such as a directory containing an ``__init__.py`` file. @@ -1232,8 +1260,9 @@ Glossary The :class:`collections.abc.Sequence` abstract base class defines a much richer interface that goes beyond just :meth:`~object.__getitem__` and :meth:`~object.__len__`, adding - :meth:`!count`, :meth:`!index`, :meth:`~object.__contains__`, and - :meth:`~object.__reversed__`. Types that implement this expanded + :meth:`~sequence.count`, :meth:`~sequence.index`, + :meth:`~object.__contains__`, and :meth:`~object.__reversed__`. + Types that implement this expanded interface can be registered explicitly using :func:`~abc.ABCMeta.register`. For more documentation on sequence methods generally, see @@ -1274,6 +1303,16 @@ Glossary and ending with double underscores. Special methods are documented in :ref:`specialnames`. + standard library + The collection of :term:`packages `, :term:`modules ` + and :term:`extension modules ` distributed as a part + of the official Python interpreter package. The exact membership of the + collection may vary based on platform, available system libraries, or + other criteria. Documentation can be found at :ref:`library-index`. + + See also :data:`sys.stdlib_module_names` for a list of all possible + standard library module names. + statement A statement is part of a suite (a "block" of code). A statement is either an :term:`expression` or one of several constructs with a keyword, such @@ -1284,6 +1323,9 @@ Glossary issues such as incorrect types. See also :term:`type hints ` and the :mod:`typing` module. + stdlib + An abbreviation of :term:`standard library`. + strong reference In Python's C API, a strong reference is a reference to an object which is owned by the code holding the reference. The strong @@ -1298,6 +1340,12 @@ Glossary See also :term:`borrowed reference`. + t-string + t-strings + String literals prefixed with ``t`` or ``T`` are commonly called + "t-strings" which is short for + :ref:`template string literals `. + text encoding A string in Python is a sequence of Unicode code points (in range ``U+0000``--``U+10FFFF``). To store or transfer a string, it needs to be @@ -1443,6 +1491,11 @@ Glossary A computer defined entirely in software. Python's virtual machine executes the :term:`bytecode` emitted by the bytecode compiler. + walrus operator + A light-hearted way to refer to the :ref:`assignment expression + ` operator ``:=`` because it looks a bit like a + walrus if you turn your head. + Zen of Python Listing of Python design principles and philosophies that are helpful in understanding and using the language. The listing can be found by typing diff --git a/Doc/howto/a-conceptual-overview-of-asyncio.rst b/Doc/howto/a-conceptual-overview-of-asyncio.rst new file mode 100644 index 00000000000000..926e781dbdc658 --- /dev/null +++ b/Doc/howto/a-conceptual-overview-of-asyncio.rst @@ -0,0 +1,608 @@ +.. _a-conceptual-overview-of-asyncio: + +**************************************** +A Conceptual Overview of :mod:`!asyncio` +**************************************** + +This :ref:`HOWTO ` article seeks to help you build a sturdy mental +model of how :mod:`asyncio` fundamentally works, helping you understand the +how and why behind the recommended patterns. + +You might be curious about some key :mod:`!asyncio` concepts. +By the end of this article, you'll be able to comfortably answer these questions: + +- What's happening behind the scenes when an object is awaited? +- How does :mod:`!asyncio` differentiate between a task which doesn't need + CPU time (such as a network request or file read) as opposed to a task that + does (such as computing n-factorial)? +- How to write an asynchronous variant of an operation, such as + an async sleep or database request. + +.. seealso:: + + * The `guide `_ that inspired this HOWTO article, by Alexander Nordin. + * This in-depth `YouTube tutorial series `_ on + ``asyncio`` created by Python core team member, Łukasz Langa. + * `500 Lines or Less: A Web Crawler With asyncio Coroutines `_ by A. + Jesse Jiryu Davis and Guido van Rossum. + +-------------------------------------------- +A conceptual overview part 1: the high-level +-------------------------------------------- + +In part 1, we'll cover the main, high-level building blocks of :mod:`!asyncio`: +the event loop, coroutine functions, coroutine objects, tasks, and ``await``. + +========== +Event Loop +========== + +Everything in :mod:`!asyncio` happens relative to the event loop. +It's the star of the show. +It's like an orchestra conductor. +It's behind the scenes managing resources. +Some power is explicitly granted to it, but a lot of its ability to get things +done comes from the respect and cooperation of its worker bees. + +In more technical terms, the event loop contains a collection of jobs to be run. +Some jobs are added directly by you, and some indirectly by :mod:`!asyncio`. +The event loop takes a job from its backlog of work and invokes it (or "gives +it control"), similar to calling a function, and then that job runs. +Once it pauses or completes, it returns control to the event loop. +The event loop will then select another job from its pool and invoke it. +You can *roughly* think of the collection of jobs as a queue: jobs are added and +then processed one at a time, generally (but not always) in order. +This process repeats indefinitely, with the event loop cycling endlessly +onwards. +If there are no more jobs pending execution, the event loop is smart enough to +rest and avoid needlessly wasting CPU cycles, and will come back when there's +more work to be done. + +Effective execution relies on jobs sharing well and cooperating; a greedy job +could hog control and leave the other jobs to starve, rendering the overall +event loop approach rather useless. + +:: + + import asyncio + + # This creates an event loop and indefinitely cycles through + # its collection of jobs. + event_loop = asyncio.new_event_loop() + event_loop.run_forever() + +===================================== +Asynchronous functions and coroutines +===================================== + +This is a basic, boring Python function:: + + def hello_printer(): + print( + "Hi, I am a lowly, simple printer, though I have all I " + "need in life -- \nfresh paper and my dearly beloved octopus " + "partner in crime." + ) + +Calling a regular function invokes its logic or body:: + + >>> hello_printer() + Hi, I am a lowly, simple printer, though I have all I need in life -- + fresh paper and my dearly beloved octopus partner in crime. + +The :ref:`async def `, as opposed to just a plain ``def``, makes +this an asynchronous function (or "coroutine function"). +Calling it creates and returns a :ref:`coroutine ` object. + +:: + + async def loudmouth_penguin(magic_number: int): + print( + "I am a super special talking penguin. Far cooler than that printer. " + f"By the way, my lucky number is: {magic_number}." + ) + +Calling the async function, ``loudmouth_penguin``, does not execute the print statement; +instead, it creates a coroutine object:: + + >>> loudmouth_penguin(magic_number=3) + + +The terms "coroutine function" and "coroutine object" are often conflated +as coroutine. +That can be confusing! +In this article, coroutine specifically refers to a coroutine object, or more +precisely, an instance of :data:`types.CoroutineType` (native coroutine). +Note that coroutines can also exist as instances of +:class:`collections.abc.Coroutine` -- a distinction that matters for type +checking. + +A coroutine represents the function's body or logic. +A coroutine has to be explicitly started; again, merely creating the coroutine +does not start it. +Notably, the coroutine can be paused and resumed at various points within the +function's body. +That pausing and resuming ability is what allows for asynchronous behavior! + +Coroutines and coroutine functions were built by leveraging the functionality +of :term:`generators ` and +:term:`generator functions `. +Recall, a generator function is a function that :keyword:`yield`\s, like this +one:: + + def get_random_number(): + # This would be a bad random number generator! + print("Hi") + yield 1 + print("Hello") + yield 7 + print("Howdy") + yield 4 + ... + +Similar to a coroutine function, calling a generator function does not run it. +Instead, it creates a generator object:: + + >>> get_random_number() + + +You can proceed to the next ``yield`` of a generator by using the +built-in function :func:`next`. +In other words, the generator runs, then pauses. +For example:: + + >>> generator = get_random_number() + >>> next(generator) + Hi + 1 + >>> next(generator) + Hello + 7 + +===== +Tasks +===== + +Roughly speaking, :ref:`tasks ` are coroutines (not coroutine +functions) tied to an event loop. +A task also maintains a list of callback functions whose importance will become +clear in a moment when we discuss :keyword:`await`. +The recommended way to create tasks is via :func:`asyncio.create_task`. + +Creating a task automatically schedules it for execution (by adding a +callback to run it in the event loop's to-do list, that is, collection of jobs). + +:mod:`!asyncio` automatically associates tasks with the event loop for you. +This automatic association was purposely designed into :mod:`!asyncio` for +the sake of simplicity. +Without it, you'd have to keep track of the event loop object and pass it to +any coroutine function that wants to create tasks, adding redundant clutter +to your code. + +:: + + coroutine = loudmouth_penguin(magic_number=5) + # This creates a Task object and schedules its execution via the event loop. + task = asyncio.create_task(coroutine) + +Earlier, we manually created the event loop and set it to run forever. +In practice, it's recommended to use (and common to see) :func:`asyncio.run`, +which takes care of managing the event loop and ensuring the provided +coroutine finishes before advancing. +For example, many async programs follow this setup:: + + import asyncio + + async def main(): + # Perform all sorts of wacky, wild asynchronous things... + ... + + if __name__ == "__main__": + asyncio.run(main()) + # The program will not reach the following print statement until the + # coroutine main() finishes. + print("coroutine main() is done!") + +It's important to be aware that the task itself is not added to the event loop, +only a callback to the task is. +This matters if the task object you created is garbage collected before it's +called by the event loop. +For example, consider this program: + +.. code-block:: + :linenos: + + async def hello(): + print("hello!") + + async def main(): + asyncio.create_task(hello()) + # Other asynchronous instructions which run for a while + # and cede control to the event loop... + ... + + asyncio.run(main()) + +Because there's no reference to the task object created on line 5, it *might* +be garbage collected before the event loop invokes it. +Later instructions in the coroutine ``main()`` hand control back to the event +loop so it can invoke other jobs. +When the event loop eventually tries to run the task, it might fail and +discover the task object does not exist! +This can also happen even if a coroutine keeps a reference to a task but +completes before that task finishes. +When the coroutine exits, local variables go out of scope and may be subject +to garbage collection. +In practice, ``asyncio`` and Python's garbage collector work pretty hard to +ensure this sort of thing doesn't happen. +But that's no reason to be reckless! + +===== +await +===== + +:keyword:`await` is a Python keyword that's commonly used in one of two +different ways:: + + await task + await coroutine + +In a crucial way, the behavior of ``await`` depends on the type of object +being awaited. + +Awaiting a task will cede control from the current task or coroutine to +the event loop. +In the process of relinquishing control, a few important things happen. +We'll use the following code example to illustrate:: + + async def plant_a_tree(): + dig_the_hole_task = asyncio.create_task(dig_the_hole()) + await dig_the_hole_task + + # Other instructions associated with planting a tree. + ... + +In this example, imagine the event loop has passed control to the start of the +coroutine ``plant_a_tree()``. +As seen above, the coroutine creates a task and then awaits it. +The ``await dig_the_hole_task`` instruction adds a callback (which will resume +``plant_a_tree()``) to the ``dig_the_hole_task`` object's list of callbacks. +And then, the instruction cedes control to the event loop. +Some time later, the event loop will pass control to ``dig_the_hole_task`` +and the task will finish whatever it needs to do. +Once the task finishes, it will add its various callbacks to the event loop, +in this case, a call to resume ``plant_a_tree()``. + +Generally speaking, when the awaited task finishes (``dig_the_hole_task``), +the original task or coroutine (``plant_a_tree()``) is added back to the event +loop's to-do list to be resumed. + +This is a basic, yet reliable mental model. +In practice, the control handoffs are slightly more complex, but not by much. +In part 2, we'll walk through the details that make this possible. + +**Unlike tasks, awaiting a coroutine does not hand control back to the event +loop!** +Wrapping a coroutine in a task first, then awaiting that would cede +control. +The behavior of ``await coroutine`` is effectively the same as invoking a +regular, synchronous Python function. +Consider this program:: + + import asyncio + + async def coro_a(): + print("I am coro_a(). Hi!") + + async def coro_b(): + print("I am coro_b(). I sure hope no one hogs the event loop...") + + async def main(): + task_b = asyncio.create_task(coro_b()) + num_repeats = 3 + for _ in range(num_repeats): + await coro_a() + await task_b + + asyncio.run(main()) + +The first statement in the coroutine ``main()`` creates ``task_b`` and schedules +it for execution via the event loop. +Then, ``coro_a()`` is repeatedly awaited. Control never cedes to the +event loop, which is why we see the output of all three ``coro_a()`` +invocations before ``coro_b()``'s output: + +.. code-block:: none + + I am coro_a(). Hi! + I am coro_a(). Hi! + I am coro_a(). Hi! + I am coro_b(). I sure hope no one hogs the event loop... + +If we change ``await coro_a()`` to ``await asyncio.create_task(coro_a())``, the +behavior changes. +The coroutine ``main()`` cedes control to the event loop with that statement. +The event loop then proceeds through its backlog of work, calling ``task_b`` +and then the task which wraps ``coro_a()`` before resuming the coroutine +``main()``. + +.. code-block:: none + + I am coro_b(). I sure hope no one hogs the event loop... + I am coro_a(). Hi! + I am coro_a(). Hi! + I am coro_a(). Hi! + +This behavior of ``await coroutine`` can trip a lot of people up! +That example highlights how using only ``await coroutine`` could +unintentionally hog control from other tasks and effectively stall the event +loop. +:func:`asyncio.run` can help you detect such occurrences via the +``debug=True`` flag, which enables +:ref:`debug mode `. +Among other things, it will log any coroutines that monopolize execution for +100ms or longer. + +The design intentionally trades off some conceptual clarity around usage of +``await`` for improved performance. +Each time a task is awaited, control needs to be passed all the way up the +call stack to the event loop. +That might sound minor, but in a large program with many ``await`` statements and a deep +call stack, that overhead can add up to a meaningful performance drag. + +------------------------------------------------ +A conceptual overview part 2: the nuts and bolts +------------------------------------------------ + +Part 2 goes into detail on the mechanisms :mod:`!asyncio` uses to manage +control flow. +This is where the magic happens. +You'll come away from this section knowing what ``await`` does behind the scenes +and how to make your own asynchronous operators. + +================================ +The inner workings of coroutines +================================ + +:mod:`!asyncio` leverages four components to pass around control. + +:meth:`coroutine.send(arg) ` is the method used to start or +resume a coroutine. +If the coroutine was paused and is now being resumed, the argument ``arg`` +will be sent in as the return value of the ``yield`` statement which originally +paused it. +If the coroutine is being used for the first time (as opposed to being resumed), +``arg`` must be ``None``. + +.. code-block:: + :linenos: + + class Rock: + def __await__(self): + value_sent_in = yield 7 + print(f"Rock.__await__ resuming with value: {value_sent_in}.") + return value_sent_in + + async def main(): + print("Beginning coroutine main().") + rock = Rock() + print("Awaiting rock...") + value_from_rock = await rock + print(f"Coroutine received value: {value_from_rock} from rock.") + return 23 + + coroutine = main() + intermediate_result = coroutine.send(None) + print(f"Coroutine paused and returned intermediate value: {intermediate_result}.") + + print(f"Resuming coroutine and sending in value: 42.") + try: + coroutine.send(42) + except StopIteration as e: + returned_value = e.value + print(f"Coroutine main() finished and provided value: {returned_value}.") + +:ref:`yield `, as usual, pauses execution and returns control +to the caller. +In the example above, the ``yield``, on line 3, is called by +``... = await rock`` on line 11. +More broadly speaking, ``await`` calls the :meth:`~object.__await__` method of +the given object. +``await`` also does one more very special thing: it propagates (or "passes +along") any ``yield``\ s it receives up the call chain. +In this case, that's back to ``... = coroutine.send(None)`` on line 16. + +The coroutine is resumed via the ``coroutine.send(42)`` call on line 21. +The coroutine picks back up from where it ``yield``\ ed (or paused) on line 3 +and executes the remaining statements in its body. +When a coroutine finishes, it raises a :exc:`StopIteration` exception with the +return value attached in the :attr:`~StopIteration.value` attribute. + +That snippet produces this output: + +.. code-block:: none + + Beginning coroutine main(). + Awaiting rock... + Coroutine paused and returned intermediate value: 7. + Resuming coroutine and sending in value: 42. + Rock.__await__ resuming with value: 42. + Coroutine received value: 42 from rock. + Coroutine main() finished and provided value: 23. + +It's worth pausing for a moment here and making sure you followed the various +ways that control flow and values were passed. A lot of important ideas were +covered and it's worth ensuring your understanding is firm. + +The only way to yield (or effectively cede control) from a coroutine is to +``await`` an object that ``yield``\ s in its ``__await__`` method. +That might sound odd to you. You might be thinking: + + 1. What about a ``yield`` directly within the coroutine function? The + coroutine function becomes an + :ref:`async generator function `, a + different beast entirely. + + 2. What about a :ref:`yield from ` within the coroutine function to a (plain) + generator? + That causes the error: ``SyntaxError: yield from not allowed in a coroutine.`` + This was intentionally designed for the sake of simplicity -- mandating only + one way of using coroutines. + Initially ``yield`` was barred as well, but was re-accepted to allow for + async generators. + Despite that, ``yield from`` and ``await`` effectively do the same thing. + +======= +Futures +======= + +A :ref:`future ` is an object meant to represent a +computation's status and result. +The term is a nod to the idea of something still to come or not yet happened, +and the object is a way to keep an eye on that something. + +A future has a few important attributes. One is its state, which can be either +"pending", "cancelled", or "done". +Another is its result, which is set when the state transitions to done. +Unlike a coroutine, a future does not represent the actual computation to be +done; instead, it represents the status and result of that computation, kind of +like a status light (red, yellow, or green) or indicator. + +:class:`asyncio.Task` subclasses :class:`asyncio.Future` in order to gain +these various capabilities. +The prior section said tasks store a list of callbacks, which wasn't entirely +accurate. +It's actually the ``Future`` class that implements this logic, which ``Task`` +inherits. + +Futures may also be used directly (not via tasks). +Tasks mark themselves as done when their coroutine is complete. +Futures are much more versatile and will be marked as done when you say so. +In this way, they're the flexible interface for you to make your own conditions +for waiting and resuming. + +======================== +A homemade asyncio.sleep +======================== + +We'll go through an example of how you could leverage a future to create your +own variant of asynchronous sleep (``async_sleep``) which mimics +:func:`asyncio.sleep`. + +This snippet registers a few tasks with the event loop and then awaits the task +created by ``asyncio.create_task``, which wraps the ``async_sleep(3)`` coroutine. +We want that task to finish only after three seconds have elapsed, but without +preventing other tasks from running. + +:: + + async def other_work(): + print("I like work. Work work.") + + async def main(): + # Add a few other tasks to the event loop, so there's something + # to do while asynchronously sleeping. + work_tasks = [ + asyncio.create_task(other_work()), + asyncio.create_task(other_work()), + asyncio.create_task(other_work()) + ] + print( + "Beginning asynchronous sleep at time: " + f"{datetime.datetime.now().strftime("%H:%M:%S")}." + ) + await asyncio.create_task(async_sleep(3)) + print( + "Done asynchronous sleep at time: " + f"{datetime.datetime.now().strftime("%H:%M:%S")}." + ) + # asyncio.gather effectively awaits each task in the collection. + await asyncio.gather(*work_tasks) + + +Below, we use a future to enable custom control over when that task will be +marked as done. +If :meth:`future.set_result() ` (the method +responsible for marking that future as done) is never called, then this task +will never finish. +We've also enlisted the help of another task, which we'll see in a moment, that +will monitor how much time has elapsed and, accordingly, call +``future.set_result()``. + +:: + + async def async_sleep(seconds: float): + future = asyncio.Future() + time_to_wake = time.time() + seconds + # Add the watcher-task to the event loop. + watcher_task = asyncio.create_task(_sleep_watcher(future, time_to_wake)) + # Block until the future is marked as done. + await future + +Below, we use a rather bare ``YieldToEventLoop()`` object to ``yield`` +from its ``__await__`` method, ceding control to the event loop. +This is effectively the same as calling ``asyncio.sleep(0)``, but this approach +offers more clarity, not to mention it's somewhat cheating to use +``asyncio.sleep`` when showcasing how to implement it! + +As usual, the event loop cycles through its tasks, giving them control +and receiving control back when they pause or finish. +The ``watcher_task``, which runs the coroutine ``_sleep_watcher(...)``, will +be invoked once per full cycle of the event loop. +On each resumption, it'll check the time and if not enough has elapsed, then +it'll pause once again and hand control back to the event loop. +Once enough time has elapsed, ``_sleep_watcher(...)`` +marks the future as done and completes by exiting its +infinite ``while`` loop. +Given this helper task is only invoked once per cycle of the event loop, +you'd be correct to note that this asynchronous sleep will sleep *at least* +three seconds, rather than exactly three seconds. +Note this is also true of ``asyncio.sleep``. + +:: + + class YieldToEventLoop: + def __await__(self): + yield + + async def _sleep_watcher(future, time_to_wake): + while True: + if time.time() >= time_to_wake: + # This marks the future as done. + future.set_result(None) + break + else: + await YieldToEventLoop() + +Here is the full program's output: + +.. code-block:: none + + $ python custom-async-sleep.py + Beginning asynchronous sleep at time: 14:52:22. + I like work. Work work. + I like work. Work work. + I like work. Work work. + Done asynchronous sleep at time: 14:52:25. + +You might feel this implementation of asynchronous sleep was unnecessarily +convoluted. +And, well, it was. +The example was meant to showcase the versatility of futures with a simple +example that could be mimicked for more complex needs. +For reference, you could implement it without futures, like so:: + + async def simpler_async_sleep(seconds): + time_to_wake = time.time() + seconds + while True: + if time.time() >= time_to_wake: + return + else: + await YieldToEventLoop() + +But that's all for now. Hopefully you're ready to more confidently dive into +some async programming or check out advanced topics in the +:mod:`rest of the documentation `. diff --git a/Doc/howto/annotations.rst b/Doc/howto/annotations.rst index 78f3704ba5d000..d7deb6c6bc1768 100644 --- a/Doc/howto/annotations.rst +++ b/Doc/howto/annotations.rst @@ -248,4 +248,9 @@ quirks by using :func:`annotationlib.get_annotations` on Python 3.14+ or :func:`inspect.get_annotations` on Python 3.10+. On earlier versions of Python, you can avoid these bugs by accessing the annotations from the class's :attr:`~type.__dict__` -(e.g., ``cls.__dict__.get('__annotations__', None)``). +(for example, ``cls.__dict__.get('__annotations__', None)``). + +In some versions of Python, instances of classes may have an ``__annotations__`` +attribute. However, this is not supported functionality. If you need the +annotations of an instance, you can use :func:`type` to access its class +(for example, ``annotationlib.get_annotations(type(myinstance))`` on Python 3.14+). diff --git a/Doc/howto/cporting.rst b/Doc/howto/cporting.rst index 7773620b40b973..cf857aed0425ec 100644 --- a/Doc/howto/cporting.rst +++ b/Doc/howto/cporting.rst @@ -14,13 +14,11 @@ We recommend the following resources for porting extension modules to Python 3: module. * The `Porting guide`_ from the *py3c* project provides opinionated suggestions with supporting code. -* The `Cython`_ and `CFFI`_ libraries offer abstractions over - Python's C API. +* :ref:`Recommended third party tools ` offer abstractions over + the Python's C API. Extensions generally need to be re-written to use one of them, but the library then handles differences between various Python versions and implementations. .. _Migrating C extensions: http://python3porting.com/cextensions.html .. _Porting guide: https://py3c.readthedocs.io/en/latest/guide.html -.. _Cython: https://cython.org/ -.. _CFFI: https://cffi.readthedocs.io/en/latest/ diff --git a/Doc/howto/descriptor.rst b/Doc/howto/descriptor.rst index f6c3e473f1c36d..9d5a9ac8b718cb 100644 --- a/Doc/howto/descriptor.rst +++ b/Doc/howto/descriptor.rst @@ -420,7 +420,7 @@ Here are three practical data validation utilities: def validate(self, value): if not isinstance(value, str): - raise TypeError(f'Expected {value!r} to be an str') + raise TypeError(f'Expected {value!r} to be a str') if self.minsize is not None and len(value) < self.minsize: raise ValueError( f'Expected {value!r} to be no smaller than {self.minsize!r}' diff --git a/Doc/howto/enum.rst b/Doc/howto/enum.rst index 6441b7aed1eda8..7713aede6d564a 100644 --- a/Doc/howto/enum.rst +++ b/Doc/howto/enum.rst @@ -990,9 +990,9 @@ Supported ``_sunder_`` names from the final class - :meth:`~Enum._generate_next_value_` -- used to get an appropriate value for an enum member; may be overridden -- :meth:`~EnumType._add_alias_` -- adds a new name as an alias to an existing +- :meth:`~Enum._add_alias_` -- adds a new name as an alias to an existing member. -- :meth:`~EnumType._add_value_alias_` -- adds a new value as an alias to an +- :meth:`~Enum._add_value_alias_` -- adds a new value as an alias to an existing member. See `MultiValueEnum`_ for an example. .. note:: diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 3f6ee517050bd8..fe960f90e44168 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -6,8 +6,8 @@ C API Extension Support for Free Threading ****************************************** -Starting with the 3.13 release, CPython has experimental support for running -with the :term:`global interpreter lock` (GIL) disabled in a configuration +Starting with the 3.13 release, CPython has support for running with +the :term:`global interpreter lock` (GIL) disabled in a configuration called :term:`free threading`. This document describes how to adapt C API extensions to support free threading. @@ -23,6 +23,14 @@ You can use it to enable code that only runs under the free-threaded build:: /* code that only runs in the free-threaded build */ #endif +.. note:: + + On Windows, this macro is not defined automatically, but must be specified + to the compiler when building. The :func:`sysconfig.get_config_var` function + can be used to determine whether the current running interpreter had the + macro defined. + + Module Initialization ===================== @@ -153,6 +161,8 @@ that return :term:`strong references `. +===================================+===================================+ | :c:func:`PyList_GetItem` | :c:func:`PyList_GetItemRef` | +-----------------------------------+-----------------------------------+ +| :c:func:`PyList_GET_ITEM` | :c:func:`PyList_GetItemRef` | ++-----------------------------------+-----------------------------------+ | :c:func:`PyDict_GetItem` | :c:func:`PyDict_GetItemRef` | +-----------------------------------+-----------------------------------+ | :c:func:`PyDict_GetItemWithError` | :c:func:`PyDict_GetItemRef` | @@ -193,7 +203,7 @@ Memory Allocation APIs Python's memory management C API provides functions in three different :ref:`allocation domains `: "raw", "mem", and "object". For thread-safety, the free-threaded build requires that only Python objects -are allocated using the object domain, and that all Python object are +are allocated using the object domain, and that all Python objects are allocated using that domain. This differs from the prior Python versions, where this was only a best practice and not a hard requirement. @@ -334,12 +344,12 @@ This means you cannot rely on nested critical sections to lock multiple objects at once, as the inner critical section may suspend the outer ones. Instead, use :c:macro:`Py_BEGIN_CRITICAL_SECTION2` to lock two objects simultaneously. -Note that the locks described above are only :c:type:`!PyMutex` based locks. +Note that the locks described above are only :c:type:`PyMutex` based locks. The critical section implementation does not know about or affect other locking mechanisms that might be in use, like POSIX mutexes. Also note that while -blocking on any :c:type:`!PyMutex` causes the critical sections to be +blocking on any :c:type:`PyMutex` causes the critical sections to be suspended, only the mutexes that are part of the critical sections are -released. If :c:type:`!PyMutex` is used without a critical section, it will +released. If :c:type:`PyMutex` is used without a critical section, it will not be released and therefore does not get the same deadlock avoidance. Important Considerations @@ -387,8 +397,9 @@ The wheels, shared libraries, and binaries are indicated by a ``t`` suffix. * `pypa/manylinux `_ supports the free-threaded build, with the ``t`` suffix, such as ``python3.13t``. * `pypa/cibuildwheel `_ supports the - free-threaded build if you set - `CIBW_FREE_THREADED_SUPPORT `_. + free-threaded build on Python 3.13 and 3.14. On Python 3.14, free-threaded + wheels will be built by default. On Python 3.13, you will need to set + `CIBW_ENABLE to cpython-freethreading `_. Limited C API and Stable ABI ............................ diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst index f7a894ac2cd78e..380c2be04957d5 100644 --- a/Doc/howto/free-threading-python.rst +++ b/Doc/howto/free-threading-python.rst @@ -1,18 +1,19 @@ .. _freethreading-python-howto: -********************************************** -Python experimental support for free threading -********************************************** +********************************* +Python support for free threading +********************************* -Starting with the 3.13 release, CPython has experimental support for a build of +Starting with the 3.13 release, CPython has support for a build of Python called :term:`free threading` where the :term:`global interpreter lock` (GIL) is disabled. Free-threaded execution allows for full utilization of the available processing power by running threads in parallel on available CPU cores. While not all software will benefit from this automatically, programs designed with threading in mind will run faster on multi-core hardware. -**The free-threaded mode is experimental** and work is ongoing to improve it: -expect some bugs and a substantial single-threaded performance hit. +Some third-party packages, in particular ones +with an :term:`extension module`, may not be ready for use in a +free-threaded build, and will re-enable the :term:`GIL`. This document describes the implications of free threading for Python code. See :ref:`freethreading-extensions-howto` for information on @@ -32,7 +33,7 @@ optionally support installing free-threaded Python binaries. The installers are available at https://www.python.org/downloads/. For information on other platforms, see the `Installing a Free-Threaded Python -`_, a +`_, a community-maintained installation guide for installing free-threaded Python. When building CPython from source, the :option:`--disable-gil` configure option @@ -43,7 +44,7 @@ Identifying free-threaded Python ================================ To check if the current interpreter supports free-threading, :option:`python -VV <-V>` -and :data:`sys.version` contain "experimental free-threading build". +and :data:`sys.version` contain "free-threading build". The new :func:`sys._is_gil_enabled` function can be used to check whether the GIL is actually disabled in the running process. @@ -98,60 +99,42 @@ This section describes known limitations of the free-threaded CPython build. Immortalization --------------- -The free-threaded build of the 3.13 release makes some objects :term:`immortal`. +In the free-threaded build, some objects are :term:`immortal`. Immortal objects are not deallocated and have reference counts that are never modified. This is done to avoid reference count contention that would prevent efficient multi-threaded scaling. -An object will be made immortal when a new thread is started for the first time -after the main thread is running. The following objects are immortalized: +As of the 3.14 release, immortalization is limited to: -* :ref:`function ` objects declared at the module level -* :ref:`method ` descriptors -* :ref:`code ` objects -* :term:`module` objects and their dictionaries -* :ref:`classes ` (type objects) - -Because immortal objects are never deallocated, applications that create many -objects of these types may see increased memory usage. This is expected to be -addressed in the 3.14 release. - -Additionally, numeric and string literals in the code as well as strings -returned by :func:`sys.intern` are also immortalized. This behavior is -expected to remain in the 3.14 free-threaded build. +* Code constants: numeric literals, string literals, and tuple literals + composed of other constants. +* Strings interned by :func:`sys.intern`. Frame objects ------------- -It is not safe to access :ref:`frame ` objects from other -threads and doing so may cause your program to crash . This means that -:func:`sys._current_frames` is generally not safe to use in a free-threaded -build. Functions like :func:`inspect.currentframe` and :func:`sys._getframe` -are generally safe as long as the resulting frame object is not passed to -another thread. +It is not safe to access :attr:`frame.f_locals` from a :ref:`frame ` +object if that frame is currently executing in another thread, and doing so may +crash the interpreter. + Iterators --------- -Sharing the same iterator object between multiple threads is generally not -safe and threads may see duplicate or missing elements when iterating or crash -the interpreter. +It is generally not thread-safe to access the same iterator object from +multiple threads concurrently, and threads may see duplicate or missing +elements. Single-threaded performance --------------------------- The free-threaded build has additional overhead when executing Python code -compared to the default GIL-enabled build. In 3.13, this overhead is about -40% on the `pyperformance `_ suite. -Programs that spend most of their time in C extensions or I/O will see -less of an impact. The largest impact is because the specializing adaptive -interpreter (:pep:`659`) is disabled in the free-threaded build. We expect -to re-enable it in a thread-safe way in the 3.14 release. This overhead is -expected to be reduced in upcoming Python release. We are aiming for an -overhead of 10% or less on the pyperformance suite compared to the default -GIL-enabled build. +compared to the default GIL-enabled build. The amount of overhead depends +on the workload and hardware. On the pyperformance benchmark suite, the +average overhead ranges from about 1% on macOS aarch64 to 8% on x86-64 Linux +systems. Behavioral changes diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 1f0608fb0fc53f..320dcf7eaca113 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -4,7 +4,7 @@ Functional Programming HOWTO ******************************** -:Author: A. M. Kuchling +:Author: \A. M. Kuchling :Release: 0.32 In this document, we'll take a tour of Python's features suitable for @@ -602,7 +602,7 @@ generators: raise an exception inside the generator; the exception is raised by the ``yield`` expression where the generator's execution is paused. -* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the +* :meth:`~generator.close` sends a :exc:`GeneratorExit` exception to the generator to terminate the iteration. On receiving this exception, the generator's code must either raise :exc:`GeneratorExit` or :exc:`StopIteration`; catching the exception and doing anything else is @@ -1217,7 +1217,7 @@ flow inside a program. The book uses Scheme for its examples, but many of the design approaches described in these chapters are applicable to functional-style Python code. -https://www.defmacro.org/ramblings/fp.html: A general introduction to functional +https://defmacro.org/2006/06/19/fp.html: A general introduction to functional programming that uses Java examples and has a lengthy historical introduction. https://en.wikipedia.org/wiki/Functional_programming: General Wikipedia entry diff --git a/Doc/howto/index.rst b/Doc/howto/index.rst index f350141004c2db..81fc7e63f35bd7 100644 --- a/Doc/howto/index.rst +++ b/Doc/howto/index.rst @@ -1,3 +1,5 @@ +.. _how-tos: + *************** Python HOWTOs *************** @@ -11,6 +13,7 @@ Python Library Reference. :maxdepth: 1 :hidden: + a-conceptual-overview-of-asyncio.rst cporting.rst curses.rst descriptor.rst @@ -38,6 +41,7 @@ Python Library Reference. General: +* :ref:`a-conceptual-overview-of-asyncio` * :ref:`annotations-howto` * :ref:`argparse-tutorial` * :ref:`descriptorhowto` diff --git a/Doc/howto/instrumentation.rst b/Doc/howto/instrumentation.rst index 6e03ef20a21fa3..b3db1189e5dcbc 100644 --- a/Doc/howto/instrumentation.rst +++ b/Doc/howto/instrumentation.rst @@ -269,6 +269,8 @@ should instead read: (assuming a :ref:`debug build ` of CPython 3.6) +.. _static-markers: + Available static markers ------------------------ diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index a636e06bda8344..6092c75f48fdef 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -168,7 +168,7 @@ possible, consider explicit locking. If it is necessary to use process-global state, the simplest way to avoid issues with multiple interpreters is to explicitly prevent a module from being loaded more than once per process—see -`Opt-Out: Limiting to One Module Object per Process`_. +:ref:`isolating-extensions-optout`. Managing Per-Module State @@ -207,6 +207,8 @@ An example of a module with per-module state is currently available as example module initialization shown at the bottom of the file. +.. _isolating-extensions-optout: + Opt-Out: Limiting to One Module Object per Process -------------------------------------------------- @@ -215,21 +217,36 @@ multiple interpreters correctly. If this is not yet the case for your module, you can explicitly make your module loadable only once per process. For example:: + // A process-wide flag static int loaded = 0; + // Mutex to provide thread safety (only needed for free-threaded Python) + static PyMutex modinit_mutex = {0}; + static int exec_module(PyObject* module) { + PyMutex_Lock(&modinit_mutex); if (loaded) { + PyMutex_Unlock(&modinit_mutex); PyErr_SetString(PyExc_ImportError, "cannot load module more than once per process"); return -1; } loaded = 1; + PyMutex_Unlock(&modinit_mutex); // ... rest of initialization } +If your module's :c:member:`PyModuleDef.m_clear` function is able to prepare +for future re-initialization, it should clear the ``loaded`` flag. +In this case, your module won't support multiple instances existing +*concurrently*, but it will, for example, support being loaded after +Python runtime shutdown (:c:func:`Py_FinalizeEx`) and re-initialization +(:c:func:`Py_Initialize`). + + Module State Access from Functions ---------------------------------- @@ -336,7 +353,7 @@ garbage collection protocol. That is, heap types should: - Have the :c:macro:`Py_TPFLAGS_HAVE_GC` flag. -- Define a traverse function using ``Py_tp_traverse``, which +- Define a traverse function using :c:data:`Py_tp_traverse`, which visits the type (e.g. using ``Py_VISIT(Py_TYPE(self))``). Please refer to the documentation of @@ -436,7 +453,7 @@ Avoiding ``PyObject_New`` GC-tracked objects need to be allocated using GC-aware functions. -If you use use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: +If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: - Get and call type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible. That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with:: @@ -609,8 +626,7 @@ Open Issues Several issues around per-module state and heap types are still open. -Discussions about improving the situation are best held on the `capi-sig -mailing list `__. +Discussions about improving the situation are best held on the `discuss forum under c-api tag `__. Per-Class Scope diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index 7d64a02358adb3..52537a91df542c 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -4078,6 +4078,104 @@ lines. With this approach, you get better output: WARNING:demo: 1/0 WARNING:demo:ZeroDivisionError: division by zero +How to uniformly handle newlines in logging output +-------------------------------------------------- + +Usually, messages that are logged (say to console or file) consist of a single +line of text. However, sometimes there is a need to handle messages with +multiple lines - whether because a logging format string contains newlines, or +logged data contains newlines. If you want to handle such messages uniformly, so +that each line in the logged message appears uniformly formatted as if it was +logged separately, you can do this using a handler mixin, as in the following +snippet: + +.. code-block:: python + + # Assume this is in a module mymixins.py + import copy + + class MultilineMixin: + def emit(self, record): + s = record.getMessage() + if '\n' not in s: + super().emit(record) + else: + lines = s.splitlines() + rec = copy.copy(record) + rec.args = None + for line in lines: + rec.msg = line + super().emit(rec) + +You can use the mixin as in the following script: + +.. code-block:: python + + import logging + + from mymixins import MultilineMixin + + logger = logging.getLogger(__name__) + + class StreamHandler(MultilineMixin, logging.StreamHandler): + pass + + if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)-9s %(message)s', + handlers = [StreamHandler()]) + logger.debug('Single line') + logger.debug('Multiple lines:\nfool me once ...') + logger.debug('Another single line') + logger.debug('Multiple lines:\n%s', 'fool me ...\ncan\'t get fooled again') + +The script, when run, prints something like: + +.. code-block:: text + + 2025-07-02 13:54:47,234 DEBUG Single line + 2025-07-02 13:54:47,234 DEBUG Multiple lines: + 2025-07-02 13:54:47,234 DEBUG fool me once ... + 2025-07-02 13:54:47,234 DEBUG Another single line + 2025-07-02 13:54:47,234 DEBUG Multiple lines: + 2025-07-02 13:54:47,234 DEBUG fool me ... + 2025-07-02 13:54:47,234 DEBUG can't get fooled again + +If, on the other hand, you are concerned about `log injection +`_, you can use a +formatter which escapes newlines, as per the following example: + +.. code-block:: python + + import logging + + logger = logging.getLogger(__name__) + + class EscapingFormatter(logging.Formatter): + def format(self, record): + s = super().format(record) + return s.replace('\n', r'\n') + + if __name__ == '__main__': + h = logging.StreamHandler() + h.setFormatter(EscapingFormatter('%(asctime)s %(levelname)-9s %(message)s')) + logging.basicConfig(level=logging.DEBUG, handlers = [h]) + logger.debug('Single line') + logger.debug('Multiple lines:\nfool me once ...') + logger.debug('Another single line') + logger.debug('Multiple lines:\n%s', 'fool me ...\ncan\'t get fooled again') + +You can, of course, use whatever escaping scheme makes the most sense for you. +The script, when run, should produce output like this: + +.. code-block:: text + + 2025-07-09 06:47:33,783 DEBUG Single line + 2025-07-09 06:47:33,783 DEBUG Multiple lines:\nfool me once ... + 2025-07-09 06:47:33,783 DEBUG Another single line + 2025-07-09 06:47:33,783 DEBUG Multiple lines:\nfool me ...\ncan't get fooled again + +Escaping behaviour can't be the stdlib default , as it would break backwards +compatibility. .. patterns-to-avoid: diff --git a/Doc/howto/logging.rst b/Doc/howto/logging.rst index 2982cf88bf97b4..b7225ff1c2cbfc 100644 --- a/Doc/howto/logging.rst +++ b/Doc/howto/logging.rst @@ -302,10 +302,10 @@ reading the following sections. If you're ready for that, grab some of your favourite beverage and carry on. If your logging needs are simple, then use the above examples to incorporate -logging into your own scripts, and if you run into problems or don't -understand something, please post a question on the comp.lang.python Usenet -group (available at https://groups.google.com/g/comp.lang.python) and you -should receive help before too long. +logging into your own scripts, and if you run into problems or don't understand +something, please post a question in the Help category of the `Python +discussion forum `_ and you should receive +help before too long. Still here? You can carry on reading the next few sections, which provide a slightly more advanced/in-depth tutorial than the basic one above. After that, diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index e543f6d5657d79..7486a378dbb06f 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -1016,7 +1016,9 @@ extension. This regular expression matches ``foo.bar`` and Now, consider complicating the problem a bit; what if you want to match filenames where the extension is not ``bat``? Some incorrect attempts: -``.*[.][^b].*$`` The first attempt above tries to exclude ``bat`` by requiring +``.*[.][^b].*$`` + +The first attempt above tries to exclude ``bat`` by requiring that the first character of the extension is not a ``b``. This is wrong, because the pattern also doesn't match ``foo.bar``. @@ -1043,7 +1045,9 @@ confusing. A negative lookahead cuts through all this confusion: -``.*[.](?!bat$)[^.]*$`` The negative lookahead means: if the expression ``bat`` +``.*[.](?!bat$)[^.]*$`` + +The negative lookahead means: if the expression ``bat`` doesn't match at this point, try the rest of the pattern; if ``bat$`` does match, the whole pattern will fail. The trailing ``$`` is required to ensure that something like ``sample.batch``, where the extension only starts with diff --git a/Doc/howto/remote_debugging.rst b/Doc/howto/remote_debugging.rst index 3adb6ad03e5445..78b40bcdf7127b 100644 --- a/Doc/howto/remote_debugging.rst +++ b/Doc/howto/remote_debugging.rst @@ -3,6 +3,78 @@ Remote debugging attachment protocol ==================================== +This protocol enables external tools to attach to a running CPython process and +execute Python code remotely. + +Most platforms require elevated privileges to attach to another Python process. + +.. _permission-requirements: + +Permission requirements +======================= + +Attaching to a running Python process for remote debugging requires elevated +privileges on most platforms. The specific requirements and troubleshooting +steps depend on your operating system: + +.. rubric:: Linux + +The tracer process must have the ``CAP_SYS_PTRACE`` capability or equivalent +privileges. You can only trace processes you own and can signal. Tracing may +fail if the process is already being traced, or if it is running with +set-user-ID or set-group-ID. Security modules like Yama may further restrict +tracing. + +To temporarily relax ptrace restrictions (until reboot), run: + + ``echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope`` + +.. note:: + + Disabling ``ptrace_scope`` reduces system hardening and should only be done + in trusted environments. + +If running inside a container, use ``--cap-add=SYS_PTRACE`` or +``--privileged``, and run as root if needed. + +Try re-running the command with elevated privileges: + + ``sudo -E !!`` + + +.. rubric:: macOS + +To attach to another process, you typically need to run your debugging tool +with elevated privileges. This can be done by using ``sudo`` or running as +root. + +Even when attaching to processes you own, macOS may block debugging unless +the debugger is run with root privileges due to system security restrictions. + + +.. rubric:: Windows + +To attach to another process, you usually need to run your debugging tool +with administrative privileges. Start the command prompt or terminal as +Administrator. + +Some processes may still be inaccessible even with Administrator rights, +unless you have the ``SeDebugPrivilege`` privilege enabled. + +To resolve file or folder access issues, adjust the security permissions: + + 1. Right-click the file or folder and select **Properties**. + 2. Go to the **Security** tab to view users and groups with access. + 3. Click **Edit** to modify permissions. + 4. Select your user account. + 5. In **Permissions**, check **Read** or **Full control** as needed. + 6. Click **Apply**, then **OK** to confirm. + + +.. note:: + + Ensure you've satisfied all :ref:`permission-requirements` before proceeding. + This section describes the low-level protocol that enables external tools to inject and execute a Python script within a running CPython process. @@ -374,13 +446,13 @@ To locate a thread: reliable thread to target. 3. Optionally, use the offset ``interpreter_state.threads_head`` to iterate -through the linked list of all thread states. Each ``PyThreadState`` structure -contains a ``native_thread_id`` field, which may be compared to a target thread -ID to find a specific thread. + through the linked list of all thread states. Each ``PyThreadState`` + structure contains a ``native_thread_id`` field, which may be compared to + a target thread ID to find a specific thread. -1. Once a valid ``PyThreadState`` has been found, its address can be used in -later steps of the protocol, such as writing debugger control fields and -scheduling execution. +4. Once a valid ``PyThreadState`` has been found, its address can be used in + later steps of the protocol, such as writing debugger control fields and + scheduling execution. The following is an example implementation that locates the main thread state:: @@ -454,15 +526,15 @@ its fields are defined by the ``_Py_DebugOffsets`` structure and include the following: - ``debugger_script_path``: A fixed-size buffer that holds the full path to a - Python source file (``.py``). This file must be accessible and readable by - the target process when execution is triggered. + Python source file (``.py``). This file must be accessible and readable by + the target process when execution is triggered. - ``debugger_pending_call``: An integer flag. Setting this to ``1`` tells the - interpreter that a script is ready to be executed. + interpreter that a script is ready to be executed. - ``eval_breaker``: A field checked by the interpreter during execution. - Setting bit 5 (``_PY_EVAL_PLEASE_STOP_BIT``, value ``1U << 5``) in this - field causes the interpreter to pause and check for debugger activity. + Setting bit 5 (``_PY_EVAL_PLEASE_STOP_BIT``, value ``1U << 5``) in this + field causes the interpreter to pause and check for debugger activity. To complete the injection, the debugger must perform the following steps: diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst index 254fe729355353..243cc27bac7025 100644 --- a/Doc/howto/unicode.rst +++ b/Doc/howto/unicode.rst @@ -352,6 +352,8 @@ If you don't include such a comment, the default encoding used will be UTF-8 as already mentioned. See also :pep:`263` for more information. +.. _unicode-properties: + Unicode Properties ------------------ diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index 33a2a7ea89ea07..4e77d2cb407f72 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -15,7 +15,7 @@ Introduction You may also find useful the following article on fetching web resources with Python: - * `Basic Authentication `_ + * `Basic Authentication `__ A tutorial on *Basic Authentication*, with examples in Python. @@ -245,75 +245,27 @@ codes in the 100--299 range indicate success, you will usually only see error codes in the 400--599 range. :attr:`http.server.BaseHTTPRequestHandler.responses` is a useful dictionary of -response codes in that shows all the response codes used by :rfc:`2616`. The -dictionary is reproduced here for convenience :: +response codes that shows all the response codes used by :rfc:`2616`. +An excerpt from the dictionary is shown below :: - # Table mapping response codes to messages; entries have the - # form {code: (shortmessage, longmessage)}. responses = { - 100: ('Continue', 'Request received, please continue'), - 101: ('Switching Protocols', - 'Switching to new protocol; obey Upgrade header'), - - 200: ('OK', 'Request fulfilled, document follows'), - 201: ('Created', 'Document created, URL follows'), - 202: ('Accepted', - 'Request accepted, processing continues off-line'), - 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), - 204: ('No Content', 'Request fulfilled, nothing follows'), - 205: ('Reset Content', 'Clear input form for further input.'), - 206: ('Partial Content', 'Partial content follows.'), - - 300: ('Multiple Choices', - 'Object has several resources -- see URI list'), - 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), - 302: ('Found', 'Object moved temporarily -- see URI list'), - 303: ('See Other', 'Object moved -- see Method and URL list'), - 304: ('Not Modified', - 'Document has not changed since given time'), - 305: ('Use Proxy', - 'You must use proxy specified in Location to access this ' - 'resource.'), - 307: ('Temporary Redirect', - 'Object moved temporarily -- see URI list'), - - 400: ('Bad Request', - 'Bad request syntax or unsupported method'), - 401: ('Unauthorized', - 'No permission -- see authorization schemes'), - 402: ('Payment Required', - 'No payment -- see charging schemes'), - 403: ('Forbidden', - 'Request forbidden -- authorization will not help'), - 404: ('Not Found', 'Nothing matches the given URI'), - 405: ('Method Not Allowed', - 'Specified method is invalid for this server.'), - 406: ('Not Acceptable', 'URI not available in preferred format.'), - 407: ('Proxy Authentication Required', 'You must authenticate with ' - 'this proxy before proceeding.'), - 408: ('Request Timeout', 'Request timed out; try again later.'), - 409: ('Conflict', 'Request conflict.'), - 410: ('Gone', - 'URI no longer exists and has been permanently removed.'), - 411: ('Length Required', 'Client must specify Content-Length.'), - 412: ('Precondition Failed', 'Precondition in headers is false.'), - 413: ('Request Entity Too Large', 'Entity is too large.'), - 414: ('Request-URI Too Long', 'URI is too long.'), - 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), - 416: ('Requested Range Not Satisfiable', - 'Cannot satisfy request range.'), - 417: ('Expectation Failed', - 'Expect condition could not be satisfied.'), - - 500: ('Internal Server Error', 'Server got itself in trouble'), - 501: ('Not Implemented', - 'Server does not support this operation'), - 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), - 503: ('Service Unavailable', - 'The server cannot process the request due to a high load'), - 504: ('Gateway Timeout', - 'The gateway server did not receive a timely response'), - 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + ... + : ('OK', 'Request fulfilled, document follows'), + ... + : ('Forbidden', + 'Request forbidden -- authorization will ' + 'not help'), + : ('Not Found', + 'Nothing matches the given URI'), + ... + : ("I'm a Teapot", + 'Server refuses to brew coffee because ' + 'it is a teapot'), + ... + : ('Service Unavailable', + 'The server cannot process the ' + 'request due to a high load'), + ... } When an error is raised the server responds by returning an HTTP error code diff --git a/Doc/includes/newtypes/custom.c b/Doc/includes/newtypes/custom.c index 5253f879360210..039a1a7219349c 100644 --- a/Doc/includes/newtypes/custom.c +++ b/Doc/includes/newtypes/custom.c @@ -16,28 +16,37 @@ static PyTypeObject CustomType = { .tp_new = PyType_GenericNew, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + // Just use this while using static types + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom2.c b/Doc/includes/newtypes/custom2.c index a87917583ca495..1ff8e707d1b0a0 100644 --- a/Doc/includes/newtypes/custom2.c +++ b/Doc/includes/newtypes/custom2.c @@ -106,28 +106,36 @@ static PyTypeObject CustomType = { .tp_methods = Custom_methods, }; -static PyModuleDef custommodule = { - .m_base =PyModuleDef_HEAD_INIT, +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom2", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom2(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom3.c b/Doc/includes/newtypes/custom3.c index 854034d4066d20..22f50eb0e1de89 100644 --- a/Doc/includes/newtypes/custom3.c +++ b/Doc/includes/newtypes/custom3.c @@ -151,28 +151,36 @@ static PyTypeObject CustomType = { .tp_getset = Custom_getsetters, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom3", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom3(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom4.c b/Doc/includes/newtypes/custom4.c index a0a1eeb289190b..07585aff5987f3 100644 --- a/Doc/includes/newtypes/custom4.c +++ b/Doc/includes/newtypes/custom4.c @@ -170,28 +170,36 @@ static PyTypeObject CustomType = { .tp_getset = Custom_getsetters, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom4", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom4(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/sublist.c b/Doc/includes/newtypes/sublist.c index 00664f3454156f..b784456a4ef667 100644 --- a/Doc/includes/newtypes/sublist.c +++ b/Doc/includes/newtypes/sublist.c @@ -31,7 +31,7 @@ SubList_init(PyObject *op, PyObject *args, PyObject *kwds) } static PyTypeObject SubListType = { - PyVarObject_HEAD_INIT(NULL, 0) + .ob_base = PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "sublist.SubList", .tp_doc = PyDoc_STR("SubList objects"), .tp_basicsize = sizeof(SubListObject), @@ -41,29 +41,37 @@ static PyTypeObject SubListType = { .tp_methods = SubList_methods, }; -static PyModuleDef sublistmodule = { - PyModuleDef_HEAD_INIT, +static int +sublist_module_exec(PyObject *m) +{ + SubListType.tp_base = &PyList_Type; + if (PyType_Ready(&SubListType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot sublist_module_slots[] = { + {Py_mod_exec, sublist_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef sublist_module = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "sublist", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = sublist_module_slots, }; PyMODINIT_FUNC PyInit_sublist(void) { - PyObject *m; - SubListType.tp_base = &PyList_Type; - if (PyType_Ready(&SubListType) < 0) - return NULL; - - m = PyModule_Create(&sublistmodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&sublist_module); } diff --git a/Doc/includes/optional-module.rst b/Doc/includes/optional-module.rst new file mode 100644 index 00000000000000..262e73f2eaa09f --- /dev/null +++ b/Doc/includes/optional-module.rst @@ -0,0 +1,9 @@ +This is an :term:`optional module`. +If it is missing from your copy of CPython, +look for documentation from your distributor (that is, +whoever provided Python to you). +If you are the distributor, see :ref:`optional-module-requirements`. + +.. Similar notes appear in the docs of the modules: + - zipfile + - tarfile diff --git a/Doc/installing/index.rst b/Doc/installing/index.rst index a46c1caefe4d8a..3a485a43a5a751 100644 --- a/Doc/installing/index.rst +++ b/Doc/installing/index.rst @@ -188,7 +188,7 @@ switch:: Once the Development & Deployment part of PPUG is fleshed out, some of those sections should be linked from new questions here (most notably, we should have a question about avoiding depending on PyPI that links to - https://packaging.python.org/en/latest/mirrors/) + https://packaging.python.org/en/latest/guides/index-mirrors-and-caches/) Common installation issues diff --git a/Doc/library/__future__.rst b/Doc/library/__future__.rst index 4f3b663006fb28..5d916b30112d3c 100644 --- a/Doc/library/__future__.rst +++ b/Doc/library/__future__.rst @@ -37,38 +37,52 @@ No feature description will ever be deleted from :mod:`__future__`. Since its introduction in Python 2.1 the following features have found their way into the language using this mechanism: -+------------------+-------------+--------------+---------------------------------------------+ -| feature | optional in | mandatory in | effect | -+==================+=============+==============+=============================================+ -| nested_scopes | 2.1.0b1 | 2.2 | :pep:`227`: | -| | | | *Statically Nested Scopes* | -+------------------+-------------+--------------+---------------------------------------------+ -| generators | 2.2.0a1 | 2.3 | :pep:`255`: | -| | | | *Simple Generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| division | 2.2.0a2 | 3.0 | :pep:`238`: | -| | | | *Changing the Division Operator* | -+------------------+-------------+--------------+---------------------------------------------+ -| absolute_import | 2.5.0a1 | 3.0 | :pep:`328`: | -| | | | *Imports: Multi-Line and Absolute/Relative* | -+------------------+-------------+--------------+---------------------------------------------+ -| with_statement | 2.5.0a1 | 2.6 | :pep:`343`: | -| | | | *The "with" Statement* | -+------------------+-------------+--------------+---------------------------------------------+ -| print_function | 2.6.0a2 | 3.0 | :pep:`3105`: | -| | | | *Make print a function* | -+------------------+-------------+--------------+---------------------------------------------+ -| unicode_literals | 2.6.0a2 | 3.0 | :pep:`3112`: | -| | | | *Bytes literals in Python 3000* | -+------------------+-------------+--------------+---------------------------------------------+ -| generator_stop | 3.5.0b1 | 3.7 | :pep:`479`: | -| | | | *StopIteration handling inside generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| annotations | 3.7.0b1 | Never [1]_ | :pep:`563`: | -| | | | *Postponed evaluation of annotations*, | -| | | | :pep:`649`: *Deferred evaluation of | -| | | | annotations using descriptors* | -+------------------+-------------+--------------+---------------------------------------------+ + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * feature + * optional in + * mandatory in + * effect + * * .. data:: nested_scopes + * 2.1.0b1 + * 2.2 + * :pep:`227`: *Statically Nested Scopes* + * * .. data:: generators + * 2.2.0a1 + * 2.3 + * :pep:`255`: *Simple Generators* + * * .. data:: division + * 2.2.0a2 + * 3.0 + * :pep:`238`: *Changing the Division Operator* + * * .. data:: absolute_import + * 2.5.0a1 + * 3.0 + * :pep:`328`: *Imports: Multi-Line and Absolute/Relative* + * * .. data:: with_statement + * 2.5.0a1 + * 2.6 + * :pep:`343`: *The “with” Statement* + * * .. data:: print_function + * 2.6.0a2 + * 3.0 + * :pep:`3105`: *Make print a function* + * * .. data:: unicode_literals + * 2.6.0a2 + * 3.0 + * :pep:`3112`: *Bytes literals in Python 3000* + * * .. data:: generator_stop + * 3.5.0b1 + * 3.7 + * :pep:`479`: *StopIteration handling inside generators* + * * .. data:: annotations + * 3.7.0b1 + * Never [1]_ + * :pep:`563`: *Postponed evaluation of annotations*, + :pep:`649`: *Deferred evaluation of annotations using descriptors* .. XXX Adding a new entry? Remember to update simple_stmts.rst, too. diff --git a/Doc/library/annotationlib.rst b/Doc/library/annotationlib.rst index ff9578b6088f28..40f2a6dc30460b 100644 --- a/Doc/library/annotationlib.rst +++ b/Doc/library/annotationlib.rst @@ -4,6 +4,7 @@ .. module:: annotationlib :synopsis: Functionality for introspecting annotations +.. versionadded:: 3.14 **Source code:** :source:`Lib/annotationlib.py` @@ -45,6 +46,10 @@ and :func:`call_annotate_function`, as well as the :func:`call_evaluate_function` function for working with :term:`evaluate functions `. +.. caution:: + + Most functionality in this module can execute arbitrary code; see + :ref:`the security section ` for more information. .. seealso:: @@ -127,16 +132,27 @@ Classes Values are the result of evaluating the annotation expressions. - .. attribute:: FORWARDREF + .. attribute:: VALUE_WITH_FAKE_GLOBALS :value: 2 + Special value used to signal that an annotate function is being + evaluated in a special environment with fake globals. When passed this + value, annotate functions should either return the same value as for + the :attr:`Format.VALUE` format, or raise :exc:`NotImplementedError` + to signal that they do not support execution in this environment. + This format is only used internally and should not be passed to + the functions in this module. + + .. attribute:: FORWARDREF + :value: 3 + Values are real annotation values (as per :attr:`Format.VALUE` format) for defined values, and :class:`ForwardRef` proxies for undefined values. Real objects may contain references to :class:`ForwardRef` proxy objects. .. attribute:: STRING - :value: 3 + :value: 4 Values are the text string of the annotation as it appears in the source code, up to modifications including, but not restricted to, @@ -144,17 +160,6 @@ Classes The exact values of these strings may change in future versions of Python. - .. attribute:: VALUE_WITH_FAKE_GLOBALS - :value: 4 - - Special value used to signal that an annotate function is being - evaluated in a special environment with fake globals. When passed this - value, annotate functions should either return the same value as for - the :attr:`Format.VALUE` format, or raise :exc:`NotImplementedError` - to signal that they do not support execution in this environment. - This format is only used internally and should not be passed to - the functions in this module. - .. versionadded:: 3.14 .. class:: ForwardRef @@ -211,6 +216,10 @@ Classes means may not have any information about their scope, so passing arguments to this method may be necessary to evaluate them successfully. + If no *owner*, *globals*, *locals*, or *type_params* are provided and the + :class:`~ForwardRef` does not contain information about its origin, + empty globals and locals dictionaries are used. + .. versionadded:: 3.14 @@ -331,14 +340,29 @@ Functions * VALUE: :attr:`!object.__annotations__` is tried first; if that does not exist, the :attr:`!object.__annotate__` function is called if it exists. + * FORWARDREF: If :attr:`!object.__annotations__` exists and can be evaluated successfully, it is used; otherwise, the :attr:`!object.__annotate__` function is called. If it does not exist either, :attr:`!object.__annotations__` is tried again and any error from accessing it is re-raised. + + * When calling :attr:`!object.__annotate__` it is first called with :attr:`~Format.FORWARDREF`. + If this is not implemented, it will then check if :attr:`~Format.VALUE_WITH_FAKE_GLOBALS` + is supported and use that in the fake globals environment. + If neither of these formats are supported, it will fall back to using :attr:`~Format.VALUE`. + If :attr:`~Format.VALUE` fails, the error from this call will be raised. + * STRING: If :attr:`!object.__annotate__` exists, it is called first; otherwise, :attr:`!object.__annotations__` is used and stringified using :func:`annotations_to_string`. + * When calling :attr:`!object.__annotate__` it is first called with :attr:`~Format.STRING`. + If this is not implemented, it will then check if :attr:`~Format.VALUE_WITH_FAKE_GLOBALS` + is supported and use that in the fake globals environment. + If neither of these formats are supported, it will fall back to using :attr:`~Format.VALUE` + with the result converted using :func:`annotations_to_string`. + If :attr:`~Format.VALUE` fails, the error from this call will be raised. + Returns a dict. :func:`!get_annotations` returns a new dict every time it's called; calling it twice on the same object will return two different but equivalent dicts. @@ -485,3 +509,137 @@ annotations from the class and puts them in a separate attribute: typ.classvars = classvars # Store the ClassVars in a separate attribute return typ + +Limitations of the ``STRING`` format +------------------------------------ + +The :attr:`~Format.STRING` format is meant to approximate the source code +of the annotation, but the implementation strategy used means that it is not +always possible to recover the exact source code. + +First, the stringifier of course cannot recover any information that is not present in +the compiled code, including comments, whitespace, parenthesization, and operations that +get simplified by the compiler. + +Second, the stringifier can intercept almost all operations that involve names looked +up in some scope, but it cannot intercept operations that operate fully on constants. +As a corollary, this also means it is not safe to request the ``STRING`` format on +untrusted code: Python is powerful enough that it is possible to achieve arbitrary +code execution even with no access to any globals or builtins. For example: + +.. code-block:: pycon + + >>> def f(x: (1).__class__.__base__.__subclasses__()[-1].__init__.__builtins__["print"]("Hello world")): pass + ... + >>> annotationlib.get_annotations(f, format=annotationlib.Format.STRING) + Hello world + {'x': 'None'} + +.. note:: + This particular example works as of the time of writing, but it relies on + implementation details and is not guaranteed to work in the future. + +Among the different kinds of expressions that exist in Python, +as represented by the :mod:`ast` module, some expressions are supported, +meaning that the ``STRING`` format can generally recover the original source code; +others are unsupported, meaning that they may result in incorrect output or an error. + +The following are supported (sometimes with caveats): + +* :class:`ast.BinOp` +* :class:`ast.UnaryOp` + + * :class:`ast.Invert` (``~``), :class:`ast.UAdd` (``+``), and :class:`ast.USub` (``-``) are supported + * :class:`ast.Not` (``not``) is not supported + +* :class:`ast.Dict` (except when using ``**`` unpacking) +* :class:`ast.Set` +* :class:`ast.Compare` + + * :class:`ast.Eq` and :class:`ast.NotEq` are supported + * :class:`ast.Lt`, :class:`ast.LtE`, :class:`ast.Gt`, and :class:`ast.GtE` are supported, but the operand may be flipped + * :class:`ast.Is`, :class:`ast.IsNot`, :class:`ast.In`, and :class:`ast.NotIn` are not supported + +* :class:`ast.Call` (except when using ``**`` unpacking) +* :class:`ast.Constant` (though not the exact representation of the constant; for example, escape + sequences in strings are lost; hexadecimal numbers are converted to decimal) +* :class:`ast.Attribute` (assuming the value is not a constant) +* :class:`ast.Subscript` (assuming the value is not a constant) +* :class:`ast.Starred` (``*`` unpacking) +* :class:`ast.Name` +* :class:`ast.List` +* :class:`ast.Tuple` +* :class:`ast.Slice` + +The following are unsupported, but throw an informative error when encountered by the +stringifier: + +* :class:`ast.FormattedValue` (f-strings; error is not detected if conversion specifiers like ``!r`` + are used) +* :class:`ast.JoinedStr` (f-strings) + +The following are unsupported and result in incorrect output: + +* :class:`ast.BoolOp` (``and`` and ``or``) +* :class:`ast.IfExp` +* :class:`ast.Lambda` +* :class:`ast.ListComp` +* :class:`ast.SetComp` +* :class:`ast.DictComp` +* :class:`ast.GeneratorExp` + +The following are disallowed in annotation scopes and therefore not relevant: + +* :class:`ast.NamedExpr` (``:=``) +* :class:`ast.Await` +* :class:`ast.Yield` +* :class:`ast.YieldFrom` + + +Limitations of the ``FORWARDREF`` format +---------------------------------------- + +The :attr:`~Format.FORWARDREF` format aims to produce real values as much +as possible, with anything that cannot be resolved replaced with +:class:`ForwardRef` objects. It is affected by broadly the same Limitations +as the :attr:`~Format.STRING` format: annotations that perform operations on +literals or that use unsupported expression types may raise exceptions when +evaluated using the :attr:`~Format.FORWARDREF` format. + +Below are a few examples of the behavior with unsupported expressions: + +.. code-block:: pycon + + >>> from annotationlib import get_annotations, Format + >>> def zerodiv(x: 1 / 0): ... + >>> get_annotations(zerodiv, format=Format.STRING) + Traceback (most recent call last): + ... + ZeroDivisionError: division by zero + >>> get_annotations(zerodiv, format=Format.FORWARDREF) + Traceback (most recent call last): + ... + ZeroDivisionError: division by zero + >>> def ifexp(x: 1 if y else 0): ... + >>> get_annotations(ifexp, format=Format.STRING) + {'x': '1'} + +.. _annotationlib-security: + +Security implications of introspecting annotations +-------------------------------------------------- + +Much of the functionality in this module involves executing code related to annotations, +which can then do arbitrary things. For example, +:func:`get_annotations` may call an arbitrary :term:`annotate function`, and +:meth:`ForwardRef.evaluate` may call :func:`eval` on an arbitrary string. Code contained +in an annotation might make arbitrary system calls, enter an infinite loop, or perform any +other operation. This is also true for any access of the :attr:`~object.__annotations__` attribute, +and for various functions in the :mod:`typing` module that work with annotations, such as +:func:`typing.get_type_hints`. + +Any security issue arising from this also applies immediately after importing +code that may contain untrusted annotations: importing code can always cause arbitrary operations +to be performed. However, it is unsafe to accept strings or other input from an untrusted source and +pass them to any of the APIs for introspecting annotations, for example by editing an +``__annotations__`` dictionary or directly creating a :class:`ForwardRef` object. diff --git a/Doc/library/archiving.rst b/Doc/library/archiving.rst index c9284949af4972..da0b3f8c3e7693 100644 --- a/Doc/library/archiving.rst +++ b/Doc/library/archiving.rst @@ -5,13 +5,15 @@ Data Compression and Archiving ****************************** The modules described in this chapter support data compression with the zlib, -gzip, bzip2 and lzma algorithms, and the creation of ZIP- and tar-format +gzip, bzip2, lzma, and zstd algorithms, and the creation of ZIP- and tar-format archives. See also :ref:`archiving-operations` provided by the :mod:`shutil` module. .. toctree:: + compression.rst + compression.zstd.rst zlib.rst gzip.rst bz2.rst diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 29396c7a0366a1..413920ef1e1732 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -74,7 +74,7 @@ ArgumentParser objects prefix_chars='-', fromfile_prefix_chars=None, \ argument_default=None, conflict_handler='error', \ add_help=True, allow_abbrev=True, exit_on_error=True, \ - *, suggest_on_error=False, color=False) + *, suggest_on_error=False, color=True) Create a new :class:`ArgumentParser` object. All parameters should be passed as keyword arguments. Each parameter has its own more detailed description @@ -119,7 +119,7 @@ ArgumentParser objects * suggest_on_error_ - Enables suggestions for mistyped argument choices and subparser names (default: ``False``) - * color_ - Allow color output (default: ``False``) + * color_ - Allow color output (default: ``True``) .. versionchanged:: 3.5 *allow_abbrev* parameter was added. @@ -434,12 +434,18 @@ arguments they contain. For example:: >>> parser.parse_args(['-f', 'foo', '@args.txt']) Namespace(f='bar') -Arguments read from a file must by default be one per line (but see also +Arguments read from a file must be one per line by default (but see also :meth:`~ArgumentParser.convert_arg_line_to_args`) and are treated as if they were in the same place as the original file referencing argument on the command line. So in the example above, the expression ``['-f', 'foo', '@args.txt']`` is considered equivalent to the expression ``['-f', 'foo', '-f', 'bar']``. +.. note:: + + Empty lines are treated as empty strings (``''``), which are allowed as values but + not as arguments. Empty lines that are read as arguments will result in an + "unrecognized arguments" error. + :class:`ArgumentParser` uses :term:`filesystem encoding and error handler` to read the file containing arguments. @@ -620,26 +626,30 @@ keyword argument:: color ^^^^^ -By default, the help message is printed in plain text. If you want to allow -color in help messages, you can enable it by setting ``color`` to ``True``:: +By default, the help message is printed in color using `ANSI escape sequences +`__. +If you want plain text help messages, you can disable this :ref:`in your local +environment `, or in the argument parser itself +by setting ``color`` to ``False``:: >>> parser = argparse.ArgumentParser(description='Process some integers.', - ... color=True) + ... color=False) >>> parser.add_argument('--action', choices=['sum', 'max']) >>> parser.add_argument('integers', metavar='N', type=int, nargs='+', ... help='an integer for the accumulator') >>> parser.parse_args(['--help']) -Even if a CLI author has enabled color, it can be -:ref:`controlled using environment variables `. +Note that when ``color=True``, colored output depends on both environment +variables and terminal capabilities. However, if ``color=False``, colored +output is always disabled, even if environment variables like ``FORCE_COLOR`` +are set. -If you're writing code that needs to be compatible with older Python versions -and want to opportunistically use ``color`` when it's available, you -can set it as an attribute after initializing the parser instead of using the -keyword argument:: +.. note:: - >>> parser = argparse.ArgumentParser(description='Process some integers.') - >>> parser.color = True + Error messages will include color codes when redirecting stderr to a + file. To avoid this, set the |NO_COLOR|_ or :envvar:`PYTHON_COLORS` + environment variable (for example, + ``NO_COLOR=1 python script.py 2> errors.txt``). .. versionadded:: 3.14 @@ -765,9 +775,9 @@ how the command-line arguments should be handled. The supplied actions are: Namespace(foo=42) * ``'store_true'`` and ``'store_false'`` - These are special cases of - ``'store_const'`` used for storing the values ``True`` and ``False`` - respectively. In addition, they create default values of ``False`` and - ``True`` respectively:: + ``'store_const'`` that respectively store the values ``True`` and ``False`` + with default values of ``False`` and + ``True``:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('--foo', action='store_true') @@ -776,19 +786,19 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--foo --bar'.split()) Namespace(foo=True, bar=False, baz=True) -* ``'append'`` - This stores a list, and appends each argument value to the - list. It is useful to allow an option to be specified multiple times. - If the default value is non-empty, the default elements will be present - in the parsed value for the option, with any values from the - command line appended after those default values. Example usage:: +* ``'append'`` - This appends each argument value to a list. + It is useful for allowing an option to be specified multiple times. + If the default value is a non-empty list, the parsed value will start + with the default list's elements and any values from the command line + will be appended after those default values. Example usage:: >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('--foo', action='append') + >>> parser.add_argument('--foo', action='append', default=['0']) >>> parser.parse_args('--foo 1 --foo 2'.split()) - Namespace(foo=['1', '2']) + Namespace(foo=['0', '1', '2']) -* ``'append_const'`` - This stores a list, and appends the value specified by - the const_ keyword argument to the list; note that the const_ keyword +* ``'append_const'`` - This appends the value specified by + the const_ keyword argument to a list; note that the const_ keyword argument defaults to ``None``. The ``'append_const'`` action is typically useful when multiple arguments need to store constants to the same list. For example:: @@ -799,8 +809,8 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--str --int'.split()) Namespace(types=[, ]) -* ``'extend'`` - This stores a list and appends each item from the multi-value - argument list to it. +* ``'extend'`` - This appends each item from a multi-value + argument to a list. The ``'extend'`` action is typically used with the nargs_ keyword argument value ``'+'`` or ``'*'``. Note that when nargs_ is ``None`` (the default) or ``'?'``, each @@ -814,7 +824,7 @@ how the command-line arguments should be handled. The supplied actions are: .. versionadded:: 3.8 -* ``'count'`` - This counts the number of times a keyword argument occurs. For +* ``'count'`` - This counts the number of times an argument occurs. For example, this is useful for increasing verbosity levels:: >>> parser = argparse.ArgumentParser() @@ -839,23 +849,11 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args(['--version']) PROG 2.0 -Only actions that consume command-line arguments (e.g. ``'store'``, -``'append'`` or ``'extend'``) can be used with positional arguments. - -.. class:: BooleanOptionalAction - - You may also specify an arbitrary action by passing an :class:`Action` subclass or - other object that implements the same interface. The :class:`!BooleanOptionalAction` - is available in :mod:`!argparse` and adds support for boolean actions such as - ``--foo`` and ``--no-foo``:: - - >>> import argparse - >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('--foo', action=argparse.BooleanOptionalAction) - >>> parser.parse_args(['--no-foo']) - Namespace(foo=False) - - .. versionadded:: 3.9 +You may also specify an arbitrary action by passing an :class:`Action` subclass +(e.g. :class:`BooleanOptionalAction`) or other object that implements the same +interface. Only actions that consume command-line arguments (e.g. ``'store'``, +``'append'``, ``'extend'``, or custom actions with non-zero ``nargs``) can be used +with positional arguments. The recommended way to create a custom action is to extend :class:`Action`, overriding the :meth:`!__call__` method and optionally the :meth:`!__init__` and @@ -955,7 +953,7 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: .. index:: single: + (plus); in argparse module -* ``'+'``. Just like ``'*'``, all command-line args present are gathered into a +* ``'+'``. Just like ``'*'``, all command-line arguments present are gathered into a list. Additionally, an error message will be generated if there wasn't at least one command-line argument present. For example:: @@ -995,8 +993,8 @@ the various :class:`ArgumentParser` actions. The two most common uses of it are (like ``-f`` or ``--foo``) and ``nargs='?'``. This creates an optional argument that can be followed by zero or one command-line arguments. When parsing the command line, if the option string is encountered with no - command-line argument following it, the value of ``const`` will be assumed to - be ``None`` instead. See the nargs_ description for examples. + command-line argument following it, the value from ``const`` will be used. + See the nargs_ description for examples. .. versionchanged:: 3.11 ``const=None`` by default, including when ``action='append_const'`` or @@ -1156,16 +1154,21 @@ if the argument was not one of the acceptable values:: game.py: error: argument move: invalid choice: 'fire' (choose from 'rock', 'paper', 'scissors') -Note that inclusion in the *choices* sequence is checked after any type_ -conversions have been performed, so the type of the objects in the *choices* -sequence should match the type_ specified. - Any sequence can be passed as the *choices* value, so :class:`list` objects, :class:`tuple` objects, and custom sequences are all supported. Use of :class:`enum.Enum` is not recommended because it is difficult to control its appearance in usage, help, and error messages. +Note that *choices* are checked after any type_ +conversions have been performed, so objects in *choices* +should match the type_ specified. This can make *choices* +appear unfamiliar in usage, help, or error messages. + +To keep *choices* user-friendly, consider a custom type wrapper that +converts and formats values, or omit type_ and handle conversion in +your application code. + Formatted choices override the default *metavar* which is normally derived from *dest*. This is usually what you want because the user never sees the *dest* parameter. If this display isn't desirable (perhaps because there are @@ -1429,6 +1432,21 @@ this API may be passed as the ``action`` parameter to and return a string which will be used when printing the usage of the program. If such method is not provided, a sensible default will be used. +.. class:: BooleanOptionalAction + + A subclass of :class:`Action` for handling boolean flags with positive + and negative options. Adding a single argument such as ``--foo`` automatically + creates both ``--foo`` and ``--no-foo`` options, storing ``True`` and ``False`` + respectively:: + + >>> import argparse + >>> parser = argparse.ArgumentParser() + >>> parser.add_argument('--foo', action=argparse.BooleanOptionalAction) + >>> parser.parse_args(['--no-foo']) + Namespace(foo=False) + + .. versionadded:: 3.9 + The parse_args() method ----------------------- @@ -1651,7 +1669,7 @@ The Namespace object Other utilities --------------- -Sub-commands +Subcommands ^^^^^^^^^^^^ .. method:: ArgumentParser.add_subparsers(*, [title], [description], [prog], \ @@ -1680,7 +1698,7 @@ Sub-commands * *description* - description for the sub-parser group in help output, by default ``None`` - * *prog* - usage information that will be displayed with sub-command help, + * *prog* - usage information that will be displayed with subcommand help, by default the name of the program and any positional arguments before the subparser argument @@ -1690,7 +1708,7 @@ Sub-commands * action_ - the basic type of action to be taken when this argument is encountered at the command line - * dest_ - name of the attribute under which sub-command name will be + * dest_ - name of the attribute under which subcommand name will be stored; by default ``None`` and no value is stored * required_ - Whether or not a subcommand must be provided, by default @@ -2060,7 +2078,9 @@ Parser defaults >>> parser.parse_args(['736']) Namespace(bar=42, baz='badger', foo=736) - Note that parser-level defaults always override argument-level defaults:: + Note that defaults can be set at both the parser level using :meth:`set_defaults` + and at the argument level using :meth:`add_argument`. If both are called for the + same argument, the last default set for an argument is used:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('--foo', default='bar') @@ -2122,12 +2142,15 @@ Partial parsing .. method:: ArgumentParser.parse_known_args(args=None, namespace=None) - Sometimes a script may only parse a few of the command-line arguments, passing - the remaining arguments on to another script or program. In these cases, the - :meth:`~ArgumentParser.parse_known_args` method can be useful. It works much like - :meth:`~ArgumentParser.parse_args` except that it does not produce an error when - extra arguments are present. Instead, it returns a two item tuple containing - the populated namespace and the list of remaining argument strings. + Sometimes a script only needs to handle a specific set of command-line + arguments, leaving any unrecognized arguments for another script or program. + In these cases, the :meth:`~ArgumentParser.parse_known_args` method can be + useful. + + This method works similarly to :meth:`~ArgumentParser.parse_args`, but it does + not raise an error for extra, unrecognized arguments. Instead, it parses the + known arguments and returns a two item tuple that contains the populated + namespace and the list of any unrecognized arguments. :: diff --git a/Doc/library/array.rst b/Doc/library/array.rst index e0b1eb89cf6c05..1f04f697c7507f 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -24,7 +24,7 @@ defined: +-----------+--------------------+-------------------+-----------------------+-------+ | ``'u'`` | wchar_t | Unicode character | 2 | \(1) | +-----------+--------------------+-------------------+-----------------------+-------+ -| ``'w'`` | Py_UCS4 | Unicode character | 4 | | +| ``'w'`` | Py_UCS4 | Unicode character | 4 | \(2) | +-----------+--------------------+-------------------+-----------------------+-------+ | ``'h'`` | signed short | int | 2 | | +-----------+--------------------+-------------------+-----------------------+-------+ @@ -60,6 +60,9 @@ Notes: .. deprecated-removed:: 3.3 3.16 Please migrate to ``'w'`` typecode. +(2) + .. versionadded:: 3.13 + The actual representation of values is determined by the machine architecture (strictly speaking, by the C implementation). The actual size can be accessed diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index ca9a6b0712c9a2..83cb6e9f215869 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -268,9 +268,9 @@ Literals .. class:: Constant(value) A constant value. The ``value`` attribute of the ``Constant`` literal contains the - Python object it represents. The values represented can be simple types - such as a number, string or ``None``, but also immutable container types - (tuples and frozensets) if all of their elements are constant. + Python object it represents. The values represented can be instances of :class:`str`, + :class:`bytes`, :class:`int`, :class:`float`, :class:`complex`, and :class:`bool`, + and the constants :data:`None` and :data:`Ellipsis`. .. doctest:: @@ -290,9 +290,9 @@ Literals * ``conversion`` is an integer: * -1: no formatting - * 115: ``!s`` string formatting - * 114: ``!r`` repr formatting - * 97: ``!a`` ascii formatting + * 97 (``ord('a')``): ``!a`` :func:`ASCII ` formatting + * 114 (``ord('r')``): ``!r`` :func:`repr` formatting + * 115 (``ord('s')``): ``!s`` :func:`string ` formatting * ``format_spec`` is a :class:`JoinedStr` node representing the formatting of the value, or ``None`` if no format was specified. Both @@ -326,6 +326,63 @@ Literals Constant(value='.3')]))])) +.. class:: TemplateStr(values, /) + + .. versionadded:: 3.14 + + Node representing a template string literal, comprising a series of + :class:`Interpolation` and :class:`Constant` nodes. + These nodes may be any order, and do not need to be interleaved. + + .. doctest:: + + >>> expr = ast.parse('t"{name} finished {place:ordinal}"', mode='eval') + >>> print(ast.dump(expr, indent=4)) + Expression( + body=TemplateStr( + values=[ + Interpolation( + value=Name(id='name', ctx=Load()), + str='name', + conversion=-1), + Constant(value=' finished '), + Interpolation( + value=Name(id='place', ctx=Load()), + str='place', + conversion=-1, + format_spec=JoinedStr( + values=[ + Constant(value='ordinal')]))])) + +.. class:: Interpolation(value, str, conversion, format_spec=None) + + .. versionadded:: 3.14 + + Node representing a single interpolation field in a template string literal. + + * ``value`` is any expression node (such as a literal, a variable, or a + function call). + This has the same meaning as ``FormattedValue.value``. + * ``str`` is a constant containing the text of the interpolation expression. + + If ``str`` is set to ``None``, then ``value`` is used to generate code + when calling :func:`ast.unparse`. This no longer guarantees that the + generated code is identical to the original and is intended for code + generation. + * ``conversion`` is an integer: + + * -1: no conversion + * 97 (``ord('a')``): ``!a`` :func:`ASCII ` conversion + * 114 (``ord('r')``): ``!r`` :func:`repr` conversion + * 115 (``ord('s')``): ``!s`` :func:`string ` conversion + + This has the same meaning as ``FormattedValue.conversion``. + * ``format_spec`` is a :class:`JoinedStr` node representing the formatting + of the value, or ``None`` if no format was specified. Both + ``conversion`` and ``format_spec`` can be set at the same time. + This has the same meaning as ``FormattedValue.format_spec``. + + .. class:: List(elts, ctx) Tuple(elts, ctx) @@ -2445,8 +2502,9 @@ and classes for traversing abstract syntax trees: indents that many spaces per level. If *indent* is a string (such as ``"\t"``), that string is used to indent each level. - If *show_empty* is ``False`` (the default), empty lists and fields that are ``None`` - will be omitted from the output. + If *show_empty* is false (the default), optional empty lists will be + omitted from the output. + Optional ``None`` values are always omitted. .. versionchanged:: 3.9 Added the *indent* option. diff --git a/Doc/library/asyncio-dev.rst b/Doc/library/asyncio-dev.rst index 44b507a9811116..7831b613bd4a60 100644 --- a/Doc/library/asyncio-dev.rst +++ b/Doc/library/asyncio-dev.rst @@ -46,10 +46,6 @@ In addition to enabling the debug mode, consider also: When the debug mode is enabled: -* asyncio checks for :ref:`coroutines that were not awaited - ` and logs them; this mitigates - the "forgotten await" pitfall. - * Many non-threadsafe asyncio APIs (such as :meth:`loop.call_soon` and :meth:`loop.call_at` methods) raise an exception if they are called from a wrong thread. diff --git a/Doc/library/asyncio-eventloop.rst b/Doc/library/asyncio-eventloop.rst index 21f7d0547af1dd..72f484fd1cbe77 100644 --- a/Doc/library/asyncio-eventloop.rst +++ b/Doc/library/asyncio-eventloop.rst @@ -304,6 +304,12 @@ clocks to track time. custom :class:`contextvars.Context` for the *callback* to run in. The current context is used when no *context* is provided. + .. note:: + + For performance, callbacks scheduled with :meth:`loop.call_later` + may run up to one clock-resolution early (see + ``time.get_clock_info('monotonic').resolution``). + .. versionchanged:: 3.7 The *context* keyword-only parameter was added. See :pep:`567` for more details. @@ -324,6 +330,12 @@ clocks to track time. An instance of :class:`asyncio.TimerHandle` is returned which can be used to cancel the callback. + .. note:: + + For performance, callbacks scheduled with :meth:`loop.call_at` + may run up to one clock-resolution early (see + ``time.get_clock_info('monotonic').resolution``). + .. versionchanged:: 3.7 The *context* keyword-only parameter was added. See :pep:`567` for more details. @@ -361,7 +373,7 @@ Creating Futures and Tasks .. versionadded:: 3.5.2 -.. method:: loop.create_task(coro, *, name=None, context=None, eager_start=None) +.. method:: loop.create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Schedule the execution of :ref:`coroutine ` *coro*. Return a :class:`Task` object. @@ -370,6 +382,10 @@ Creating Futures and Tasks for interoperability. In this case, the result type is a subclass of :class:`Task`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. + If the *name* argument is provided and not ``None``, it is set as the name of the task using :meth:`Task.set_name`. @@ -388,8 +404,15 @@ Creating Futures and Tasks .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.13.3 + Added ``kwargs`` which passes on arbitrary extra parameters, including ``name`` and ``context``. + + .. versionchanged:: 3.13.4 + Rolled back the change that passes on *name* and *context* (if it is None), + while still passing on other arbitrary keyword arguments (to avoid breaking backwards compatibility with 3.13.3). + .. versionchanged:: 3.14 - Added the *eager_start* parameter. + All *kwargs* are now passed on. The *eager_start* parameter works with eager task factories. .. method:: loop.set_task_factory(factory) @@ -402,6 +425,16 @@ Creating Futures and Tasks event loop, and *coro* is a coroutine object. The callable must pass on all *kwargs*, and return a :class:`asyncio.Task`-compatible object. + .. versionchanged:: 3.13.3 + Required that all *kwargs* are passed on to :class:`asyncio.Task`. + + .. versionchanged:: 3.13.4 + *name* is no longer passed to task factories. *context* is no longer passed + to task factories if it is ``None``. + + .. versionchanged:: 3.14 + *name* and *context* are now unconditionally passed on to task factories again. + .. method:: loop.get_task_factory() Return a task factory or ``None`` if the default one is in use. @@ -590,6 +623,12 @@ Opening network connections to bind the socket locally. The *local_host* and *local_port* are looked up using :meth:`getaddrinfo`. + .. note:: + + On Windows, when using the proactor event loop with ``local_addr=None``, + an :exc:`OSError` with :attr:`!errno.WSAEINVAL` will be raised + when running it. + * *remote_addr*, if given, is a ``(remote_host, remote_port)`` tuple used to connect the socket to a remote address. The *remote_host* and *remote_port* are looked up using :meth:`getaddrinfo`. @@ -1592,6 +1631,9 @@ async/await code consider using the high-level conforms to the :class:`asyncio.SubprocessTransport` base class and *protocol* is an object instantiated by the *protocol_factory*. + If the transport is closed or is garbage collected, the child process + is killed if it is still running. + .. method:: loop.subprocess_shell(protocol_factory, cmd, *, \ stdin=subprocess.PIPE, stdout=subprocess.PIPE, \ stderr=subprocess.PIPE, **kwargs) @@ -1615,6 +1657,9 @@ async/await code consider using the high-level conforms to the :class:`SubprocessTransport` base class and *protocol* is an object instantiated by the *protocol_factory*. + If the transport is closed or is garbage collected, the child process + is killed if it is still running. + .. note:: It is the application's responsibility to ensure that all whitespace and special characters are quoted appropriately to avoid `shell injection diff --git a/Doc/library/asyncio-future.rst b/Doc/library/asyncio-future.rst index 32771ba72e0002..4b69e569523c58 100644 --- a/Doc/library/asyncio-future.rst +++ b/Doc/library/asyncio-future.rst @@ -75,6 +75,7 @@ Future Functions Deprecation warning is emitted if *future* is not a Future-like object and *loop* is not specified and there is no running event loop. +.. _asyncio-future-obj: Future Object ============= diff --git a/Doc/library/asyncio-protocol.rst b/Doc/library/asyncio-protocol.rst index 7c08d65f26bc27..5208f14c94a50f 100644 --- a/Doc/library/asyncio-protocol.rst +++ b/Doc/library/asyncio-protocol.rst @@ -390,11 +390,11 @@ Subprocess Transports Return the transport for the communication pipe corresponding to the integer file descriptor *fd*: - * ``0``: readable streaming transport of the standard input (*stdin*), + * ``0``: writable streaming transport of the standard input (*stdin*), or :const:`None` if the subprocess was not created with ``stdin=PIPE`` - * ``1``: writable streaming transport of the standard output (*stdout*), + * ``1``: readable streaming transport of the standard output (*stdout*), or :const:`None` if the subprocess was not created with ``stdout=PIPE`` - * ``2``: writable streaming transport of the standard error (*stderr*), + * ``2``: readable streaming transport of the standard error (*stderr*), or :const:`None` if the subprocess was not created with ``stderr=PIPE`` * other *fd*: :const:`None` diff --git a/Doc/library/asyncio-queue.rst b/Doc/library/asyncio-queue.rst index d99213aa81d53e..d481a1921d532b 100644 --- a/Doc/library/asyncio-queue.rst +++ b/Doc/library/asyncio-queue.rst @@ -102,17 +102,34 @@ Queue .. method:: shutdown(immediate=False) - Shut down the queue, making :meth:`~Queue.get` and :meth:`~Queue.put` + Put a :class:`Queue` instance into a shutdown mode. + + The queue can no longer grow. + Future calls to :meth:`~Queue.put` raise :exc:`QueueShutDown`. + Currently blocked callers of :meth:`~Queue.put` will be unblocked + and will raise :exc:`QueueShutDown` in the formerly blocked thread. + + If *immediate* is false (the default), the queue can be wound + down normally with :meth:`~Queue.get` calls to extract tasks + that have already been loaded. + + And if :meth:`~Queue.task_done` is called for each remaining task, a + pending :meth:`~Queue.join` will be unblocked normally. + + Once the queue is empty, future calls to :meth:`~Queue.get` will raise :exc:`QueueShutDown`. - By default, :meth:`~Queue.get` on a shut down queue will only - raise once the queue is empty. Set *immediate* to true to make - :meth:`~Queue.get` raise immediately instead. + If *immediate* is true, the queue is terminated immediately. + The queue is drained to be completely empty and the count + of unfinished tasks is reduced by the number of tasks drained. + If unfinished tasks is zero, callers of :meth:`~Queue.join` + are unblocked. Also, blocked callers of :meth:`~Queue.get` + are unblocked and will raise :exc:`QueueShutDown` because the + queue is empty. - All blocked callers of :meth:`~Queue.put` and :meth:`~Queue.get` - will be unblocked. If *immediate* is true, a task will be marked - as done for each remaining item in the queue, which may unblock - callers of :meth:`~Queue.join`. + Use caution when using :meth:`~Queue.join` with *immediate* set + to true. This unblocks the join even when no work has been done + on the tasks, violating the usual invariant for joining a queue. .. versionadded:: 3.13 @@ -129,9 +146,6 @@ Queue call was received for every item that had been :meth:`~Queue.put` into the queue). - ``shutdown(immediate=True)`` calls :meth:`task_done` for each - remaining item in the queue. - Raises :exc:`ValueError` if called more times than there were items placed in the queue. diff --git a/Doc/library/asyncio-stream.rst b/Doc/library/asyncio-stream.rst index c56166cabb9093..05445219510ca5 100644 --- a/Doc/library/asyncio-stream.rst +++ b/Doc/library/asyncio-stream.rst @@ -171,13 +171,17 @@ and work with streams: .. function:: start_unix_server(client_connected_cb, path=None, \ *, limit=None, sock=None, backlog=100, ssl=None, \ ssl_handshake_timeout=None, \ - ssl_shutdown_timeout=None, start_serving=True) + ssl_shutdown_timeout=None, start_serving=True, cleanup_socket=True) :async: Start a Unix socket server. Similar to :func:`start_server` but works with Unix sockets. + If *cleanup_socket* is true then the Unix socket will automatically + be removed from the filesystem when the server is closed, unless the + socket has been replaced after the server has been created. + See also the documentation of :meth:`loop.create_unix_server`. .. note:: @@ -198,6 +202,9 @@ and work with streams: .. versionchanged:: 3.11 Added the *ssl_shutdown_timeout* parameter. + .. versionchanged:: 3.13 + Added the *cleanup_socket* parameter. + StreamReader ============ @@ -309,11 +316,15 @@ StreamWriter If that fails, the data is queued in an internal write buffer until it can be sent. + The *data* buffer should be a bytes, bytearray, or C-contiguous one-dimensional + memoryview object. + The method should be used along with the ``drain()`` method:: stream.write(data) await stream.drain() + .. method:: writelines(data) The method writes a list (or any iterable) of bytes to the underlying socket diff --git a/Doc/library/asyncio-subprocess.rst b/Doc/library/asyncio-subprocess.rst index 03e76bc868905e..9416c758e51d95 100644 --- a/Doc/library/asyncio-subprocess.rst +++ b/Doc/library/asyncio-subprocess.rst @@ -76,6 +76,9 @@ Creating Subprocesses See the documentation of :meth:`loop.subprocess_exec` for other parameters. + If the process object is garbage collected while the process is still + running, the child process will be killed. + .. versionchanged:: 3.10 Removed the *loop* parameter. @@ -95,6 +98,9 @@ Creating Subprocesses See the documentation of :meth:`loop.subprocess_shell` for other parameters. + If the process object is garbage collected while the process is still + running, the child process will be killed. + .. important:: It is the application's responsibility to ensure that all whitespace and diff --git a/Doc/library/asyncio-sync.rst b/Doc/library/asyncio-sync.rst index 968c812ee3c8e6..f9e98e05cab7ac 100644 --- a/Doc/library/asyncio-sync.rst +++ b/Doc/library/asyncio-sync.rst @@ -157,7 +157,7 @@ Event Clear (unset) the event. - Tasks awaiting on :meth:`~Event.wait` will now block until the + Subsequent tasks awaiting on :meth:`~Event.wait` will now block until the :meth:`~Event.set` method is called again. .. method:: is_set() diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index 59acce1990ae04..f825ae92ec7471 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -238,18 +238,24 @@ Creating Tasks ----------------------------------------------- -.. function:: create_task(coro, *, name=None, context=None) +.. function:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Wrap the *coro* :ref:`coroutine ` into a :class:`Task` and schedule its execution. Return the Task object. - If *name* is not ``None``, it is set as the name of the task using - :meth:`Task.set_name`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. An optional keyword-only *context* argument allows specifying a custom :class:`contextvars.Context` for the *coro* to run in. The current context copy is created when no *context* is provided. + An optional keyword-only *eager_start* argument allows specifying + if the task should execute eagerly during the call to create_task, + or be scheduled later. If *eager_start* is not passed the mode set + by :meth:`loop.set_task_factory` will be used. + The task is executed in the loop returned by :func:`get_running_loop`, :exc:`RuntimeError` is raised if there is no running loop in current thread. @@ -290,6 +296,9 @@ Creating Tasks .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.14 + Added the *eager_start* parameter by passing on all *kwargs*. + Task Cancellation ================= @@ -330,7 +339,7 @@ and reliable way to wait for all tasks in the group to finish. .. versionadded:: 3.11 - .. method:: create_task(coro, *, name=None, context=None) + .. method:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Create a task in this task group. The signature matches that of :func:`asyncio.create_task`. @@ -342,6 +351,10 @@ and reliable way to wait for all tasks in the group to finish. Close the given coroutine if the task group is not active. + .. versionchanged:: 3.14 + + Passes on all *kwargs* to :meth:`loop.create_task` + Example:: async def main(): @@ -1180,6 +1193,7 @@ Introspection .. versionadded:: 3.4 +.. _asyncio-task-obj: Task Object =========== diff --git a/Doc/library/asyncio.rst b/Doc/library/asyncio.rst index 7d368dae49dc1d..0f72e31dee5f1d 100644 --- a/Doc/library/asyncio.rst +++ b/Doc/library/asyncio.rst @@ -29,6 +29,11 @@ database connection libraries, distributed task queues, etc. asyncio is often a perfect fit for IO-bound and high-level **structured** network code. +.. seealso:: + + :ref:`a-conceptual-overview-of-asyncio` + Explanation of the fundamentals of asyncio. + asyncio provides a set of **high-level** APIs to: * :ref:`run Python coroutines ` concurrently and @@ -74,6 +79,10 @@ You can experiment with an ``asyncio`` concurrent context in the :term:`REPL`: >>> await asyncio.sleep(10, result='hello') 'hello' +This REPL provides limited compatibility with :envvar:`PYTHON_BASIC_REPL`. +It is recommended that the default REPL is used +for full functionality and the latest features. + .. audit-event:: cpython.run_stdin "" "" .. versionchanged:: 3.12.5 (also 3.11.10, 3.10.15, 3.9.20, and 3.8.20) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 834ab2536e6c14..529a7242443820 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -15,14 +15,9 @@ This module provides functions for encoding binary data to printable ASCII characters and decoding such encodings back to binary data. -It provides encoding and decoding functions for the encodings specified in -:rfc:`4648`, which defines the Base16, Base32, and Base64 algorithms, -and for the de-facto standard Ascii85 and Base85 encodings. - -The :rfc:`4648` encodings are suitable for encoding binary data so that it can be -safely sent by email, used as parts of URLs, or included as part of an HTTP -POST request. The encoding algorithm is not the same as the -:program:`uuencode` program. +This includes the :ref:`encodings specified in ` +:rfc:`4648` (Base64, Base32 and Base16) +and the non-standard :ref:`Base85 encodings `. There are two interfaces provided by this module. The modern interface supports encoding :term:`bytes-like objects ` to ASCII @@ -30,7 +25,7 @@ supports encoding :term:`bytes-like objects ` to ASCII strings containing ASCII to :class:`bytes`. Both base-64 alphabets defined in :rfc:`4648` (normal, and URL- and filesystem-safe) are supported. -The legacy interface does not support decoding from strings, but it does +The :ref:`legacy interface ` does not support decoding from strings, but it does provide functions for encoding and decoding to and from :term:`file objects `. It only supports the Base64 standard alphabet, and it adds newlines every 76 characters as per :rfc:`2045`. Note that if you are looking @@ -46,7 +41,15 @@ package instead. Any :term:`bytes-like objects ` are now accepted by all encoding and decoding functions in this module. Ascii85/Base85 support added. -The modern interface provides: + +.. _base64-rfc-4648: + +RFC 4648 Encodings +------------------ + +The :rfc:`4648` encodings are suitable for encoding binary data so that it can be +safely sent by email, used as parts of URLs, or included as part of an HTTP +POST request. .. function:: b64encode(s, altchars=None) @@ -181,6 +184,26 @@ The modern interface provides: incorrectly padded or if there are non-alphabet characters present in the input. +.. _base64-base-85: + +Base85 Encodings +----------------- + +Base85 encoding is not formally specified but rather a de facto standard, +thus different systems perform the encoding differently. + +The :func:`a85encode` and :func:`b85encode` functions in this module are two implementations of +the de facto standard. You should call the function with the Base85 +implementation used by the software you intend to work with. + +The two functions present in this module differ in how they handle the following: + +* Whether to include enclosing ``<~`` and ``~>`` markers +* Whether to include newline characters +* The set of ASCII characters used for encoding +* Handling of null bytes + +Refer to the documentation of the individual functions for more information. .. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -262,7 +285,10 @@ The modern interface provides: .. versionadded:: 3.13 -The legacy interface: +.. _base64-legacy: + +Legacy Interface +---------------- .. function:: decode(input, output) diff --git a/Doc/library/bdb.rst b/Doc/library/bdb.rst index 90f042aa377711..2b87f2626b2ce1 100644 --- a/Doc/library/bdb.rst +++ b/Doc/library/bdb.rst @@ -240,7 +240,7 @@ The :mod:`bdb` module also defines two classes: Normally derived classes don't override the following methods, but they may if they want to redefine the definition of stopping and breakpoints. - .. method:: is_skipped_line(module_name) + .. method:: is_skipped_module(module_name) Return ``True`` if *module_name* matches any skip pattern. diff --git a/Doc/library/bisect.rst b/Doc/library/bisect.rst index 78da563397b625..d5ec4212c1f9f4 100644 --- a/Doc/library/bisect.rst +++ b/Doc/library/bisect.rst @@ -24,6 +24,16 @@ method to determine whether a value has been found. Instead, the functions only call the :meth:`~object.__lt__` method and will return an insertion point between values in an array. +.. note:: + + The functions in this module are not thread-safe. If multiple threads + concurrently use :mod:`bisect` functions on the same sequence, this + may result in undefined behaviour. Likewise, if the provided sequence + is mutated by a different thread while a :mod:`bisect` function + is operating on it, the result is undefined. For example, using + :py:func:`~bisect.insort_left` on the same list from multiple threads + may result in the list becoming unsorted. + .. _bisect functions: The following functions are provided: @@ -73,7 +83,7 @@ The following functions are provided: Insert *x* in *a* in sorted order. This function first runs :py:func:`~bisect.bisect_left` to locate an insertion point. - Next, it runs the :meth:`!insert` method on *a* to insert *x* at the + Next, it runs the :meth:`~sequence.insert` method on *a* to insert *x* at the appropriate position to maintain sort order. To support inserting records in a table, the *key* function (if any) is @@ -93,7 +103,7 @@ The following functions are provided: entries of *x*. This function first runs :py:func:`~bisect.bisect_right` to locate an insertion point. - Next, it runs the :meth:`!insert` method on *a* to insert *x* at the + Next, it runs the :meth:`~sequence.insert` method on *a* to insert *x* at the appropriate position to maintain sort order. To support inserting records in a table, the *key* function (if any) is diff --git a/Doc/library/bz2.rst b/Doc/library/bz2.rst index ebe2e43febaefa..12650861c0fb5d 100644 --- a/Doc/library/bz2.rst +++ b/Doc/library/bz2.rst @@ -25,6 +25,8 @@ The :mod:`bz2` module contains: * The :func:`compress` and :func:`decompress` functions for one-shot (de)compression. +.. include:: ../includes/optional-module.rst + (De)compression of files ------------------------ diff --git a/Doc/library/cmath.rst b/Doc/library/cmath.rst index 26518a0458fd81..b6d5dbee21dcd5 100644 --- a/Doc/library/cmath.rst +++ b/Doc/library/cmath.rst @@ -338,7 +338,7 @@ Constants .. data:: nan A floating-point "not a number" (NaN) value. Equivalent to - ``float('nan')``. + ``float('nan')``. See also :data:`math.nan`. .. versionadded:: 3.6 diff --git a/Doc/library/cmdline.rst b/Doc/library/cmdline.rst index 16c67ddbf7cec2..c43b10157f9aea 100644 --- a/Doc/library/cmdline.rst +++ b/Doc/library/cmdline.rst @@ -16,17 +16,17 @@ The following modules have a command-line interface. * :ref:`dis ` * :ref:`doctest ` * :mod:`!encodings.rot_13` -* :mod:`ensurepip` +* :ref:`ensurepip ` * :mod:`filecmp` * :mod:`fileinput` * :mod:`ftplib` * :ref:`gzip ` * :ref:`http.server ` -* :mod:`!idlelib` +* :ref:`idlelib ` * :ref:`inspect ` * :ref:`json ` * :ref:`mimetypes ` -* :mod:`pdb` +* :ref:`pdb ` * :ref:`pickle ` * :ref:`pickletools ` * :ref:`platform ` @@ -52,8 +52,8 @@ The following modules have a command-line interface. * :mod:`turtledemo` * :ref:`unittest ` * :ref:`uuid ` -* :mod:`venv` -* :mod:`webbrowser` +* :ref:`venv ` +* :ref:`webbrowser ` * :ref:`zipapp ` * :ref:`zipfile ` diff --git a/Doc/library/cmdlinelibs.rst b/Doc/library/cmdlinelibs.rst index 085d31af7bca1f..32f8c2c9f4ae32 100644 --- a/Doc/library/cmdlinelibs.rst +++ b/Doc/library/cmdlinelibs.rst @@ -1,7 +1,7 @@ .. _cmdlinelibs: ******************************** -Command Line Interface Libraries +Command-line interface libraries ******************************** The modules described in this chapter assist with implementing @@ -19,3 +19,4 @@ Here's an overview: curses.rst curses.ascii.rst curses.panel.rst + cmd.rst diff --git a/Doc/library/code.rst b/Doc/library/code.rst index 8f7692df9fb22d..52587c4dd8f8e8 100644 --- a/Doc/library/code.rst +++ b/Doc/library/code.rst @@ -22,6 +22,12 @@ build applications which provide an interactive interpreter prompt. it defaults to a newly created dictionary with key ``'__name__'`` set to ``'__console__'`` and key ``'__doc__'`` set to ``None``. + Note that functions and classes objects created under an + :class:`!InteractiveInterpreter` instance will belong to the namespace + specified by *locals*. + They are only pickleable if *locals* is the namespace of an existing + module. + .. class:: InteractiveConsole(locals=None, filename="", local_exit=False) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 86511602fa5a60..24b5a9d64b2cd2 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -53,6 +53,14 @@ any codec: :exc:`UnicodeDecodeError`). Refer to :ref:`codec-base-classes` for more information on codec error handling. +.. function:: charmap_build(string) + + Return a mapping suitable for encoding with a custom single-byte encoding. + Given a :class:`str` *string* of up to 256 characters representing a + decoding table, returns either a compact internal mapping object + ``EncodingMap`` or a :class:`dictionary ` mapping character ordinals + to byte values. Raises a :exc:`TypeError` on invalid input. + The full details for each codec can also be looked up directly: .. function:: lookup(encoding, /) @@ -235,8 +243,8 @@ wider range of codecs when working with binary files: .. function:: iterencode(iterator, encoding, errors='strict', **kwargs) Uses an incremental encoder to iteratively encode the input provided by - *iterator*. This function is a :term:`generator`. - The *errors* argument (as well as any + *iterator*. *iterator* must yield :class:`str` objects. + This function is a :term:`generator`. The *errors* argument (as well as any other keyword argument) is passed through to the incremental encoder. This function requires that the codec accept text :class:`str` objects @@ -247,8 +255,8 @@ wider range of codecs when working with binary files: .. function:: iterdecode(iterator, encoding, errors='strict', **kwargs) Uses an incremental decoder to iteratively decode the input provided by - *iterator*. This function is a :term:`generator`. - The *errors* argument (as well as any + *iterator*. *iterator* must yield :class:`bytes` objects. + This function is a :term:`generator`. The *errors* argument (as well as any other keyword argument) is passed through to the incremental decoder. This function requires that the codec accept :class:`bytes` objects @@ -257,6 +265,20 @@ wider range of codecs when working with binary files: :func:`iterencode`. +.. function:: readbuffer_encode(buffer, errors=None, /) + + Return a :class:`tuple` containing the raw bytes of *buffer*, a + :ref:`buffer-compatible object ` or :class:`str` + (encoded to UTF-8 before processing), and their length in bytes. + + The *errors* argument is ignored. + + .. code-block:: pycon + + >>> codecs.readbuffer_encode(b"Zito") + (b'Zito', 4) + + The module also provides the following constants which are useful for reading and writing to platform dependent files: @@ -960,17 +982,22 @@ defined in Unicode. A simple and straightforward way that can store each Unicode code point, is to store each code point as four consecutive bytes. There are two possibilities: store the bytes in big endian or in little endian order. These two encodings are called ``UTF-32-BE`` and ``UTF-32-LE`` respectively. Their -disadvantage is that if e.g. you use ``UTF-32-BE`` on a little endian machine you -will always have to swap bytes on encoding and decoding. ``UTF-32`` avoids this -problem: bytes will always be in natural endianness. When these bytes are read -by a CPU with a different endianness, then bytes have to be swapped though. To -be able to detect the endianness of a ``UTF-16`` or ``UTF-32`` byte sequence, -there's the so called BOM ("Byte Order Mark"). This is the Unicode character -``U+FEFF``. This character can be prepended to every ``UTF-16`` or ``UTF-32`` -byte sequence. The byte swapped version of this character (``0xFFFE``) is an -illegal character that may not appear in a Unicode text. So when the -first character in a ``UTF-16`` or ``UTF-32`` byte sequence -appears to be a ``U+FFFE`` the bytes have to be swapped on decoding. +disadvantage is that if, for example, you use ``UTF-32-BE`` on a little endian +machine you will always have to swap bytes on encoding and decoding. +Python's ``UTF-16`` and ``UTF-32`` codecs avoid this problem by using the +platform's native byte order when no BOM is present. +Python follows prevailing platform +practice, so native-endian data round-trips without redundant byte swapping, +even though the Unicode Standard defaults to big-endian when the byte order is +unspecified. When these bytes are read by a CPU with a different endianness, +the bytes have to be swapped. To be able to detect the endianness of a +``UTF-16`` or ``UTF-32`` byte sequence, a BOM ("Byte Order Mark") is used. +This is the Unicode character ``U+FEFF``. This character can be prepended to every +``UTF-16`` or ``UTF-32`` byte sequence. The byte swapped version of this character +(``0xFFFE``) is an illegal character that may not appear in a Unicode text. +When the first character of a ``UTF-16`` or ``UTF-32`` byte sequence is +``U+FFFE``, the bytes have to be swapped on decoding. + Unfortunately the character ``U+FEFF`` had a second purpose as a ``ZERO WIDTH NO-BREAK SPACE``: a character that has no width and doesn't allow a word to be split. It can e.g. be used to give hints to a ligature algorithm. @@ -1043,8 +1070,15 @@ or with dictionaries as mapping tables. The following table lists the codecs by name, together with a few common aliases, and the languages for which the encoding is likely used. Neither the list of aliases nor the list of languages is meant to be exhaustive. Notice that spelling alternatives that only differ in -case or use a hyphen instead of an underscore are also valid aliases; therefore, -e.g. ``'utf-8'`` is a valid alias for the ``'utf_8'`` codec. +case or use a hyphen instead of an underscore are also valid aliases +because they are equivalent when normalized by +:func:`~encodings.normalize_encoding`. For example, ``'utf-8'`` is a valid +alias for the ``'utf_8'`` codec. + +.. note:: + + The below table lists the most common aliases, for a complete list + refer to the source :source:`aliases.py ` file. On Windows, ``cpXXX`` codecs are available for all code pages. But only codecs listed in the following table are guarantead to exist on @@ -1222,7 +1256,7 @@ particular, the following variants typically exist: +-----------------+--------------------------------+--------------------------------+ | iso8859_3 | iso-8859-3, latin3, L3 | Esperanto, Maltese | +-----------------+--------------------------------+--------------------------------+ -| iso8859_4 | iso-8859-4, latin4, L4 | Baltic languages | +| iso8859_4 | iso-8859-4, latin4, L4 | Northern Europe | +-----------------+--------------------------------+--------------------------------+ | iso8859_5 | iso-8859-5, cyrillic | Belarusian, Bulgarian, | | | | Macedonian, Russian, Serbian | @@ -1373,7 +1407,11 @@ encodings. | | | It is used in the Python | | | | pickle protocol. | +--------------------+---------+---------------------------+ -| undefined | | Raise an exception for | +| undefined | | This Codec should only | +| | | be used for testing | +| | | purposes. | +| | | | +| | | Raise an exception for | | | | all conversions, even | | | | empty strings. The error | | | | handler is ignored. | @@ -1450,6 +1488,36 @@ to :class:`bytes` mappings. They are not supported by :meth:`bytes.decode` Restoration of the aliases for the binary transforms. +.. _standalone-codec-functions: + +Standalone Codec Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following functions provide encoding and decoding functionality similar to codecs, +but are not available as named codecs through :func:`codecs.encode` or :func:`codecs.decode`. +They are used internally (for example, by :mod:`pickle`) and behave similarly to the +``string_escape`` codec that was removed in Python 3. + +.. function:: codecs.escape_encode(input, errors=None) + + Encode *input* using escape sequences. Similar to how :func:`repr` on bytes + produces escaped byte values. + + *input* must be a :class:`bytes` object. + + Returns a tuple ``(output, length)`` where *output* is a :class:`bytes` + object and *length* is the number of bytes consumed. + +.. function:: codecs.escape_decode(input, errors=None) + + Decode *input* from escape sequences back to the original bytes. + + *input* must be a :term:`bytes-like object`. + + Returns a tuple ``(output, length)`` where *output* is a :class:`bytes` + object and *length* is the number of bytes consumed. + + .. _text-transforms: Text Transforms @@ -1476,6 +1544,66 @@ mapping. It is not supported by :meth:`str.encode` (which only produces Restoration of the ``rot13`` alias. +:mod:`encodings` --- Encodings package +-------------------------------------- + +.. module:: encodings + :synopsis: Encodings package + +This module implements the following functions: + +.. function:: normalize_encoding(encoding) + + Normalize encoding name *encoding*. + + Normalization works as follows: all non-alphanumeric characters except the + dot used for Python package names are collapsed and replaced with a single + underscore, leading and trailing underscores are removed. + For example, ``' -;#'`` becomes ``'_'``. + + Note that *encoding* should be ASCII only. + + +.. note:: + The following functions should not be used directly, except for testing + purposes; :func:`codecs.lookup` should be used instead. + + +.. function:: search_function(encoding) + + Search for the codec module corresponding to the given encoding name + *encoding*. + + This function first normalizes the *encoding* using + :func:`normalize_encoding`, then looks for a corresponding alias. + It attempts to import a codec module from the encodings package using either + the alias or the normalized name. If the module is found and defines a valid + ``getregentry()`` function that returns a :class:`codecs.CodecInfo` object, + the codec is cached and returned. + + If the codec module defines a ``getaliases()`` function any returned aliases + are registered for future use. + + +.. function:: win32_code_page_search_function(encoding) + + Search for a Windows code page encoding *encoding* of the form ``cpXXXX``. + + If the code page is valid and supported, return a :class:`codecs.CodecInfo` + object for it. + + .. availability:: Windows. + + .. versionadded:: 3.14 + + +This module implements the following exception: + +.. exception:: CodecRegistryError + + Raised when a codec is invalid or incompatible. + + :mod:`encodings.idna` --- Internationalized Domain Names in Applications ------------------------------------------------------------------------ diff --git a/Doc/library/collections.abc.rst b/Doc/library/collections.abc.rst index daa9af6d1dd9c9..e6daccb91f2b4e 100644 --- a/Doc/library/collections.abc.rst +++ b/Doc/library/collections.abc.rst @@ -140,6 +140,9 @@ ABC Inherits from Abstract Methods Mi ``__len__``, ``insert`` +:class:`ByteString` :class:`Sequence` ``__getitem__``, Inherited :class:`Sequence` methods + ``__len__`` + :class:`Set` :class:`Collection` ``__contains__``, ``__le__``, ``__lt__``, ``__eq__``, ``__ne__``, ``__iter__``, ``__gt__``, ``__ge__``, ``__and__``, ``__or__``, ``__len__`` ``__sub__``, ``__rsub__``, ``__xor__``, ``__rxor__`` @@ -260,21 +263,50 @@ Collections Abstract Base Classes -- Detailed Descriptions .. class:: Sequence MutableSequence + ByteString ABCs for read-only and mutable :term:`sequences `. Implementation note: Some of the mixin methods, such as - :meth:`~container.__iter__`, :meth:`~object.__reversed__` and :meth:`index`, make - repeated calls to the underlying :meth:`~object.__getitem__` method. + :meth:`~container.__iter__`, :meth:`~object.__reversed__`, + and :meth:`~sequence.index` make repeated calls to the underlying + :meth:`~object.__getitem__` method. Consequently, if :meth:`~object.__getitem__` is implemented with constant access speed, the mixin methods will have linear performance; however, if the underlying method is linear (as it would be with a linked list), the mixins will have quadratic performance and will likely need to be overridden. - .. versionchanged:: 3.5 - The index() method added support for *stop* and *start* - arguments. + .. method:: index(value, start=0, stop=None) + + Return first index of *value*. + + Raises :exc:`ValueError` if the value is not present. + + Supporting the *start* and *stop* arguments is optional, but recommended. + + .. versionchanged:: 3.5 + The :meth:`~sequence.index` method gained support for + the *stop* and *start* arguments. + + .. deprecated-removed:: 3.12 3.17 + The :class:`ByteString` ABC has been deprecated. + + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the :ref:`buffer protocol ` at runtime. For use + in type annotations, either use :class:`Buffer` or a union that + explicitly specifies the types your code supports (e.g., + ``bytes | bytearray | memoryview``). + + :class:`!ByteString` was originally intended to be an abstract class that + would serve as a supertype of both :class:`bytes` and :class:`bytearray`. + However, since the ABC never had any methods, knowing that an object was + an instance of :class:`!ByteString` never actually told you anything + useful about the object. Other common buffer types such as + :class:`memoryview` were also never understood as subtypes of + :class:`!ByteString` (either at runtime or by static type checkers). + + See :pep:`PEP 688 <688#current-options>` for more details. .. class:: Set MutableSet @@ -304,7 +336,7 @@ Collections Abstract Base Classes -- Detailed Descriptions .. note:: In CPython, generator-based coroutines (:term:`generators ` - decorated with :func:`@types.coroutine `) are + decorated with :deco:`types.coroutine`) are *awaitables*, even though they do not have an :meth:`~object.__await__` method. Using ``isinstance(gencoro, Awaitable)`` for them will return ``False``. Use :func:`inspect.isawaitable` to detect them. @@ -322,7 +354,7 @@ Collections Abstract Base Classes -- Detailed Descriptions .. note:: In CPython, generator-based coroutines (:term:`generators ` - decorated with :func:`@types.coroutine `) are + decorated with :deco:`types.coroutine`) are *awaitables*, even though they do not have an :meth:`~object.__await__` method. Using ``isinstance(gencoro, Coroutine)`` for them will return ``False``. Use :func:`inspect.isawaitable` to detect them. diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 5fbdb12f40cafa..fdd31799bd90d3 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -758,9 +758,9 @@ stack manipulations such as ``dup``, ``drop``, ``swap``, ``over``, ``pick``, .. attribute:: default_factory - This attribute is used by the :meth:`__missing__` method; it is - initialized from the first argument to the constructor, if present, or to - ``None``, if absent. + This attribute is used by the :meth:`~defaultdict.__missing__` method; + it is initialized from the first argument to the constructor, if present, + or to ``None``, if absent. .. versionchanged:: 3.9 Added merge (``|``) and update (``|=``) operators, specified in @@ -783,10 +783,10 @@ sequence of key-value pairs into a dictionary of lists: When each key is encountered for the first time, it is not already in the mapping; so an entry is automatically created using the :attr:`~defaultdict.default_factory` -function which returns an empty :class:`list`. The :meth:`!list.append` +function which returns an empty :class:`list`. The :meth:`list.append` operation then attaches the value to the new list. When keys are encountered again, the look-up proceeds normally (returning the list for that key) and the -:meth:`!list.append` operation adds another value to the list. This technique is +:meth:`list.append` operation adds another value to the list. This technique is simpler and faster than an equivalent technique using :meth:`dict.setdefault`: >>> d = {} diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst index c42288419c4d2d..ebbbf857e717a4 100644 --- a/Doc/library/compileall.rst +++ b/Doc/library/compileall.rst @@ -56,11 +56,18 @@ compile Python sources. executed. .. option:: -s strip_prefix + + Remove the given prefix from paths recorded in the ``.pyc`` files. + Paths are made relative to the prefix. + + This option can be used with ``-p`` but not with ``-d``. + .. option:: -p prepend_prefix - Remove (``-s``) or append (``-p``) the given prefix of paths - recorded in the ``.pyc`` files. - Cannot be combined with ``-d``. + Prepend the given prefix to paths recorded in the ``.pyc`` files. + Use ``-p /`` to make the paths absolute. + + This option can be used with ``-s`` but not with ``-d``. .. option:: -x regex diff --git a/Doc/library/compression.rst b/Doc/library/compression.rst new file mode 100644 index 00000000000000..98719be9992acc --- /dev/null +++ b/Doc/library/compression.rst @@ -0,0 +1,20 @@ +The :mod:`!compression` package +=============================== + +.. module:: compression + +.. versionadded:: 3.14 + +The :mod:`!compression` package contains the canonical compression modules +containing interfaces to several different compression algorithms. Some of +these modules have historically been available as separate modules; those will +continue to be available under their original names for compatibility reasons, +and will not be removed without a deprecation cycle. The use of modules in +:mod:`!compression` is encouraged where practical. + +* :mod:`!compression.bz2` -- Re-exports :mod:`bz2` +* :mod:`!compression.gzip` -- Re-exports :mod:`gzip` +* :mod:`!compression.lzma` -- Re-exports :mod:`lzma` +* :mod:`!compression.zlib` -- Re-exports :mod:`zlib` +* :mod:`compression.zstd` -- Wrapper for the Zstandard compression library + diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst new file mode 100644 index 00000000000000..89b6fe540f5ba7 --- /dev/null +++ b/Doc/library/compression.zstd.rst @@ -0,0 +1,899 @@ +:mod:`!compression.zstd` --- Compression compatible with the Zstandard format +============================================================================= + +.. module:: compression.zstd + :synopsis: Low-level interface to compression and decompression routines in + the zstd library. + +.. versionadded:: 3.14 + +**Source code:** :source:`Lib/compression/zstd/__init__.py` + +-------------- + +This module provides classes and functions for compressing and decompressing +data using the Zstandard (or *zstd*) compression algorithm. The +`zstd manual `__ +describes Zstandard as "a fast lossless compression algorithm, targeting +real-time compression scenarios at zlib-level and better compression ratios." +Also included is a file interface that supports reading and writing the +contents of ``.zst`` files created by the :program:`zstd` utility, as well as +raw zstd compressed streams. + +The :mod:`!compression.zstd` module contains: + +* The :func:`.open` function and :class:`ZstdFile` class for reading and + writing compressed files. +* The :class:`ZstdCompressor` and :class:`ZstdDecompressor` classes for + incremental (de)compression. +* The :func:`compress` and :func:`decompress` functions for one-shot + (de)compression. +* The :func:`train_dict` and :func:`finalize_dict` functions and the + :class:`ZstdDict` class to train and manage Zstandard dictionaries. +* The :class:`CompressionParameter`, :class:`DecompressionParameter`, and + :class:`Strategy` classes for setting advanced (de)compression parameters. + +.. include:: ../includes/optional-module.rst + + +Exceptions +---------- + +.. exception:: ZstdError + + This exception is raised when an error occurs during compression or + decompression, or while initializing the (de)compressor state. + + +Reading and writing compressed files +------------------------------------ + +.. function:: open(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None, encoding=None, errors=None, newline=None) + + Open a Zstandard-compressed file in binary or text mode, returning a + :term:`file object`. + + The *file* argument can be either a file name (given as a + :class:`str`, :class:`bytes` or :term:`path-like ` + object), in which case the named file is opened, or it can be an existing + file object to read from or write to. + + The mode argument can be either ``'rb'`` for reading (default), ``'wb'`` for + overwriting, ``'ab'`` for appending, or ``'xb'`` for exclusive creation. + These can equivalently be given as ``'r'``, ``'w'``, ``'a'``, and ``'x'`` + respectively. You may also open in text mode with ``'rt'``, ``'wt'``, + ``'at'``, and ``'xt'`` respectively. + + When reading, the *options* argument can be a dictionary providing advanced + decompression parameters; see :class:`DecompressionParameter` for detailed + information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be non-None. + The *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + In binary mode, this function is equivalent to the :class:`ZstdFile` + constructor: ``ZstdFile(file, mode, ...)``. In this case, the + *encoding*, *errors*, and *newline* parameters must not be provided. + + In text mode, a :class:`ZstdFile` object is created, and wrapped in an + :class:`io.TextIOWrapper` instance with the specified encoding, error + handling behavior, and line endings. + + +.. class:: ZstdFile(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None) + + Open a Zstandard-compressed file in binary mode. + + A :class:`ZstdFile` can wrap an already-open :term:`file object`, or operate + directly on a named file. The *file* argument specifies either the file + object to wrap, or the name of the file to open (as a :class:`str`, + :class:`bytes` or :term:`path-like ` object). If + wrapping an existing file object, the wrapped file will not be closed when + the :class:`ZstdFile` is closed. + + The *mode* argument can be either ``'rb'`` for reading (default), ``'wb'`` + for overwriting, ``'xb'`` for exclusive creation, or ``'ab'`` for appending. + These can equivalently be given as ``'r'``, ``'w'``, ``'x'`` and ``'a'`` + respectively. + + If *file* is a file object (rather than an actual file name), a mode of + ``'w'`` does not truncate the file, and is instead equivalent to ``'a'``. + + When reading, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`DecompressionParameter` for detailed information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be passed. The + *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + :class:`!ZstdFile` supports all the members specified by + :class:`io.BufferedIOBase`, except for :meth:`~io.BufferedIOBase.detach` + and :meth:`~io.IOBase.truncate`. + Iteration and the :keyword:`with` statement are supported. + + The following method and attributes are also provided: + + .. method:: peek(size=-1) + + Return buffered data without advancing the file position. At least one + byte of data will be returned, unless EOF has been reached. The exact + number of bytes returned is unspecified (the *size* argument is ignored). + + .. note:: While calling :meth:`peek` does not change the file position of + the :class:`ZstdFile`, it may change the position of the underlying + file object (for example, if the :class:`ZstdFile` was constructed by + passing a file object for *file*). + + .. attribute:: mode + + ``'rb'`` for reading and ``'wb'`` for writing. + + .. attribute:: name + + The name of the Zstandard file. Equivalent to the :attr:`~io.FileIO.name` + attribute of the underlying :term:`file object`. + + +Compressing and decompressing data in memory +-------------------------------------------- + +.. function:: compress(data, level=None, options=None, zstd_dict=None) + + Compress *data* (a :term:`bytes-like object`), returning the compressed + data as a :class:`bytes` object. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + +.. function:: decompress(data, zstd_dict=None, options=None) + + Decompress *data* (a :term:`bytes-like object`), returning the uncompressed + data as a :class:`bytes` object. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for compression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + If *data* is the concatenation of multiple distinct compressed frames, + decompress all of these frames, and return the concatenation of the results. + + +.. class:: ZstdCompressor(level=None, options=None, zstd_dict=None) + + Create a compressor object, which can be used to compress data + incrementally. + + For a more convenient way of compressing a single chunk of data, see the + module-level function :func:`compress`. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an optional instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + + .. method:: compress(data, mode=ZstdCompressor.CONTINUE) + + Compress *data* (a :term:`bytes-like object`), returning a :class:`bytes` + object with compressed data if possible, or otherwise an empty + :class:`!bytes` object. Some of *data* may be buffered internally, for + use in later calls to :meth:`!compress` and :meth:`~.flush`. The returned + data should be concatenated with the output of any previous calls to + :meth:`~.compress`. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.CONTINUE`, :attr:`~.FLUSH_BLOCK`, + or :attr:`~.FLUSH_FRAME`. + + When all data has been provided to the compressor, call the + :meth:`~.flush` method to finish the compression process. If + :meth:`~.compress` is called with *mode* set to :attr:`~.FLUSH_FRAME`, + :meth:`~.flush` should not be called, as it would write out a new empty + frame. + + .. method:: flush(mode=ZstdCompressor.FLUSH_FRAME) + + Finish the compression process, returning a :class:`bytes` object + containing any data stored in the compressor's internal buffers. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.FLUSH_BLOCK`, or :attr:`~.FLUSH_FRAME`. + + .. method:: set_pledged_input_size(size) + + Specify the amount of uncompressed data *size* that will be provided for + the next frame. *size* will be written into the frame header of the next + frame unless :attr:`CompressionParameter.content_size_flag` is ``False`` + or ``0``. A size of ``0`` means that the frame is empty. If *size* is + ``None``, the frame header will omit the frame size. Frames that include + the uncompressed data size require less memory to decompress, especially + at higher compression levels. + + If :attr:`last_mode` is not :attr:`FLUSH_FRAME`, a + :exc:`ValueError` is raised as the compressor is not at the start of + a frame. If the pledged size does not match the actual size of data + provided to :meth:`.compress`, future calls to :meth:`!compress` or + :meth:`flush` may raise :exc:`ZstdError` and the last chunk of data may + be lost. + + After :meth:`flush` or :meth:`.compress` are called with mode + :attr:`FLUSH_FRAME`, the next frame will not include the frame size into + the header unless :meth:`!set_pledged_input_size` is called again. + + .. attribute:: CONTINUE + + Collect more data for compression, which may or may not generate output + immediately. This mode optimizes the compression ratio by maximizing the + amount of data per block and frame. + + .. attribute:: FLUSH_BLOCK + + Complete and write a block to the data stream. The data returned so far + can be immediately decompressed. Past data can still be referenced in + future blocks generated by calls to :meth:`~.compress`, + improving compression. + + .. attribute:: FLUSH_FRAME + + Complete and write out a frame. Future data provided to + :meth:`~.compress` will be written into a new frame and + *cannot* reference past data. + + .. attribute:: last_mode + + The last mode passed to either :meth:`~.compress` or :meth:`~.flush`. + The value can be one of :attr:`~.CONTINUE`, :attr:`~.FLUSH_BLOCK`, or + :attr:`~.FLUSH_FRAME`. The initial value is :attr:`~.FLUSH_FRAME`, + signifying that the compressor is at the start of a new frame. + + +.. class:: ZstdDecompressor(zstd_dict=None, options=None) + + Create a decompressor object, which can be used to decompress data + incrementally. + + For a more convenient way of decompressing an entire compressed stream at + once, see the module-level function :func:`decompress`. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for compression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + .. note:: + This class does not transparently handle inputs containing multiple + compressed frames, unlike the :func:`decompress` function and + :class:`ZstdFile` class. To decompress a multi-frame input, you should + use :func:`decompress`, :class:`ZstdFile` if working with a + :term:`file object`, or multiple :class:`!ZstdDecompressor` instances. + + .. method:: decompress(data, max_length=-1) + + Decompress *data* (a :term:`bytes-like object`), returning + uncompressed data as bytes. Some of *data* may be buffered + internally, for use in later calls to :meth:`!decompress`. + The returned data should be concatenated with the output of any previous + calls to :meth:`!decompress`. + + If *max_length* is non-negative, the method returns at most *max_length* + bytes of decompressed data. If this limit is reached and further + output can be produced, the :attr:`~.needs_input` attribute will + be set to ``False``. In this case, the next call to + :meth:`~.decompress` may provide *data* as ``b''`` to obtain + more of the output. + + If all of the input data was decompressed and returned (either + because this was less than *max_length* bytes, or because + *max_length* was negative), the :attr:`~.needs_input` attribute + will be set to ``True``. + + Attempting to decompress data after the end of a frame will raise a + :exc:`ZstdError`. Any data found after the end of the frame is ignored + and saved in the :attr:`~.unused_data` attribute. + + .. attribute:: eof + + ``True`` if the end-of-stream marker has been reached. + + .. attribute:: unused_data + + Data found after the end of the compressed stream. + + Before the end of the stream is reached, this will be ``b''``. + + .. attribute:: needs_input + + ``False`` if the :meth:`.decompress` method can provide more + decompressed data before requiring new compressed input. + + +Zstandard dictionaries +---------------------- + + +.. function:: train_dict(samples, dict_size) + + Train a Zstandard dictionary, returning a :class:`ZstdDict` instance. + Zstandard dictionaries enable more efficient compression of smaller sizes + of data, which is traditionally difficult to compress due to less + repetition. If you are compressing multiple similar groups of data (such as + similar files), Zstandard dictionaries can improve compression ratios and + speed significantly. + + The *samples* argument (an iterable of :class:`bytes` objects), is the + population of samples used to train the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. The Zstandard documentation suggests an + absolute maximum of no more than 100 KB, but the maximum can often be smaller + depending on the data. Larger dictionaries generally slow down compression, + but improve compression ratios. Smaller dictionaries lead to faster + compression, but reduce the compression ratio. + + +.. function:: finalize_dict(zstd_dict, /, samples, dict_size, level) + + An advanced function for converting a "raw content" Zstandard dictionary into + a regular Zstandard dictionary. "Raw content" dictionaries are a sequence of + bytes that do not need to follow the structure of a normal Zstandard + dictionary. + + The *zstd_dict* argument is a :class:`ZstdDict` instance with + the :attr:`~ZstdDict.dict_content` containing the raw dictionary contents. + + The *samples* argument (an iterable of :class:`bytes` objects), contains + sample data for generating the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. See :func:`train_dict` for + suggestions on the maximum dictionary size. + + The *level* argument (an integer) is the compression level expected to be + passed to the compressors using this dictionary. The dictionary information + varies for each compression level, so tuning for the proper compression + level can make compression more efficient. + + +.. class:: ZstdDict(dict_content, /, *, is_raw=False) + + A wrapper around Zstandard dictionaries. Dictionaries can be used to improve + the compression of many small chunks of data. Use :func:`train_dict` if you + need to train a new dictionary from sample data. + + The *dict_content* argument (a :term:`bytes-like object`), is the already + trained dictionary information. + + The *is_raw* argument, a boolean, is an advanced parameter controlling the + meaning of *dict_content*. ``True`` means *dict_content* is a "raw content" + dictionary, without any format restrictions. ``False`` means *dict_content* + is an ordinary Zstandard dictionary, created from Zstandard functions, + for example, :func:`train_dict` or the external :program:`zstd` CLI. + + When passing a :class:`!ZstdDict` to a function, the + :attr:`!as_digested_dict` and :attr:`!as_undigested_dict` attributes can + control how the dictionary is loaded by passing them as the ``zstd_dict`` + argument, for example, ``compress(data, zstd_dict=zd.as_digested_dict)``. + Digesting a dictionary is a costly operation that occurs when loading a + Zstandard dictionary. When making multiple calls to compression or + decompression, passing a digested dictionary will reduce the overhead of + loading the dictionary. + + .. list-table:: Difference for compression + :widths: 10 14 10 + :header-rows: 1 + + * - + - Digested dictionary + - Undigested dictionary + * - Advanced parameters of the compressor which may be overridden by + the dictionary's parameters + - ``window_log``, ``hash_log``, ``chain_log``, ``search_log``, + ``min_match``, ``target_length``, ``strategy``, + ``enable_long_distance_matching``, ``ldm_hash_log``, + ``ldm_min_match``, ``ldm_bucket_size_log``, ``ldm_hash_rate_log``, + and some non-public parameters. + - None + * - :class:`!ZstdDict` internally caches the dictionary + - Yes. It's faster when loading a digested dictionary again with the + same compression level. + - No. If you wish to load an undigested dictionary multiple times, + consider reusing a compressor object. + + If passing a :class:`!ZstdDict` without any attribute, an undigested + dictionary is passed by default when compressing and a digested dictionary + is generated if necessary and passed by default when decompressing. + + .. attribute:: dict_content + + The content of the Zstandard dictionary, a ``bytes`` object. It's the + same as the *dict_content* argument in the ``__init__`` method. It can + be used with other programs, such as the ``zstd`` CLI program. + + .. attribute:: dict_id + + Identifier of the Zstandard dictionary, a non-negative int value. + + Non-zero means the dictionary is ordinary, created by Zstandard + functions and following the Zstandard format. + + ``0`` means a "raw content" dictionary, free of any format restriction, + used for advanced users. + + .. note:: + + The meaning of ``0`` for :attr:`!ZstdDict.dict_id` is different + from the ``dictionary_id`` attribute to the :func:`get_frame_info` + function. + + .. attribute:: as_digested_dict + + Load as a digested dictionary. + + .. attribute:: as_undigested_dict + + Load as an undigested dictionary. + + +Advanced parameter control +-------------------------- + +.. class:: CompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced compression parameter + keys that can be used when compressing data. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Parameters are optional; any omitted parameter will have it's value selected + automatically. + + Example getting the lower and upper bound of :attr:`~.compression_level`:: + + lower, upper = CompressionParameter.compression_level.bounds() + + Example setting the :attr:`~.window_log` to the maximum size:: + + _lower, upper = CompressionParameter.window_log.bounds() + options = {CompressionParameter.window_log: upper} + compress(b'venezuelan beaver cheese', options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a compression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. For example, to get the valid values for + :attr:`~.compression_level`, one may check the result of + ``CompressionParameter.compression_level.bounds()``. + + Both the lower and upper bounds are inclusive. + + .. attribute:: compression_level + + A high-level means of setting other compression parameters that affect + the speed and ratio of compressing data. + + Regular compression levels are greater than ``0``. Values greater than + ``20`` are considered "ultra" compression and require more memory than + other levels. Negative values can be used to trade off faster compression + for worse compression ratios. + + Setting the level to zero uses :attr:`COMPRESSION_LEVEL_DEFAULT`. + + .. attribute:: window_log + + Maximum allowed back-reference distance the compressor can use when + compressing data, expressed as power of two, ``1 << window_log`` bytes. + This parameter greatly influences the memory usage of compression. Higher + values require more memory but gain better compression values. + + A value of zero causes the value to be selected automatically. + + .. attribute:: hash_log + + Size of the initial probe table, as a power of two. The resulting memory + usage is ``1 << (hash_log+2)`` bytes. Larger tables improve compression + ratio of strategies <= :attr:`~Strategy.dfast`, and improve compression + speed of strategies > :attr:`~Strategy.dfast`. + + A value of zero causes the value to be selected automatically. + + .. attribute:: chain_log + + Size of the multi-probe search table, as a power of two. The resulting + memory usage is ``1 << (chain_log+2)`` bytes. Larger tables result in + better and slower compression. This parameter has no effect for the + :attr:`~Strategy.fast` strategy. It's still useful when using + :attr:`~Strategy.dfast` strategy, in which case it defines a secondary + probe table. + + A value of zero causes the value to be selected automatically. + + .. attribute:: search_log + + Number of search attempts, as a power of two. More attempts result in + better and slower compression. This parameter is useless for + :attr:`~Strategy.fast` and :attr:`~Strategy.dfast` strategies. + + A value of zero causes the value to be selected automatically. + + .. attribute:: min_match + + Minimum size of searched matches. Larger values increase compression and + decompression speed, but decrease ratio. Note that Zstandard can still + find matches of smaller size, it just tweaks its search algorithm to look + for this size and larger. For all strategies < :attr:`~Strategy.btopt`, + the effective minimum is ``4``; for all strategies + > :attr:`~Strategy.fast`, the effective maximum is ``6``. + + A value of zero causes the value to be selected automatically. + + .. attribute:: target_length + + The impact of this field depends on the selected :class:`Strategy`. + + For strategies :attr:`~Strategy.btopt`, :attr:`~Strategy.btultra` and + :attr:`~Strategy.btultra2`, the value is the length of a match + considered "good enough" to stop searching. Larger values make + compression ratios better, but compresses slower. + + For strategy :attr:`~Strategy.fast`, it is the distance between match + sampling. Larger values make compression faster, but with a worse + compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: strategy + + The higher the value of selected strategy, the more complex the + compression technique used by zstd, resulting in higher compression + ratios but slower compression. + + .. seealso:: :class:`Strategy` + + .. attribute:: enable_long_distance_matching + + Long distance matching can be used to improve compression for large + inputs by finding large matches at greater distances. It increases memory + usage and window size. + + ``True`` or ``1`` enable long distance matching while ``False`` or ``0`` + disable it. + + Enabling this parameter increases default + :attr:`~CompressionParameter.window_log` to 128 MiB except when expressly + set to a different value. This setting is enabled by default if + :attr:`!window_log` >= 128 MiB and the compression + strategy >= :attr:`~Strategy.btopt` (compression level 16+). + + .. attribute:: ldm_hash_log + + Size of the table for long distance matching, as a power of two. Larger + values increase memory usage and compression ratio, but decrease + compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_min_match + + Minimum match size for long distance matcher. Larger or too small values + can often decrease the compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_bucket_size_log + + Log size of each bucket in the long distance matcher hash table for + collision resolution. Larger values improve collision resolution but + decrease compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_hash_rate_log + + Frequency of inserting/looking up entries into the long distance matcher + hash table. Larger values improve compression speed. Deviating far from + the default value will likely result in a compression ratio decrease. + + A value of zero causes the value to be selected automatically. + + .. attribute:: content_size_flag + + Write the size of the data to be compressed into the Zstandard frame + header when known prior to compressing. + + This flag only takes effect under the following scenarios: + + * Calling :func:`compress` for one-shot compression + * Providing all of the data to be compressed in the frame in a single + :meth:`ZstdCompressor.compress` call, with the + :attr:`ZstdCompressor.FLUSH_FRAME` mode. + * Calling :meth:`ZstdCompressor.set_pledged_input_size` with the exact + amount of data that will be provided to the compressor prior to any + calls to :meth:`ZstdCompressor.compress` for the current frame. + :meth:`!ZstdCompressor.set_pledged_input_size` must be called for each + new frame. + + All other compression calls may not write the size information into the + frame header. + + ``True`` or ``1`` enable the content size flag while ``False`` or ``0`` + disable it. + + .. attribute:: checksum_flag + + A four-byte checksum using XXHash64 of the uncompressed content is + written at the end of each frame. Zstandard's decompression code verifies + the checksum. If there is a mismatch a :class:`ZstdError` exception is + raised. + + ``True`` or ``1`` enable checksum generation while ``False`` or ``0`` + disable it. + + .. attribute:: dict_id_flag + + When compressing with a :class:`ZstdDict`, the dictionary's ID is written + into the frame header. + + ``True`` or ``1`` enable storing the dictionary ID while ``False`` or + ``0`` disable it. + + .. attribute:: nb_workers + + Select how many threads will be spawned to compress in parallel. When + :attr:`!nb_workers` > 0, enables multi-threaded compression, a value of + ``1`` means "one-thread multi-threaded mode". More workers improve speed, + but also increase memory usage and slightly reduce compression ratio. + + A value of zero disables multi-threading. + + .. attribute:: job_size + + Size of a compression job, in bytes. This value is enforced only when + :attr:`~CompressionParameter.nb_workers` >= 1. Each compression job is + completed in parallel, so this value can indirectly impact the number of + active threads. + + A value of zero causes the value to be selected automatically. + + .. attribute:: overlap_log + + Sets how much data is reloaded from previous jobs (threads) for new jobs + to be used by the look behind window during compression. This value is + only used when :attr:`~CompressionParameter.nb_workers` >= 1. Acceptable + values vary from 0 to 9. + + * 0 means dynamically set the overlap amount + * 1 means no overlap + * 9 means use a full window size from the previous job + + Each increment halves/doubles the overlap size. "8" means an overlap of + ``window_size/2``, "7" means an overlap of ``window_size/4``, etc. + +.. class:: DecompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced decompression parameter + keys that can be used when decompressing data. Parameters are optional; any + omitted parameter will have it's value selected automatically. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Example setting the :attr:`~.window_log_max` to the maximum size:: + + data = compress(b'Some very long buffer of bytes...') + + _lower, upper = DecompressionParameter.window_log_max.bounds() + + options = {DecompressionParameter.window_log_max: upper} + decompress(data, options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a decompression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. + + Both the lower and upper bounds are inclusive. + + .. attribute:: window_log_max + + The base-two logarithm of the maximum size of the window used during + decompression. This can be useful to limit the amount of memory used when + decompressing data. A larger maximum window size leads to faster + decompression. + + A value of zero causes the value to be selected automatically. + + +.. class:: Strategy() + + An :class:`~enum.IntEnum` containing strategies for compression. + Higher-numbered strategies correspond to more complex and slower + compression. + + .. note:: + + The values of attributes of :class:`!Strategy` are not necessarily stable + across zstd versions. Only the ordering of the attributes may be relied + upon. The attributes are listed below in order. + + The following strategies are available: + + .. attribute:: fast + + .. attribute:: dfast + + .. attribute:: greedy + + .. attribute:: lazy + + .. attribute:: lazy2 + + .. attribute:: btlazy2 + + .. attribute:: btopt + + .. attribute:: btultra + + .. attribute:: btultra2 + + +Miscellaneous +------------- + +.. function:: get_frame_info(frame_buffer) + + Retrieve a :class:`FrameInfo` object containing metadata about a Zstandard + frame. Frames contain metadata related to the compressed data they hold. + + +.. class:: FrameInfo + + Metadata related to a Zstandard frame. + + .. attribute:: decompressed_size + + The size of the decompressed contents of the frame. + + .. attribute:: dictionary_id + + An integer representing the Zstandard dictionary ID needed for + decompressing the frame. ``0`` means the dictionary ID was not + recorded in the frame header. This may mean that a Zstandard dictionary + is not needed, or that the ID of a required dictionary was not recorded. + + +.. attribute:: COMPRESSION_LEVEL_DEFAULT + + The default compression level for Zstandard: ``3``. + + +.. attribute:: zstd_version_info + + Version number of the runtime zstd library as a tuple of integers + (major, minor, release). + + +Examples +-------- + +Reading in a compressed file: + +.. code-block:: python + + from compression import zstd + + with zstd.open("file.zst") as f: + file_content = f.read() + +Creating a compressed file: + +.. code-block:: python + + from compression import zstd + + data = b"Insert Data Here" + with zstd.open("file.zst", "w") as f: + f.write(data) + +Compressing data in memory: + +.. code-block:: python + + from compression import zstd + + data_in = b"Insert Data Here" + data_out = zstd.compress(data_in) + +Incremental compression: + +.. code-block:: python + + from compression import zstd + + comp = zstd.ZstdCompressor() + out1 = comp.compress(b"Some data\n") + out2 = comp.compress(b"Another piece of data\n") + out3 = comp.compress(b"Even more data\n") + out4 = comp.flush() + # Concatenate all the partial results: + result = b"".join([out1, out2, out3, out4]) + +Writing compressed data to an already-open file: + +.. code-block:: python + + from compression import zstd + + with open("myfile", "wb") as f: + f.write(b"This data will not be compressed\n") + with zstd.open(f, "w") as zstf: + zstf.write(b"This *will* be compressed\n") + f.write(b"Not compressed\n") + +Creating a compressed file using compression parameters: + +.. code-block:: python + + from compression import zstd + + options = { + zstd.CompressionParameter.checksum_flag: 1 + } + with zstd.open("file.zst", "w", options=options) as f: + f.write(b"Mind if I squeeze in?") diff --git a/Doc/library/concurrency.rst b/Doc/library/concurrency.rst index 5be1a1106b09a0..18f9443cbfea20 100644 --- a/Doc/library/concurrency.rst +++ b/Doc/library/concurrency.rst @@ -18,6 +18,7 @@ multitasking). Here's an overview: multiprocessing.shared_memory.rst concurrent.rst concurrent.futures.rst + concurrent.interpreters.rst subprocess.rst sched.rst queue.rst diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 7efae9e628b828..c2e2f7f820f4ef 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -6,8 +6,9 @@ .. versionadded:: 3.2 -**Source code:** :source:`Lib/concurrent/futures/thread.py` -and :source:`Lib/concurrent/futures/process.py` +**Source code:** :source:`Lib/concurrent/futures/thread.py`, +:source:`Lib/concurrent/futures/process.py`, +and :source:`Lib/concurrent/futures/interpreter.py` -------------- @@ -100,10 +101,10 @@ Executor Objects executor has started running will be completed prior to this method returning. The remaining futures are cancelled. - You can avoid having to call this method explicitly if you use the - :keyword:`with` statement, which will shutdown the :class:`Executor` - (waiting as if :meth:`Executor.shutdown` were called with *wait* set to - ``True``):: + You can avoid having to call this method explicitly if you use the executor + as a :term:`context manager` via the :keyword:`with` statement, which + will shutdown the :class:`Executor` (waiting as if :meth:`Executor.shutdown` + were called with *wait* set to ``True``):: import shutil with ThreadPoolExecutor(max_workers=4) as e: @@ -238,6 +239,8 @@ ThreadPoolExecutor Example InterpreterPoolExecutor ----------------------- +.. versionadded:: 3.14 + The :class:`InterpreterPoolExecutor` class uses a pool of interpreters to execute calls asynchronously. It is a :class:`ThreadPoolExecutor` subclass, which means each worker is running in its own thread. @@ -264,7 +267,7 @@ Each worker's interpreter is isolated from all the other interpreters. "Isolated" means each interpreter has its own runtime state and operates completely independently. For example, if you redirect :data:`sys.stdout` in one interpreter, it will not be automatically -redirected any other interpreter. If you import a module in one +redirected to any other interpreter. If you import a module in one interpreter, it is not automatically imported in any other. You would need to import the module separately in interpreter where you need it. In fact, each module imported in an interpreter is @@ -286,7 +289,7 @@ efficient alternative is to serialize with :mod:`pickle` and then send the bytes over a shared :mod:`socket ` or :func:`pipe `. -.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=(), shared=None) +.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=()) A :class:`ThreadPoolExecutor` subclass that executes calls asynchronously using a pool of at most *max_workers* threads. Each thread runs @@ -303,21 +306,10 @@ the bytes over a shared :mod:`socket ` or and *initargs* using :mod:`pickle` when sending them to the worker's interpreter. - .. note:: - Functions defined in the ``__main__`` module cannot be pickled - and thus cannot be used. - .. note:: The executor may replace uncaught exceptions from *initializer* with :class:`~concurrent.futures.interpreter.ExecutionFailed`. - The optional *shared* argument is a :class:`dict` of objects that all - interpreters in the pool share. The *shared* items are added to each - interpreter's ``__main__`` module. Not all objects are shareable. - Shareable objects include the builtin singletons, :class:`str` - and :class:`bytes`, and :class:`memoryview`. See :pep:`734` - for more info. - Other caveats from parent :class:`ThreadPoolExecutor` apply here. :meth:`~Executor.submit` and :meth:`~Executor.map` work like normal, @@ -325,10 +317,6 @@ except the worker serializes the callable and arguments using :mod:`pickle` when sending them to its interpreter. The worker likewise serializes the return value when sending it back. -.. note:: - Functions defined in the ``__main__`` module cannot be pickled - and thus cannot be used. - When a worker's current task raises an uncaught exception, the worker always tries to preserve the exception as-is. If that is successful then it also sets the ``__cause__`` to a corresponding @@ -356,6 +344,11 @@ that :class:`ProcessPoolExecutor` will not work in the interactive interpreter. Calling :class:`Executor` or :class:`Future` methods from a callable submitted to a :class:`ProcessPoolExecutor` will result in deadlock. +Note that the restrictions on functions and arguments needing to picklable as +per :class:`multiprocessing.Process` apply when using :meth:`~Executor.submit` +and :meth:`~Executor.map` on a :class:`ProcessPoolExecutor`. A function defined +in a REPL or a lambda should not be expected to work. + .. class:: ProcessPoolExecutor(max_workers=None, mp_context=None, initializer=None, initargs=(), max_tasks_per_child=None) An :class:`Executor` subclass that executes calls asynchronously using a pool diff --git a/Doc/library/concurrent.interpreters.rst b/Doc/library/concurrent.interpreters.rst new file mode 100644 index 00000000000000..55036090e8d5b8 --- /dev/null +++ b/Doc/library/concurrent.interpreters.rst @@ -0,0 +1,387 @@ +:mod:`!concurrent.interpreters` --- Multiple interpreters in the same process +============================================================================= + +.. module:: concurrent.interpreters + :synopsis: Multiple interpreters in the same process + +.. moduleauthor:: Eric Snow +.. sectionauthor:: Eric Snow + +.. versionadded:: 3.14 + +**Source code:** :source:`Lib/concurrent/interpreters` + +-------------- + +The :mod:`!concurrent.interpreters` module constructs higher-level +interfaces on top of the lower level :mod:`!_interpreters` module. + +The module is primarily meant to provide a basic API for managing +interpreters (AKA "subinterpreters") and running things in them. +Running mostly involves switching to an interpreter (in the current +thread) and calling a function in that execution context. + +For concurrency, interpreters themselves (and this module) don't +provide much more than isolation, which on its own isn't useful. +Actual concurrency is available separately through +:mod:`threads ` See `below `_ + +.. seealso:: + + :class:`~concurrent.futures.InterpreterPoolExecutor` + Combines threads with interpreters in a familiar interface. + + .. XXX Add references to the upcoming HOWTO docs in the seealso block. + + :ref:`isolating-extensions-howto` + How to update an extension module to support multiple interpreters. + + :pep:`554` + + :pep:`734` + + :pep:`684` + +.. XXX Why do we disallow multiple interpreters on WASM? + +.. include:: ../includes/wasm-notavail.rst + + +Key details +----------- + +Before we dive in further, there are a small number of details +to keep in mind about using multiple interpreters: + +* `isolated `_, by default +* no implicit threads +* not all PyPI packages support use in multiple interpreters yet + +.. XXX Are there other relevant details to list? + + +.. _interpreters-intro: + +Introduction +------------ + +An "interpreter" is effectively the execution context of the Python +runtime. It contains all of the state the runtime needs to execute +a program. This includes things like the import state and builtins. +(Each thread, even if there's only the main thread, has some extra +runtime state, in addition to the current interpreter, related to +the current exception and the bytecode eval loop.) + +The concept and functionality of the interpreter have been a part of +Python since version 2.2, but the feature was only available through +the C-API and not well known, and the `isolation `_ +was relatively incomplete until version 3.12. + +.. _interp-isolation: + +Multiple Interpreters and Isolation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A Python implementation may support using multiple interpreters in the +same process. CPython has this support. Each interpreter is +effectively isolated from the others (with a limited number of +carefully managed process-global exceptions to the rule). + +That isolation is primarily useful as a strong separation between +distinct logical components of a program, where you want to have +careful control of how those components interact. + +.. note:: + + Interpreters in the same process can technically never be strictly + isolated from one another since there are few restrictions on memory + access within the same process. The Python runtime makes a best + effort at isolation but extension modules may easily violate that. + Therefore, do not use multiple interpreters in security-sensitive + situations, where they shouldn't have access to each other's data. + +Running in an Interpreter +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Running in a different interpreter involves switching to it in the +current thread and then calling some function. The runtime will +execute the function using the current interpreter's state. The +:mod:`!concurrent.interpreters` module provides a basic API for +creating and managing interpreters, as well as the switch-and-call +operation. + +No other threads are automatically started for the operation. +There is `a helper `_ for that though. +There is another dedicated helper for calling the builtin +:func:`exec` in an interpreter. + +When :func:`exec` (or :func:`eval`) are called in an interpreter, +they run using the interpreter's :mod:`!__main__` module as the +"globals" namespace. The same is true for functions that aren't +associated with any module. This is the same as how scripts invoked +from the command-line run in the :mod:`!__main__` module. + + +.. _interp-concurrency: + +Concurrency and Parallelism +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As noted earlier, interpreters do not provide any concurrency +on their own. They strictly represent the isolated execution +context the runtime will use *in the current thread*. That isolation +makes them similar to processes, but they still enjoy in-process +efficiency, like threads. + +All that said, interpreters do naturally support certain flavors of +concurrency. +There's a powerful side effect of that isolation. It enables a +different approach to concurrency than you can take with async or +threads. It's a similar concurrency model to CSP or the actor model, +a model which is relatively easy to reason about. + +You can take advantage of that concurrency model in a single thread, +switching back and forth between interpreters, Stackless-style. +However, this model is more useful when you combine interpreters +with multiple threads. This mostly involves starting a new thread, +where you switch to another interpreter and run what you want there. + +Each actual thread in Python, even if you're only running in the main +thread, has its own *current* execution context. Multiple threads can +use the same interpreter or different ones. + +At a high level, you can think of the combination of threads and +interpreters as threads with opt-in sharing. + +As a significant bonus, interpreters are sufficiently isolated that +they do not share the :term:`GIL`, which means combining threads with +multiple interpreters enables full multi-core parallelism. +(This has been the case since Python 3.12.) + +Communication Between Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In practice, multiple interpreters are useful only if we have a way +to communicate between them. This usually involves some form of +message passing, but can even mean sharing data in some carefully +managed way. + +With this in mind, the :mod:`!concurrent.interpreters` module provides +a :class:`queue.Queue` implementation, available through +:func:`create_queue`. + +.. _interp-object-sharing: + +"Sharing" Objects +^^^^^^^^^^^^^^^^^ + +Any data actually shared between interpreters loses the thread-safety +provided by the :term:`GIL`. There are various options for dealing with +this in extension modules. However, from Python code the lack of +thread-safety means objects can't actually be shared, with a few +exceptions. Instead, a copy must be created, which means mutable +objects won't stay in sync. + +By default, most objects are copied with :mod:`pickle` when they are +passed to another interpreter. Nearly all of the immutable builtin +objects are either directly shared or copied efficiently. For example: + +* :const:`None` +* :class:`bool` (:const:`True` and :const:`False`) +* :class:`bytes` +* :class:`str` +* :class:`int` +* :class:`float` +* :class:`tuple` (of similarly supported objects) + +There is a small number of Python types that actually share mutable +data between interpreters: + +* :class:`memoryview` +* :class:`Queue` + + +Reference +--------- + +This module defines the following functions: + +.. function:: list_all() + + Return a :class:`list` of :class:`Interpreter` objects, + one for each existing interpreter. + +.. function:: get_current() + + Return an :class:`Interpreter` object for the currently running + interpreter. + +.. function:: get_main() + + Return an :class:`Interpreter` object for the main interpreter. + This is the interpreter the runtime created to run the :term:`REPL` + or the script given at the command-line. It is usually the only one. + +.. function:: create() + + Initialize a new (idle) Python interpreter + and return a :class:`Interpreter` object for it. + +.. function:: create_queue() + + Initialize a new cross-interpreter queue and return a :class:`Queue` + object for it. + + +Interpreter objects +^^^^^^^^^^^^^^^^^^^ + +.. class:: Interpreter(id) + + A single interpreter in the current process. + + Generally, :class:`Interpreter` shouldn't be called directly. + Instead, use :func:`create` or one of the other module functions. + + .. attribute:: id + + (read-only) + + The underlying interpreter's ID. + + .. attribute:: whence + + (read-only) + + A string describing where the interpreter came from. + + .. method:: is_running() + + Return ``True`` if the interpreter is currently executing code + in its :mod:`!__main__` module and ``False`` otherwise. + + .. method:: close() + + Finalize and destroy the interpreter. + + .. method:: prepare_main(ns=None, **kwargs) + + Bind objects in the interpreter's :mod:`!__main__` module. + + Some objects are actually shared and some are copied efficiently, + but most are copied via :mod:`pickle`. See :ref:`interp-object-sharing`. + + .. method:: exec(code, /, dedent=True) + + Run the given source code in the interpreter (in the current thread). + + .. method:: call(callable, /, *args, **kwargs) + + Return the result of calling running the given function in the + interpreter (in the current thread). + + .. _interp-call-in-thread: + + .. method:: call_in_thread(callable, /, *args, **kwargs) + + Run the given function in the interpreter (in a new thread). + +Exceptions +^^^^^^^^^^ + +.. exception:: InterpreterError + + This exception, a subclass of :exc:`Exception`, is raised when + an interpreter-related error happens. + +.. exception:: InterpreterNotFoundError + + This exception, a subclass of :exc:`InterpreterError`, is raised when + the targeted interpreter no longer exists. + +.. exception:: ExecutionFailed + + This exception, a subclass of :exc:`InterpreterError`, is raised when + the running code raised an uncaught exception. + + .. attribute:: excinfo + + A basic snapshot of the exception raised in the other interpreter. + +.. XXX Document the excinfoattrs? + +.. exception:: NotShareableError + + This exception, a subclass of :exc:`TypeError`, is raised when + an object cannot be sent to another interpreter. + + +Communicating Between Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. class:: Queue(id) + + A wrapper around a low-level, cross-interpreter queue, which + implements the :class:`queue.Queue` interface. The underlying queue + can only be created through :func:`create_queue`. + + Some objects are actually shared and some are copied efficiently, + but most are copied via :mod:`pickle`. See :ref:`interp-object-sharing`. + + .. attribute:: id + + (read-only) + + The queue's ID. + + +.. exception:: QueueEmptyError + + This exception, a subclass of :exc:`queue.Empty`, is raised from + :meth:`!Queue.get` and :meth:`!Queue.get_nowait` when the queue + is empty. + +.. exception:: QueueFullError + + This exception, a subclass of :exc:`queue.Full`, is raised from + :meth:`!Queue.put` and :meth:`!Queue.put_nowait` when the queue + is full. + + +Basic usage +----------- + +Creating an interpreter and running code in it:: + + from concurrent import interpreters + + interp = interpreters.create() + + # Run in the current OS thread. + + interp.exec('print("spam!")') + + interp.exec("""if True: + print('spam!') + """) + + from textwrap import dedent + interp.exec(dedent(""" + print('spam!') + """)) + + def run(arg): + return arg + + res = interp.call(run, 'spam!') + print(res) + + def run(): + print('spam!') + + interp.call(run) + + # Run in new OS thread. + + t = interp.call_in_thread(run) + t.join() diff --git a/Doc/library/concurrent.rst b/Doc/library/concurrent.rst index 8caea78bbb57e8..748c72c733bba2 100644 --- a/Doc/library/concurrent.rst +++ b/Doc/library/concurrent.rst @@ -1,6 +1,7 @@ The :mod:`!concurrent` package ============================== -Currently, there is only one module in this package: +This package contains the following modules: * :mod:`concurrent.futures` -- Launching parallel tasks +* :mod:`concurrent.interpreters` -- Multiple interpreters in the same process diff --git a/Doc/library/constants.rst b/Doc/library/constants.rst index c0ac4ea8412ebd..d058ba206c6cd6 100644 --- a/Doc/library/constants.rst +++ b/Doc/library/constants.rst @@ -65,8 +65,9 @@ A small number of constants live in the built-in namespace. They are: .. index:: single: ...; ellipsis literal .. data:: Ellipsis - The same as the ellipsis literal "``...``". Special value used mostly in conjunction - with extended slicing syntax for user-defined container data types. + The same as the ellipsis literal "``...``", an object frequently used to + indicate that something is omitted. Assignment to ``Ellipsis`` is possible, but + assignment to ``...`` raises a :exc:`SyntaxError`. ``Ellipsis`` is the sole instance of the :data:`types.EllipsisType` type. @@ -97,15 +98,17 @@ should not be used in programs. exit(code=None) Objects that when printed, print a message like "Use quit() or Ctrl-D - (i.e. EOF) to exit", and when called, raise :exc:`SystemExit` with the + (i.e. EOF) to exit", and when accessed directly in the interactive + interpreter or called as functions, raise :exc:`SystemExit` with the specified exit code. .. data:: help :noindex: Object that when printed, prints the message "Type help() for interactive - help, or help(object) for help about object.", and when called, - acts as described :func:`elsewhere `. + help, or help(object) for help about object.", and when accessed directly + in the interactive interpreter, invokes the built-in help system + (see :func:`help`). .. data:: copyright credits diff --git a/Doc/library/copy.rst b/Doc/library/copy.rst index 95b41f988a035b..210ad7188003e6 100644 --- a/Doc/library/copy.rst +++ b/Doc/library/copy.rst @@ -122,6 +122,8 @@ and only supports named tuples created by :func:`~collections.namedtuple`, This method should create a new object of the same type, replacing fields with values from *changes*. + .. versionadded:: 3.13 + .. seealso:: diff --git a/Doc/library/crypt.rst b/Doc/library/crypt.rst index 9ff37196ccf69f..647cb4925f31da 100644 --- a/Doc/library/crypt.rst +++ b/Doc/library/crypt.rst @@ -13,7 +13,7 @@ being deprecated in Python 3.11. The removal was decided in :pep:`594`. Applications can use the :mod:`hashlib` module from the standard library. Other possible replacements are third-party libraries from PyPI: -:pypi:`legacycrypt`, :pypi:`bcrypt`, :pypi:`argon2-cffi`, or :pypi:`passlib`. +:pypi:`legacycrypt`, :pypi:`bcrypt`, or :pypi:`argon2-cffi`. These are not supported or maintained by the Python core team. The last version of Python that provided the :mod:`!crypt` module was diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 533cdf13974be6..4a033d823e6a7e 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -53,7 +53,7 @@ The :mod:`csv` module defines the following functions: .. index:: single: universal newlines; csv.reader function -.. function:: reader(csvfile, dialect='excel', **fmtparams) +.. function:: reader(csvfile, /, dialect='excel', **fmtparams) Return a :ref:`reader object ` that will process lines from the given *csvfile*. A csvfile must be an iterable of @@ -70,7 +70,7 @@ The :mod:`csv` module defines the following functions: section :ref:`csv-fmt-params`. Each row read from the csv file is returned as a list of strings. No - automatic data type conversion is performed unless the ``QUOTE_NONNUMERIC`` format + automatic data type conversion is performed unless the :data:`QUOTE_NONNUMERIC` format option is specified (in which case unquoted fields are transformed into floats). A short usage example:: @@ -84,7 +84,7 @@ The :mod:`csv` module defines the following functions: Spam, Lovely Spam, Wonderful Spam -.. function:: writer(csvfile, dialect='excel', **fmtparams) +.. function:: writer(csvfile, /, dialect='excel', **fmtparams) Return a writer object responsible for converting the user's data into delimited strings on the given file-like object. *csvfile* can be any object with a @@ -113,7 +113,7 @@ The :mod:`csv` module defines the following functions: spamwriter.writerow(['Spam', 'Lovely Spam', 'Wonderful Spam']) -.. function:: register_dialect(name[, dialect[, **fmtparams]]) +.. function:: register_dialect(name, /, dialect='excel', **fmtparams) Associate *dialect* with *name*. *name* must be a string. The dialect can be specified either by passing a sub-class of :class:`Dialect`, or @@ -139,7 +139,8 @@ The :mod:`csv` module defines the following functions: Return the names of all registered dialects. -.. function:: field_size_limit([new_limit]) +.. function:: field_size_limit() + field_size_limit(new_limit) Returns the current maximum field size allowed by the parser. If *new_limit* is given, this becomes the new limit. @@ -294,8 +295,8 @@ The :mod:`csv` module defines the following classes: - the second through n-th rows contain strings where at least one value's length differs from that of the putative header of that column. - Twenty rows after the first row are sampled; if more than half of columns + - rows meet the criteria, :const:`True` is returned. + Twenty-one rows after the header are sampled; if more than half of the + columns + rows meet the criteria, :const:`True` is returned. .. note:: @@ -323,23 +324,32 @@ The :mod:`csv` module defines the following constants: .. data:: QUOTE_MINIMAL Instructs :class:`writer` objects to only quote those fields which contain - special characters such as *delimiter*, *quotechar* or any of the characters in - *lineterminator*. + special characters such as *delimiter*, *quotechar*, ``'\r'``, ``'\n'`` + or any of the characters in *lineterminator*. .. data:: QUOTE_NONNUMERIC Instructs :class:`writer` objects to quote all non-numeric fields. - Instructs :class:`reader` objects to convert all non-quoted fields to type *float*. + Instructs :class:`reader` objects to convert all non-quoted fields to type :class:`float`. + .. note:: + Some numeric types, such as :class:`bool`, :class:`~fractions.Fraction`, + or :class:`~enum.IntEnum`, have a string representation that cannot be + converted to :class:`float`. + They cannot be read in the :data:`QUOTE_NONNUMERIC` and + :data:`QUOTE_STRINGS` modes. .. data:: QUOTE_NONE - Instructs :class:`writer` objects to never quote fields. When the current - *delimiter* occurs in output data it is preceded by the current *escapechar* - character. If *escapechar* is not set, the writer will raise :exc:`Error` if + Instructs :class:`writer` objects to never quote fields. + When the current *delimiter*, *quotechar*, *escapechar*, ``'\r'``, ``'\n'`` + or any of the characters in *lineterminator* occurs in output data + it is preceded by the current *escapechar* character. + If *escapechar* is not set, the writer will raise :exc:`Error` if any characters that require escaping are encountered. + Set *quotechar* to ``None`` to prevent its escaping. Instructs :class:`reader` objects to perform no special processing of quote characters. @@ -408,9 +418,16 @@ Dialects support the following attributes: .. attribute:: Dialect.escapechar - A one-character string used by the writer to escape the *delimiter* if *quoting* - is set to :const:`QUOTE_NONE` and the *quotechar* if *doublequote* is - :const:`False`. On reading, the *escapechar* removes any special meaning from + A one-character string used by the writer to escape characters that + require escaping: + + * the *delimiter*, the *quotechar*, ``'\r'``, ``'\n'`` and any of the + characters in *lineterminator* are escaped if *quoting* is set to + :const:`QUOTE_NONE`; + * the *quotechar* is escaped if *doublequote* is :const:`False`; + * the *escapechar* itself. + + On reading, the *escapechar* removes any special meaning from the following character. It defaults to :const:`None`, which disables escaping. .. versionchanged:: 3.11 @@ -430,9 +447,12 @@ Dialects support the following attributes: .. attribute:: Dialect.quotechar - A one-character string used to quote fields containing special characters, such - as the *delimiter* or *quotechar*, or which contain new-line characters. It - defaults to ``'"'``. + A one-character string used to quote fields containing special characters, + such as the *delimiter* or the *quotechar*, or which contain new-line + characters (``'\r'``, ``'\n'`` or any of the characters in *lineterminator*). + It defaults to ``'"'``. + Can be set to ``None`` to prevent escaping ``'"'`` if *quoting* is set + to :const:`QUOTE_NONE`. .. versionchanged:: 3.11 An empty *quotechar* is not allowed. @@ -441,13 +461,15 @@ Dialects support the following attributes: Controls when quotes should be generated by the writer and recognised by the reader. It can take on any of the :ref:`QUOTE_\* constants ` - and defaults to :const:`QUOTE_MINIMAL`. + and defaults to :const:`QUOTE_MINIMAL` if *quotechar* is not ``None``, + and :const:`QUOTE_NONE` otherwise. .. attribute:: Dialect.skipinitialspace When :const:`True`, spaces immediately following the *delimiter* are ignored. - The default is :const:`False`. + The default is :const:`False`. When combining ``delimiter=' '`` with + ``skipinitialspace=True``, unquoted empty fields are not allowed. .. attribute:: Dialect.strict @@ -506,7 +528,7 @@ out surrounded by parens. This may cause some problems for other programs which read CSV files (assuming they support complex numbers at all). -.. method:: csvwriter.writerow(row) +.. method:: csvwriter.writerow(row, /) Write the *row* parameter to the writer's file object, formatted according to the current :class:`Dialect`. Return the return value of the call to the @@ -515,7 +537,7 @@ read CSV files (assuming they support complex numbers at all). .. versionchanged:: 3.5 Added support of arbitrary iterables. -.. method:: csvwriter.writerows(rows) +.. method:: csvwriter.writerows(rows, /) Write all elements in *rows* (an iterable of *row* objects as described above) to the writer's file object, formatted according to the current @@ -603,7 +625,7 @@ A slightly more advanced use of the reader --- catching and reporting errors:: for row in reader: print(row) except csv.Error as e: - sys.exit('file {}, line {}: {}'.format(filename, reader.line_num, e)) + sys.exit(f'file {filename}, line {reader.line_num}: {e}') And while the module doesn't directly support parsing strings, it can easily be done:: @@ -616,7 +638,7 @@ done:: .. rubric:: Footnotes .. [1] If ``newline=''`` is not specified, newlines embedded inside quoted fields - will not be interpreted correctly, and on platforms that use ``\r\n`` linendings + will not be interpreted correctly, and on platforms that use ``\r\n`` line endings on write an extra ``\r`` will be added. It should always be safe to specify ``newline=''``, since the csv module does its own (:term:`universal `) newline handling. diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 5b733d5321e907..9c0b246c095483 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -14,6 +14,8 @@ data types, and allows calling functions in DLLs or shared libraries. It can be used to wrap these libraries in pure Python. +.. include:: ../includes/optional-module.rst + .. _ctypes-ctypes-tutorial: @@ -232,8 +234,24 @@ Fundamental data types +----------------------+------------------------------------------+----------------------------+ | :class:`c_int` | :c:expr:`int` | int | +----------------------+------------------------------------------+----------------------------+ +| :class:`c_int8` | :c:type:`int8_t` | int | ++----------------------+------------------------------------------+----------------------------+ +| :class:`c_int16` | :c:type:`int16_t` | int | ++----------------------+------------------------------------------+----------------------------+ +| :class:`c_int32` | :c:type:`int32_t` | int | ++----------------------+------------------------------------------+----------------------------+ +| :class:`c_int64` | :c:type:`int64_t` | int | ++----------------------+------------------------------------------+----------------------------+ | :class:`c_uint` | :c:expr:`unsigned int` | int | +----------------------+------------------------------------------+----------------------------+ +| :class:`c_uint8` | :c:type:`uint8_t` | int | ++----------------------+------------------------------------------+----------------------------+ +| :class:`c_uint16` | :c:type:`uint16_t` | int | ++----------------------+------------------------------------------+----------------------------+ +| :class:`c_uint32` | :c:type:`uint32_t` | int | ++----------------------+------------------------------------------+----------------------------+ +| :class:`c_uint64` | :c:type:`uint64_t` | int | ++----------------------+------------------------------------------+----------------------------+ | :class:`c_long` | :c:expr:`long` | int | +----------------------+------------------------------------------+----------------------------+ | :class:`c_ulong` | :c:expr:`unsigned long` | int | @@ -684,14 +702,10 @@ compiler does it. It is possible to override this behavior entirely by specifyi :attr:`~Structure._layout_` class attribute in the subclass definition; see the attribute documentation for details. -It is possible to specify the maximum alignment for the fields by setting -the :attr:`~Structure._pack_` class attribute to a positive integer. -This matches what ``#pragma pack(n)`` does in MSVC. - -It is also possible to set a minimum alignment for how the subclass itself is packed in the -same way ``#pragma align(n)`` works in MSVC. -This can be achieved by specifying a :attr:`~Structure._align_` class attribute -in the subclass definition. +It is possible to specify the maximum alignment for the fields and/or for the +structure itself by setting the class attributes :attr:`~Structure._pack_` +and/or :attr:`~Structure._align_`, respectively. +See the attribute documentation for details. :mod:`ctypes` uses the native byte order for Structures and Unions. To build structures with non-native byte order, you can use one of the @@ -714,10 +728,16 @@ item in the :attr:`~Structure._fields_` tuples:: ... ("second_16", c_int, 16)] ... >>> print(Int.first_16) - + >>> print(Int.second_16) - - >>> + + +It is important to note that bit field allocation and layout in memory are not +defined as a C standard; their implementation is compiler-specific. +By default, Python will attempt to match the behavior of a "native" compiler +for the current platform. +See the :attr:`~Structure._layout_` attribute for details on the default +behavior and how to change it. .. _ctypes-arrays: @@ -876,7 +896,7 @@ invalid non-\ ``NULL`` pointers would crash Python):: Thread safety without the GIL ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In Python 3.13, the :term:`GIL` may be disabled on :term:`experimental free threaded ` builds. +From Python 3.13 onward, the :term:`GIL` can be disabled on :term:`free threaded ` builds. In ctypes, reads and writes to a single object concurrently is safe, but not across multiple objects: .. code-block:: pycon @@ -2031,35 +2051,55 @@ Utility functions pointer. -.. function:: create_string_buffer(init_or_size, size=None) +.. function:: create_string_buffer(init, size=None) + create_string_buffer(size) This function creates a mutable character buffer. The returned object is a ctypes array of :class:`c_char`. - *init_or_size* must be an integer which specifies the size of the array, or a - bytes object which will be used to initialize the array items. + If *size* is given (and not ``None``), it must be an :class:`int`. + It specifies the size of the returned array. + + If the *init* argument is given, it must be :class:`bytes`. It is used + to initialize the array items. Bytes not initialized this way are + set to zero (NUL). + + If *size* is not given (or if it is ``None``), the buffer is made one element + larger than *init*, effectively adding a NUL terminator. + + If both arguments are given, *size* must not be less than ``len(init)``. - If a bytes object is specified as first argument, the buffer is made one item - larger than its length so that the last element in the array is a NUL - termination character. An integer can be passed as second argument which allows - specifying the size of the array if the length of the bytes should not be used. + .. warning:: + + If *size* is equal to ``len(init)``, a NUL terminator is + not added. Do not treat such a buffer as a C string. + + For example:: + + >>> bytes(create_string_buffer(2)) + b'\x00\x00' + >>> bytes(create_string_buffer(b'ab')) + b'ab\x00' + >>> bytes(create_string_buffer(b'ab', 2)) + b'ab' + >>> bytes(create_string_buffer(b'ab', 4)) + b'ab\x00\x00' + >>> bytes(create_string_buffer(b'abcdef', 2)) + Traceback (most recent call last): + ... + ValueError: byte string too long .. audit-event:: ctypes.create_string_buffer init,size ctypes.create_string_buffer -.. function:: create_unicode_buffer(init_or_size, size=None) +.. function:: create_unicode_buffer(init, size=None) + create_unicode_buffer(size) This function creates a mutable unicode character buffer. The returned object is a ctypes array of :class:`c_wchar`. - *init_or_size* must be an integer which specifies the size of the array, or a - string which will be used to initialize the array items. - - If a string is specified as first argument, the buffer is made one item - larger than the length of the string so that the last element in the array is a - NUL termination character. An integer can be passed as second argument which - allows specifying the size of the array if the length of the string should not - be used. + The function takes the same arguments as :func:`~create_string_buffer` except + *init* must be a string and *size* counts :class:`c_wchar`. .. audit-event:: ctypes.create_unicode_buffer init,size ctypes.create_unicode_buffer @@ -2498,7 +2538,7 @@ These are the fundamental ctypes data types: .. class:: c_int8 - Represents the C 8-bit :c:expr:`signed int` datatype. Usually an alias for + Represents the C 8-bit :c:expr:`signed int` datatype. It is an alias for :class:`c_byte`. @@ -2573,7 +2613,7 @@ These are the fundamental ctypes data types: .. class:: c_uint8 - Represents the C 8-bit :c:expr:`unsigned int` datatype. Usually an alias for + Represents the C 8-bit :c:expr:`unsigned int` datatype. It is an alias for :class:`c_ubyte`. @@ -2750,11 +2790,18 @@ fields, or any other data types containing pointer type fields. .. attribute:: _pack_ An optional small integer that allows overriding the alignment of - structure fields in the instance. :attr:`_pack_` must already be defined - when :attr:`_fields_` is assigned, otherwise it will have no effect. - Setting this attribute to 0 is the same as not setting it at all. + structure fields in the instance. - This is only implemented for the MSVC-compatible memory layout. + This is only implemented for the MSVC-compatible memory layout + (see :attr:`_layout_`). + + Setting :attr:`!_pack_` to 0 is the same as not setting it at all. + Otherwise, the value must be a positive power of two. + The effect is equivalent to ``#pragma pack(N)`` in C, except + :mod:`ctypes` may allow larger *n* than what the compiler accepts. + + :attr:`!_pack_` must already be defined + when :attr:`_fields_` is assigned, otherwise it will have no effect. .. deprecated-removed:: 3.14 3.19 @@ -2767,9 +2814,22 @@ fields, or any other data types containing pointer type fields. .. attribute:: _align_ - An optional small integer that allows overriding the alignment of + An optional small integer that allows increasing the alignment of the structure when being packed or unpacked to/from memory. - Setting this attribute to 0 is the same as not setting it at all. + + The value must not be negative. + The effect is equivalent to ``__attribute__((aligned(N)))`` on GCC + or ``#pragma align(N)`` on MSVC, except :mod:`ctypes` may allow + values that the compiler would reject. + + :attr:`!_align_` can only *increase* a structure's alignment + requirements. Setting it to 0 or 1 has no effect. + + Using values that are not powers of two is discouraged and may lead to + surprising behavior. + + :attr:`!_align_` must already be defined + when :attr:`_fields_` is assigned, otherwise it will have no effect. .. versionadded:: 3.13 @@ -2939,7 +2999,7 @@ fields, or any other data types containing pointer type fields. .. attribute:: is_anonymous True if this field is anonymous, that is, it contains nested sub-fields - that should be be merged into a containing structure or union. + that should be merged into a containing structure or union. .. _ctypes-arrays-pointers: diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 137504c51b4358..057d338edda92a 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -23,6 +23,8 @@ Linux and the BSD variants of Unix. .. include:: ../includes/wasm-mobile-notavail.rst +.. include:: ../includes/optional-module.rst + .. note:: Whenever the documentation mentions a *character* it can be specified @@ -68,7 +70,7 @@ The module :mod:`curses` defines the following exception: The module :mod:`curses` defines the following functions: -.. function:: assume_default_colors(fg, bg) +.. function:: assume_default_colors(fg, bg, /) Allow use of default values for colors on terminals supporting this feature. Use this to support transparency in your application. @@ -716,8 +718,10 @@ The module :mod:`curses` defines the following functions: Window Objects -------------- -Window objects, as returned by :func:`initscr` and :func:`newwin` above, have -the following methods and attributes: +.. class:: window + + Window objects, as returned by :func:`initscr` and :func:`newwin` above, have + the following methods and attributes: .. method:: window.addch(ch[, attr]) @@ -770,7 +774,7 @@ the following methods and attributes: .. method:: window.attron(attr) - Add attribute *attr* from the "background" set applied to all writes to the + Add attribute *attr* to the "background" set applied to all writes to the current window. @@ -988,6 +992,10 @@ the following methods and attributes: window.getstr(y, x, n) Read a bytes object from the user, with primitive line editing capacity. + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.getyx() @@ -1079,6 +1087,10 @@ the following methods and attributes: current cursor position, or at *y*, *x* if specified. Attributes are stripped from the characters. If *n* is specified, :meth:`instr` returns a string at most *n* characters long (exclusive of the trailing NUL). + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.is_linetouched(line) @@ -1339,7 +1351,6 @@ The :mod:`curses` module defines the following data members: .. data:: version -.. data:: __version__ A bytes object representing the current version of the module. diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index 72612211b43d5e..cff36e258224d3 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -121,8 +121,11 @@ Module contents :meth:`!__le__`, :meth:`!__gt__`, or :meth:`!__ge__`, then :exc:`TypeError` is raised. - - *unsafe_hash*: If ``False`` (the default), a :meth:`~object.__hash__` method - is generated according to how *eq* and *frozen* are set. + - *unsafe_hash*: If true, force ``dataclasses`` to create a + :meth:`~object.__hash__` method, even though it may not be safe to do so. + Otherwise, generate a :meth:`~object.__hash__` method according to how + *eq* and *frozen* are set. + The default value is ``False``. :meth:`!__hash__` is used by built-in :meth:`hash`, and when objects are added to hashed collections such as dictionaries and sets. Having a @@ -158,9 +161,11 @@ Module contents :class:`object`, this means it will fall back to id-based hashing). - *frozen*: If true (the default is ``False``), assigning to fields will - generate an exception. This emulates read-only frozen instances. If - :meth:`~object.__setattr__` or :meth:`~object.__delattr__` is defined in the class, then - :exc:`TypeError` is raised. See the discussion below. + generate an exception. This emulates read-only frozen instances. + See the :ref:`discussion ` below. + + If :meth:`~object.__setattr__` or :meth:`~object.__delattr__` is defined in the class + and *frozen* is true, then :exc:`TypeError` is raised. - *match_args*: If true (the default is ``True``), the :attr:`~object.__match_args__` tuple will be created from the list of @@ -304,15 +309,15 @@ Module contents .. versionadded:: 3.10 - - ``doc``: optional docstring for this field. + - *doc*: optional docstring for this field. - .. versionadded:: 3.13 + .. versionadded:: 3.14 If the default value of a field is specified by a call to :func:`!field`, then the class attribute for this field will be replaced by the specified *default* value. If *default* is not provided, then the class attribute will be deleted. The intent is - that after the :func:`@dataclass ` decorator runs, the class + that after the :deco:`dataclass` decorator runs, the class attributes will all contain the default values for the fields, just as if the default value itself were specified. For example, after:: @@ -422,7 +427,7 @@ Module contents :data:`typing.Any` is used for ``type``. The values of *init*, *repr*, *eq*, *order*, *unsafe_hash*, *frozen*, *match_args*, *kw_only*, *slots*, and *weakref_slot* have - the same meaning as they do in :func:`@dataclass `. + the same meaning as they do in :deco:`dataclass`. If *module* is defined, the :attr:`!__module__` attribute of the dataclass is set to that value. @@ -430,12 +435,12 @@ Module contents The *decorator* parameter is a callable that will be used to create the dataclass. It should take the class object as a first argument and the same keyword arguments - as :func:`@dataclass `. By default, the :func:`@dataclass ` + as :deco:`dataclass`. By default, the :deco:`dataclass` function is used. This function is not strictly required, because any Python - mechanism for creating a new class with :attr:`!__annotations__` can - then apply the :func:`@dataclass ` function to convert that class to + mechanism for creating a new class with :attr:`~object.__annotations__` can + then apply the :deco:`dataclass` function to convert that class to a dataclass. This function is provided as a convenience. For example:: @@ -564,7 +569,7 @@ Post-init processing def __post_init__(self): self.c = self.a + self.b -The :meth:`~object.__init__` method generated by :func:`@dataclass ` does not call base +The :meth:`~object.__init__` method generated by :deco:`dataclass` does not call base class :meth:`!__init__` methods. If the base class has an :meth:`!__init__` method that has to be called, it is common to call this method in a :meth:`__post_init__` method:: @@ -594,7 +599,7 @@ parameters to :meth:`!__post_init__`. Also see the warning about how Class variables --------------- -One of the few places where :func:`@dataclass ` actually inspects the type +One of the few places where :deco:`dataclass` actually inspects the type of a field is to determine if a field is a class variable as defined in :pep:`526`. It does this by checking if the type of the field is :data:`typing.ClassVar`. If a field is a ``ClassVar``, it is excluded @@ -607,7 +612,7 @@ module-level :func:`fields` function. Init-only variables ------------------- -Another place where :func:`@dataclass ` inspects a type annotation is to +Another place where :deco:`dataclass` inspects a type annotation is to determine if a field is an init-only variable. It does this by seeing if the type of a field is of type :class:`InitVar`. If a field is an :class:`InitVar`, it is considered a pseudo-field called an init-only @@ -641,7 +646,7 @@ Frozen instances ---------------- It is not possible to create truly immutable Python objects. However, -by passing ``frozen=True`` to the :func:`@dataclass ` decorator you can +by passing ``frozen=True`` to the :deco:`dataclass` decorator you can emulate immutability. In that case, dataclasses will add :meth:`~object.__setattr__` and :meth:`~object.__delattr__` methods to the class. These methods will raise a :exc:`FrozenInstanceError` when invoked. @@ -657,7 +662,7 @@ must use :meth:`!object.__setattr__`. Inheritance ----------- -When the dataclass is being created by the :func:`@dataclass ` decorator, +When the dataclass is being created by the :deco:`dataclass` decorator, it looks through all of the class's base classes in reverse MRO (that is, starting at :class:`object`) and, for each dataclass that it finds, adds the fields from that base class to an ordered mapping of fields. @@ -781,7 +786,7 @@ for :attr:`!x` when creating a class instance will share the same copy of :attr:`!x`. Because dataclasses just use normal Python class creation they also share this behavior. There is no general way for Data Classes to detect this condition. Instead, the -:func:`@dataclass ` decorator will raise a :exc:`ValueError` if it +:deco:`dataclass` decorator will raise a :exc:`ValueError` if it detects an unhashable default parameter. The assumption is that if a value is unhashable, it is mutable. This is a partial solution, but it does protect against many common errors. @@ -815,7 +820,7 @@ default value have the following special behaviors: :meth:`~object.__get__` or :meth:`!__set__` method is called rather than returning or overwriting the descriptor object. -* To determine whether a field contains a default value, :func:`@dataclass ` +* To determine whether a field contains a default value, :deco:`dataclass` will call the descriptor's :meth:`!__get__` method using its class access form: ``descriptor.__get__(obj=None, type=cls)``. If the descriptor returns a value in this case, it will be used as the diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 1ce2013f05da2e..3470f42a6c622d 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -261,6 +261,22 @@ A :class:`timedelta` object represents a duration, the difference between two >>> (d.days, d.seconds, d.microseconds) (-1, 86399, 999999) + Since the string representation of :class:`!timedelta` objects can be confusing, + use the following recipe to produce a more readable format: + + .. code-block:: pycon + + >>> def pretty_timedelta(td): + ... if td.days >= 0: + ... return str(td) + ... return f'-({-td!s})' + ... + >>> d = timedelta(hours=-1) + >>> str(d) # not human-friendly + '-1 day, 23:00:00' + >>> pretty_timedelta(d) + '-(1:00:00)' + Class attributes: diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 36221c026d6d4b..628232cb631bd4 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -84,10 +84,13 @@ the Oracle Berkeley DB. .. versionchanged:: 3.11 *file* accepts a :term:`path-like object`. -The object returned by :func:`~dbm.open` supports the same basic functionality as a -:class:`dict`; keys and their corresponding values can be stored, retrieved, and -deleted, and the :keyword:`in` operator and the :meth:`!keys` method are -available, as well as :meth:`!get` and :meth:`!setdefault` methods. +The object returned by :func:`~dbm.open` supports the basic +functionality of mutable :term:`mappings `; +keys and their corresponding values can be stored, retrieved, and +deleted, and iteration, the :keyword:`in` operator and methods :meth:`!keys`, +:meth:`!get`, :meth:`!setdefault` and :meth:`!clear` are available. +The :meth:`!keys` method returns a list instead of a view object. +The :meth:`!setdefault` method requires two arguments. Key and values are always stored as :class:`bytes`. This means that when strings are used they are implicitly converted to the default encoding before @@ -108,6 +111,10 @@ will automatically close them when done. Deleting a key from a read-only database raises a database module specific exception instead of :exc:`KeyError`. +.. versionchanged:: 3.13 + :meth:`!clear` methods are now available for all :mod:`dbm` backends. + + The following example records some hostnames and a corresponding title, and then prints out the contents of the database:: @@ -167,9 +174,6 @@ or any other SQLite browser, including the SQLite CLI. .. function:: open(filename, /, flag="r", mode=0o666) Open an SQLite database. - The returned object behaves like a :term:`mapping`, - implements a :meth:`!close` method, - and supports a "closing" context manager via the :keyword:`with` keyword. :param filename: The path to the database to be opened. @@ -186,6 +190,17 @@ or any other SQLite browser, including the SQLite CLI. The Unix file access mode of the file (default: octal ``0o666``), used only when the database has to be created. + The returned database object behaves similar to a mutable :term:`mapping`, + but the :meth:`!keys` method returns a list, and + the :meth:`!setdefault` method requires two arguments. + It also supports a "closing" context manager via the :keyword:`with` keyword. + + The following method is also provided: + + .. method:: sqlite3.close() + + Close the SQLite database. + :mod:`dbm.gnu` --- GNU database manager --------------------------------------- @@ -215,6 +230,11 @@ functionality like crash tolerance. raised for general mapping errors like specifying an incorrect key. +.. data:: open_flags + + A string of characters the *flag* parameter of :meth:`~dbm.gnu.open` supports. + + .. function:: open(filename, flag="r", mode=0o666, /) Open a GDBM database and return a :class:`!gdbm` object. @@ -250,14 +270,25 @@ functionality like crash tolerance. .. versionchanged:: 3.11 *filename* accepts a :term:`path-like object`. - .. data:: open_flags + :class:`!gdbm` objects behave similar to mutable :term:`mappings `, + but methods :meth:`!items`, :meth:`!values`, :meth:`!pop`, :meth:`!popitem`, + and :meth:`!update` are not supported, + the :meth:`!keys` method returns a list, and + the :meth:`!setdefault` method requires two arguments. + It also supports a "closing" context manager via the :keyword:`with` keyword. + + .. versionchanged:: 3.2 + Added the :meth:`!get` and :meth:`!setdefault` methods. - A string of characters the *flag* parameter of :meth:`~dbm.gnu.open` supports. + .. versionchanged:: 3.13 + Added the :meth:`!clear` method. - :class:`!gdbm` objects behave similar to :term:`mappings `, - but :meth:`!items` and :meth:`!values` methods are not supported. The following methods are also provided: + .. method:: gdbm.close() + + Close the GDBM database. + .. method:: gdbm.firstkey() It's possible to loop over every key in the database using this method and the @@ -289,16 +320,6 @@ functionality like crash tolerance. When the database has been opened in fast mode, this method forces any unwritten data to be written to the disk. - .. method:: gdbm.close() - - Close the GDBM database. - - .. method:: gdbm.clear() - - Remove all items from the GDBM database. - - .. versionadded:: 3.13 - :mod:`dbm.ndbm` --- New Database Manager ---------------------------------------- @@ -359,22 +380,27 @@ This module can be used with the "classic" NDBM interface or the :param int mode: |mode_param_doc| - :class:`!ndbm` objects behave similar to :term:`mappings `, - but :meth:`!items` and :meth:`!values` methods are not supported. - The following methods are also provided: - .. versionchanged:: 3.11 Accepts :term:`path-like object` for filename. - .. method:: ndbm.close() + :class:`!ndbm` objects behave similar to mutable :term:`mappings `, + but methods :meth:`!items`, :meth:`!values`, :meth:`!pop`, :meth:`!popitem`, + and :meth:`!update` are not supported, + the :meth:`!keys` method returns a list, and + the :meth:`!setdefault` method requires two arguments. + It also supports a "closing" context manager via the :keyword:`with` keyword. - Close the NDBM database. + .. versionchanged:: 3.2 + Added the :meth:`!get` and :meth:`!setdefault` methods. - .. method:: ndbm.clear() + .. versionchanged:: 3.13 + Added the :meth:`!clear` method. - Remove all items from the NDBM database. + The following method is also provided: - .. versionadded:: 3.13 + .. method:: ndbm.close() + + Close the NDBM database. :mod:`dbm.dumb` --- Portable DBM implementation @@ -412,9 +438,6 @@ The :mod:`!dbm.dumb` module defines the following: .. function:: open(filename, flag="c", mode=0o666) Open a :mod:`!dbm.dumb` database. - The returned database object behaves similar to a :term:`mapping`, - in addition to providing :meth:`~dumbdbm.sync` and :meth:`~dumbdbm.close` - methods. :param filename: The basename of the database file (without extensions). @@ -448,15 +471,18 @@ The :mod:`!dbm.dumb` module defines the following: .. versionchanged:: 3.11 *filename* accepts a :term:`path-like object`. - In addition to the methods provided by the - :class:`collections.abc.MutableMapping` class, - the following methods are provided: + The returned database object behaves similar to a mutable :term:`mapping`, + but the :meth:`!keys` and :meth:`!items` methods return lists, and + the :meth:`!setdefault` method requires two arguments. + It also supports a "closing" context manager via the :keyword:`with` keyword. - .. method:: dumbdbm.sync() - - Synchronize the on-disk directory and data files. This method is called - by the :meth:`shelve.Shelf.sync` method. + The following methods are also provided: .. method:: dumbdbm.close() Close the database. + + .. method:: dumbdbm.sync() + + Synchronize the on-disk directory and data files. This method is called + by the :meth:`shelve.Shelf.sync` method. diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index f53c1f3467034e..08e87e29cd7ce6 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -2,7 +2,7 @@ ===================================================================== .. module:: decimal - :synopsis: Implementation of the General Decimal Arithmetic Specification. + :synopsis: Implementation of the General Decimal Arithmetic Specification. .. moduleauthor:: Eric Price .. moduleauthor:: Facundo Batista @@ -121,7 +121,7 @@ reset them before monitoring a calculation. .. _decimal-tutorial: -Quick-start Tutorial +Quick-start tutorial -------------------- The usual start to using decimals is importing the module, viewing the current @@ -264,7 +264,7 @@ allows the settings to be changed. This approach meets the needs of most applications. For more advanced work, it may be useful to create alternate contexts using the -Context() constructor. To make an alternate active, use the :func:`setcontext` +:meth:`Context` constructor. To make an alternate active, use the :func:`setcontext` function. In accordance with the standard, the :mod:`decimal` module provides two ready to @@ -573,7 +573,7 @@ Decimal objects >>> Decimal(321).exp() Decimal('2.561702493119680037517373933E+139') - .. classmethod:: from_float(f) + .. classmethod:: from_float(f, /) Alternative constructor that only accepts instances of :class:`float` or :class:`int`. @@ -600,7 +600,7 @@ Decimal objects .. versionadded:: 3.1 - .. classmethod:: from_number(number) + .. classmethod:: from_number(number, /) Alternative constructor that only accepts instances of :class:`float`, :class:`int` or :class:`Decimal`, but not strings @@ -991,7 +991,7 @@ Each thread has its own current context which is accessed or changed using the Return the current context for the active thread. -.. function:: setcontext(c) +.. function:: setcontext(c, /) Set the current context for the active thread to *c*. @@ -1096,40 +1096,52 @@ In addition to the three supplied contexts, new contexts can be created with the default values are copied from the :const:`DefaultContext`. If the *flags* field is not specified or is :const:`None`, all flags are cleared. - *prec* is an integer in the range [``1``, :const:`MAX_PREC`] that sets - the precision for arithmetic operations in the context. + .. attribute:: prec - The *rounding* option is one of the constants listed in the section - `Rounding Modes`_. + An integer in the range [``1``, :const:`MAX_PREC`] that sets + the precision for arithmetic operations in the context. - The *traps* and *flags* fields list any signals to be set. Generally, new - contexts should only set traps and leave the flags clear. + .. attribute:: rounding - The *Emin* and *Emax* fields are integers specifying the outer limits allowable - for exponents. *Emin* must be in the range [:const:`MIN_EMIN`, ``0``], - *Emax* in the range [``0``, :const:`MAX_EMAX`]. + One of the constants listed in the section `Rounding Modes`_. - The *capitals* field is either ``0`` or ``1`` (the default). If set to - ``1``, exponents are printed with a capital ``E``; otherwise, a - lowercase ``e`` is used: ``Decimal('6.02e+23')``. + .. attribute:: traps + flags - The *clamp* field is either ``0`` (the default) or ``1``. - If set to ``1``, the exponent ``e`` of a :class:`Decimal` - instance representable in this context is strictly limited to the - range ``Emin - prec + 1 <= e <= Emax - prec + 1``. If *clamp* is - ``0`` then a weaker condition holds: the adjusted exponent of - the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is - ``1``, a large normal number will, where possible, have its - exponent reduced and a corresponding number of zeros added to its - coefficient, in order to fit the exponent constraints; this - preserves the value of the number but loses information about - significant trailing zeros. For example:: + Lists of any signals to be set. Generally, new contexts should only set + traps and leave the flags clear. - >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') - Decimal('1.23000E+999') + .. attribute:: Emin + Emax - A *clamp* value of ``1`` allows compatibility with the - fixed-width decimal interchange formats specified in IEEE 754. + Integers specifying the outer limits allowable for exponents. *Emin* must + be in the range [:const:`MIN_EMIN`, ``0``], *Emax* in the range + [``0``, :const:`MAX_EMAX`]. + + .. attribute:: capitals + + Either ``0`` or ``1`` (the default). If set to + ``1``, exponents are printed with a capital ``E``; otherwise, a + lowercase ``e`` is used: ``Decimal('6.02e+23')``. + + .. attribute:: clamp + + Either ``0`` (the default) or ``1``. If set to ``1``, the exponent ``e`` + of a :class:`Decimal` instance representable in this context is strictly + limited to the range ``Emin - prec + 1 <= e <= Emax - prec + 1``. + If *clamp* is ``0`` then a weaker condition holds: the adjusted exponent of + the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is + ``1``, a large normal number will, where possible, have its + exponent reduced and a corresponding number of zeros added to its + coefficient, in order to fit the exponent constraints; this + preserves the value of the number but loses information about + significant trailing zeros. For example:: + + >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') + Decimal('1.23000E+999') + + A *clamp* value of ``1`` allows compatibility with the + fixed-width decimal interchange formats specified in IEEE 754. The :class:`Context` class defines several general purpose methods as well as a large number of methods for doing arithmetic directly in a given context. @@ -1156,11 +1168,11 @@ In addition to the three supplied contexts, new contexts can be created with the Return a duplicate of the context. - .. method:: copy_decimal(num) + .. method:: copy_decimal(num, /) Return a copy of the Decimal instance num. - .. method:: create_decimal(num) + .. method:: create_decimal(num='0', /) Creates a new Decimal instance from *num* but using *self* as context. Unlike the :class:`Decimal` constructor, the context precision, @@ -1184,7 +1196,7 @@ In addition to the three supplied contexts, new contexts can be created with the If the argument is a string, no leading or trailing whitespace or underscores are permitted. - .. method:: create_decimal_from_float(f) + .. method:: create_decimal_from_float(f, /) Creates a new Decimal instance from a float *f* but rounding using *self* as the context. Unlike the :meth:`Decimal.from_float` class method, @@ -1222,222 +1234,222 @@ In addition to the three supplied contexts, new contexts can be created with the recounted here. - .. method:: abs(x) + .. method:: abs(x, /) Returns the absolute value of *x*. - .. method:: add(x, y) + .. method:: add(x, y, /) Return the sum of *x* and *y*. - .. method:: canonical(x) + .. method:: canonical(x, /) Returns the same Decimal object *x*. - .. method:: compare(x, y) + .. method:: compare(x, y, /) Compares *x* and *y* numerically. - .. method:: compare_signal(x, y) + .. method:: compare_signal(x, y, /) Compares the values of the two operands numerically. - .. method:: compare_total(x, y) + .. method:: compare_total(x, y, /) Compares two operands using their abstract representation. - .. method:: compare_total_mag(x, y) + .. method:: compare_total_mag(x, y, /) Compares two operands using their abstract representation, ignoring sign. - .. method:: copy_abs(x) + .. method:: copy_abs(x, /) Returns a copy of *x* with the sign set to 0. - .. method:: copy_negate(x) + .. method:: copy_negate(x, /) Returns a copy of *x* with the sign inverted. - .. method:: copy_sign(x, y) + .. method:: copy_sign(x, y, /) Copies the sign from *y* to *x*. - .. method:: divide(x, y) + .. method:: divide(x, y, /) Return *x* divided by *y*. - .. method:: divide_int(x, y) + .. method:: divide_int(x, y, /) Return *x* divided by *y*, truncated to an integer. - .. method:: divmod(x, y) + .. method:: divmod(x, y, /) Divides two numbers and returns the integer part of the result. - .. method:: exp(x) + .. method:: exp(x, /) Returns ``e ** x``. - .. method:: fma(x, y, z) + .. method:: fma(x, y, z, /) Returns *x* multiplied by *y*, plus *z*. - .. method:: is_canonical(x) + .. method:: is_canonical(x, /) Returns ``True`` if *x* is canonical; otherwise returns ``False``. - .. method:: is_finite(x) + .. method:: is_finite(x, /) Returns ``True`` if *x* is finite; otherwise returns ``False``. - .. method:: is_infinite(x) + .. method:: is_infinite(x, /) Returns ``True`` if *x* is infinite; otherwise returns ``False``. - .. method:: is_nan(x) + .. method:: is_nan(x, /) Returns ``True`` if *x* is a qNaN or sNaN; otherwise returns ``False``. - .. method:: is_normal(x) + .. method:: is_normal(x, /) Returns ``True`` if *x* is a normal number; otherwise returns ``False``. - .. method:: is_qnan(x) + .. method:: is_qnan(x, /) Returns ``True`` if *x* is a quiet NaN; otherwise returns ``False``. - .. method:: is_signed(x) + .. method:: is_signed(x, /) Returns ``True`` if *x* is negative; otherwise returns ``False``. - .. method:: is_snan(x) + .. method:: is_snan(x, /) Returns ``True`` if *x* is a signaling NaN; otherwise returns ``False``. - .. method:: is_subnormal(x) + .. method:: is_subnormal(x, /) Returns ``True`` if *x* is subnormal; otherwise returns ``False``. - .. method:: is_zero(x) + .. method:: is_zero(x, /) Returns ``True`` if *x* is a zero; otherwise returns ``False``. - .. method:: ln(x) + .. method:: ln(x, /) Returns the natural (base e) logarithm of *x*. - .. method:: log10(x) + .. method:: log10(x, /) Returns the base 10 logarithm of *x*. - .. method:: logb(x) + .. method:: logb(x, /) Returns the exponent of the magnitude of the operand's MSD. - .. method:: logical_and(x, y) + .. method:: logical_and(x, y, /) Applies the logical operation *and* between each operand's digits. - .. method:: logical_invert(x) + .. method:: logical_invert(x, /) Invert all the digits in *x*. - .. method:: logical_or(x, y) + .. method:: logical_or(x, y, /) Applies the logical operation *or* between each operand's digits. - .. method:: logical_xor(x, y) + .. method:: logical_xor(x, y, /) Applies the logical operation *xor* between each operand's digits. - .. method:: max(x, y) + .. method:: max(x, y, /) Compares two values numerically and returns the maximum. - .. method:: max_mag(x, y) + .. method:: max_mag(x, y, /) Compares the values numerically with their sign ignored. - .. method:: min(x, y) + .. method:: min(x, y, /) Compares two values numerically and returns the minimum. - .. method:: min_mag(x, y) + .. method:: min_mag(x, y, /) Compares the values numerically with their sign ignored. - .. method:: minus(x) + .. method:: minus(x, /) Minus corresponds to the unary prefix minus operator in Python. - .. method:: multiply(x, y) + .. method:: multiply(x, y, /) Return the product of *x* and *y*. - .. method:: next_minus(x) + .. method:: next_minus(x, /) Returns the largest representable number smaller than *x*. - .. method:: next_plus(x) + .. method:: next_plus(x, /) Returns the smallest representable number larger than *x*. - .. method:: next_toward(x, y) + .. method:: next_toward(x, y, /) Returns the number closest to *x*, in direction towards *y*. - .. method:: normalize(x) + .. method:: normalize(x, /) Reduces *x* to its simplest form. - .. method:: number_class(x) + .. method:: number_class(x, /) Returns an indication of the class of *x*. - .. method:: plus(x) + .. method:: plus(x, /) Plus corresponds to the unary prefix plus operator in Python. This operation applies the context precision and rounding, so it is *not* an @@ -1478,7 +1490,7 @@ In addition to the three supplied contexts, new contexts can be created with the always exact. - .. method:: quantize(x, y) + .. method:: quantize(x, y, /) Returns a value equal to *x* (rounded), having the exponent of *y*. @@ -1488,7 +1500,7 @@ In addition to the three supplied contexts, new contexts can be created with the Just returns 10, as this is Decimal, :) - .. method:: remainder(x, y) + .. method:: remainder(x, y, /) Returns the remainder from integer division. @@ -1496,43 +1508,43 @@ In addition to the three supplied contexts, new contexts can be created with the dividend. - .. method:: remainder_near(x, y) + .. method:: remainder_near(x, y, /) Returns ``x - y * n``, where *n* is the integer nearest the exact value of ``x / y`` (if the result is 0 then its sign will be the sign of *x*). - .. method:: rotate(x, y) + .. method:: rotate(x, y, /) Returns a rotated copy of *x*, *y* times. - .. method:: same_quantum(x, y) + .. method:: same_quantum(x, y, /) Returns ``True`` if the two operands have the same exponent. - .. method:: scaleb (x, y) + .. method:: scaleb (x, y, /) Returns the first operand after adding the second value its exp. - .. method:: shift(x, y) + .. method:: shift(x, y, /) Returns a shifted copy of *x*, *y* times. - .. method:: sqrt(x) + .. method:: sqrt(x, /) Square root of a non-negative number to context precision. - .. method:: subtract(x, y) + .. method:: subtract(x, y, /) Return the difference between *x* and *y*. - .. method:: to_eng_string(x) + .. method:: to_eng_string(x, /) Convert to a string, using engineering notation if an exponent is needed. @@ -1541,12 +1553,12 @@ In addition to the three supplied contexts, new contexts can be created with the require the addition of either one or two trailing zeros. - .. method:: to_integral_exact(x) + .. method:: to_integral_exact(x, /) Rounds to an integer. - .. method:: to_sci_string(x) + .. method:: to_sci_string(x, /) Converts a number to a string using scientific notation. @@ -1769,7 +1781,7 @@ The following table summarizes the hierarchy of signals:: .. _decimal-notes: -Floating-Point Notes +Floating-point notes -------------------- @@ -2088,20 +2100,20 @@ to work with the :class:`Decimal` class:: Decimal FAQ ----------- -Q. It is cumbersome to type ``decimal.Decimal('1234.5')``. Is there a way to +Q: It is cumbersome to type ``decimal.Decimal('1234.5')``. Is there a way to minimize typing when using the interactive interpreter? -A. Some users abbreviate the constructor to just a single letter: +A: Some users abbreviate the constructor to just a single letter: >>> D = decimal.Decimal >>> D('1.23') + D('3.45') Decimal('4.68') -Q. In a fixed-point application with two decimal places, some inputs have many +Q: In a fixed-point application with two decimal places, some inputs have many places and need to be rounded. Others are not supposed to have excess digits and need to be validated. What methods should be used? -A. The :meth:`~Decimal.quantize` method rounds to a fixed number of decimal places. If +A: The :meth:`~Decimal.quantize` method rounds to a fixed number of decimal places. If the :const:`Inexact` trap is set, it is also useful for validation: >>> TWOPLACES = Decimal(10) ** -2 # same as Decimal('0.01') @@ -2119,10 +2131,10 @@ the :const:`Inexact` trap is set, it is also useful for validation: ... Inexact: None -Q. Once I have valid two place inputs, how do I maintain that invariant +Q: Once I have valid two place inputs, how do I maintain that invariant throughout an application? -A. Some operations like addition, subtraction, and multiplication by an integer +A: Some operations like addition, subtraction, and multiplication by an integer will automatically preserve fixed point. Others operations, like division and non-integer multiplication, will change the number of decimal places and need to be followed-up with a :meth:`~Decimal.quantize` step: @@ -2154,21 +2166,21 @@ to handle the :meth:`~Decimal.quantize` step: >>> div(b, a) Decimal('0.03') -Q. There are many ways to express the same value. The numbers ``200``, +Q: There are many ways to express the same value. The numbers ``200``, ``200.000``, ``2E2``, and ``.02E+4`` all have the same value at various precisions. Is there a way to transform them to a single recognizable canonical value? -A. The :meth:`~Decimal.normalize` method maps all equivalent values to a single +A: The :meth:`~Decimal.normalize` method maps all equivalent values to a single representative: >>> values = map(Decimal, '200 200.000 2E2 .02E+4'.split()) >>> [v.normalize() for v in values] [Decimal('2E+2'), Decimal('2E+2'), Decimal('2E+2'), Decimal('2E+2')] -Q. When does rounding occur in a computation? +Q: When does rounding occur in a computation? -A. It occurs *after* the computation. The philosophy of the decimal +A: It occurs *after* the computation. The philosophy of the decimal specification is that numbers are considered exact and are created independent of the current context. They can even have greater precision than current context. Computations process with those @@ -2186,10 +2198,10 @@ applied to the *result* of the computation:: >>> pi + 0 - Decimal('0.00005'). # Intermediate values are rounded Decimal('3.1416') -Q. Some decimal values always print with exponential notation. Is there a way +Q: Some decimal values always print with exponential notation. Is there a way to get a non-exponential representation? -A. For some values, exponential notation is the only way to express the number +A: For some values, exponential notation is the only way to express the number of significant places in the coefficient. For example, expressing ``5.0E+3`` as ``5000`` keeps the value constant but cannot show the original's two-place significance. @@ -2204,9 +2216,9 @@ value unchanged: >>> remove_exponent(Decimal('5E+3')) Decimal('5000') -Q. Is there a way to convert a regular float to a :class:`Decimal`? +Q: Is there a way to convert a regular float to a :class:`Decimal`? -A. Yes, any binary floating-point number can be exactly expressed as a +A: Yes, any binary floating-point number can be exactly expressed as a Decimal though an exact conversion may take more precision than intuition would suggest: @@ -2215,19 +2227,19 @@ suggest: >>> Decimal(math.pi) Decimal('3.141592653589793115997963468544185161590576171875') -Q. Within a complex calculation, how can I make sure that I haven't gotten a +Q: Within a complex calculation, how can I make sure that I haven't gotten a spurious result because of insufficient precision or rounding anomalies. -A. The decimal module makes it easy to test results. A best practice is to +A: The decimal module makes it easy to test results. A best practice is to re-run calculations using greater precision and with various rounding modes. Widely differing results indicate insufficient precision, rounding mode issues, ill-conditioned inputs, or a numerically unstable algorithm. -Q. I noticed that context precision is applied to the results of operations but +Q: I noticed that context precision is applied to the results of operations but not to the inputs. Is there anything to watch out for when mixing values of different precisions? -A. Yes. The principle is that all values are considered to be exact and so is +A: Yes. The principle is that all values are considered to be exact and so is the arithmetic on those values. Only the results are rounded. The advantage for inputs is that "what you type is what you get". A disadvantage is that the results can look odd if you forget that the inputs haven't been rounded: @@ -2255,9 +2267,9 @@ Alternatively, inputs can be rounded upon creation using the >>> Context(prec=5, rounding=ROUND_DOWN).create_decimal('1.2345678') Decimal('1.2345') -Q. Is the CPython implementation fast for large numbers? +Q: Is the CPython implementation fast for large numbers? -A. Yes. In the CPython and PyPy3 implementations, the C/CFFI versions of +A: Yes. In the CPython and PyPy3 implementations, the C/CFFI versions of the decimal module integrate the high speed `libmpdec `_ library for arbitrary precision correctly rounded decimal floating-point arithmetic [#]_. diff --git a/Doc/library/dialog.rst b/Doc/library/dialog.rst index 191e0da12103fa..e0693e8eb6ed22 100644 --- a/Doc/library/dialog.rst +++ b/Doc/library/dialog.rst @@ -220,7 +220,7 @@ is the base class for dialogs defined in other supporting modules. .. class:: Dialog(master=None, **options) - .. method:: show(color=None, **options) + .. method:: show(**options) Render the Dialog window. diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst index ce948a6860f02c..fcd240e7e8283d 100644 --- a/Doc/library/difflib.rst +++ b/Doc/library/difflib.rst @@ -231,7 +231,7 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module. *linejunk*: A function that accepts a single string argument, and returns true if the string is junk, or false if not. The default is ``None``. There is also a module-level function :func:`IS_LINE_JUNK`, which filters out lines - without visible characters, except for at most one pound character (``'#'``) + without visible characters, except for at most one hash character (``'#'``) -- however the underlying :class:`SequenceMatcher` class does a dynamic analysis of which lines are so frequent as to constitute noise, and this usually works better than using this function. @@ -351,9 +351,9 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module. .. seealso:: - `Pattern Matching: The Gestalt Approach `_ + `Pattern Matching: The Gestalt Approach `_ Discussion of a similar algorithm by John W. Ratcliff and D. E. Metzener. This - was published in `Dr. Dobb's Journal `_ in July, 1988. + was published in Dr. Dobb's Journal in July, 1988. .. _sequence-matcher: diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 44767b5dd2d5c9..ff4681dcb0e8a7 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -585,6 +585,22 @@ operations on it as if it was a Python list. The top of the stack corresponds to generate line tracing events. +.. opcode:: NOT_TAKEN + + Do nothing code. + Used by the interpreter to record :monitoring-event:`BRANCH_LEFT` + and :monitoring-event:`BRANCH_RIGHT` events for :mod:`sys.monitoring`. + + .. versionadded:: 3.14 + + +.. opcode:: POP_ITER + + Removes the iterator from the top of the stack. + + .. versionadded:: 3.14 + + .. opcode:: POP_TOP Removes the top-of-stack item:: @@ -1094,14 +1110,6 @@ iterations of the loop. .. versionadded:: 3.14 -.. opcode:: LOAD_CONST_IMMORTAL (consti) - - Pushes ``co_consts[consti]`` onto the stack. - Can be used when the constant value is known to be immortal. - - .. versionadded:: 3.14 - - .. opcode:: LOAD_NAME (namei) Pushes the value associated with ``co_names[namei]`` onto the stack. @@ -1128,6 +1136,48 @@ iterations of the loop. .. versionadded:: 3.12 +.. opcode:: BUILD_TEMPLATE + + Constructs a new :class:`~string.templatelib.Template` instance from a tuple + of strings and a tuple of interpolations and pushes the resulting object + onto the stack:: + + interpolations = STACK.pop() + strings = STACK.pop() + STACK.append(_build_template(strings, interpolations)) + + .. versionadded:: 3.14 + + +.. opcode:: BUILD_INTERPOLATION (format) + + Constructs a new :class:`~string.templatelib.Interpolation` instance from a + value and its source expression and pushes the resulting object onto the + stack. + + If no conversion or format specification is present, ``format`` is set to + ``2``. + + If the low bit of ``format`` is set, it indicates that the interpolation + contains a format specification. + + If ``format >> 2`` is non-zero, it indicates that the interpolation + contains a conversion. The value of ``format >> 2`` is the conversion type + (``0`` for no conversion, ``1`` for ``!s``, ``2`` for ``!r``, and + ``3`` for ``!a``):: + + conversion = format >> 2 + if format & 1: + format_spec = STACK.pop() + else: + format_spec = None + expression = STACK.pop() + value = STACK.pop() + STACK.append(_build_interpolation(value, expression, conversion, format_spec)) + + .. versionadded:: 3.14 + + .. opcode:: BUILD_TUPLE (count) Creates a tuple consuming *count* items from the stack, and pushes the @@ -1592,7 +1642,7 @@ iterations of the loop. Pushes a ``NULL`` to the stack. Used in the call sequence to match the ``NULL`` pushed by - :opcode:`LOAD_METHOD` for non-method calls. + :opcode:`!LOAD_METHOD` for non-method calls. .. versionadded:: 3.11 @@ -1623,9 +1673,13 @@ iterations of the loop. * ``0x02`` a dictionary of keyword-only parameters' default values * ``0x04`` a tuple of strings containing parameters' annotations * ``0x08`` a tuple containing cells for free variables, making a closure + * ``0x10`` the :term:`annotate function` for the function object .. versionadded:: 3.13 + .. versionchanged:: 3.14 + Added ``0x10`` to indicate the annotate function for the function object. + .. opcode:: BUILD_SLICE (argc) @@ -1913,14 +1967,20 @@ but are replaced by real opcodes or removed before bytecode is generated. Marks the end of the code block associated with the last ``SETUP_FINALLY``, ``SETUP_CLEANUP`` or ``SETUP_WITH``. + +.. opcode:: LOAD_CONST_IMMORTAL (consti) + + Works as :opcode:`LOAD_CONST`, but is more efficient for immortal objects. + + .. opcode:: JUMP -.. opcode:: JUMP_NO_INTERRUPT + JUMP_NO_INTERRUPT Undirected relative jump instructions which are replaced by their directed (forward/backward) counterparts by the assembler. .. opcode:: JUMP_IF_TRUE -.. opcode:: JUMP_IF_FALSE + JUMP_IF_FALSE Conditional jumps which do not impact the stack. Replaced by the sequence ``COPY 1``, ``TO_BOOL``, ``POP_JUMP_IF_TRUE/FALSE``. @@ -1936,12 +1996,6 @@ but are replaced by real opcodes or removed before bytecode is generated. This opcode is now a pseudo-instruction. -.. opcode:: LOAD_METHOD - - Optimized unbound method lookup. Emitted as a ``LOAD_ATTR`` opcode - with a flag set in the arg. - - .. _opcode_collections: Opcode collections diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index b86fef9fd6f310..61463d6adcd143 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -174,7 +174,7 @@ with assorted summaries at the end. You can force verbose mode by passing ``verbose=True`` to :func:`testmod`, or prohibit it by passing ``verbose=False``. In either of those cases, -``sys.argv`` is not examined by :func:`testmod` (so passing ``-v`` or not +:data:`sys.argv` is not examined by :func:`testmod` (so passing ``-v`` or not has no effect). There is also a command line shortcut for running :func:`testmod`, see section @@ -231,7 +231,7 @@ documentation:: As with :func:`testmod`, :func:`testfile` won't display anything unless an example fails. If an example does fail, then the failing example(s) and the cause(s) of the failure(s) are printed to stdout, using the same format as -:func:`testmod`. +:func:`!testmod`. By default, :func:`testfile` looks for files in the calling module's directory. See section :ref:`doctest-basic-api` for a description of the optional arguments @@ -311,6 +311,13 @@ Which Docstrings Are Examined? The module docstring, and all function, class and method docstrings are searched. Objects imported into the module are not searched. +.. currentmodule:: None + +.. attribute:: module.__test__ + :no-typesetting: + +.. currentmodule:: doctest + In addition, there are cases when you want tests to be part of a module but not part of the help text, which requires that the tests not be included in the docstring. Doctest looks for a module-level variable called ``__test__`` and uses it to locate other @@ -343,6 +350,13 @@ searches them recursively for docstrings, which are then scanned for tests. Any classes found are recursively searched similarly, to test docstrings in their contained methods and nested classes. +.. note:: + + ``doctest`` can only automatically discover classes and functions that are + defined at the module level or inside other classes. + + Since nested classes and functions only exist when an outer function + is called, they cannot be discovered. Define them outside to make them visible. .. _doctest-finding-examples: @@ -533,7 +547,7 @@ Some details you should read once, but won't need to remember: * The interactive shell omits the traceback header line for some :exc:`SyntaxError`\ s. But doctest uses the traceback header line to distinguish exceptions from non-exceptions. So in the rare case where you need - to test a :exc:`SyntaxError` that omits the traceback header, you will need to + to test a :exc:`!SyntaxError` that omits the traceback header, you will need to manually add the traceback header line to your test example. .. index:: single: ^ (caret); marker @@ -860,15 +874,15 @@ The :const:`ELLIPSIS` directive gives a nice approach for the last example: Floating-point numbers are also subject to small output variations across -platforms, because Python defers to the platform C library for float formatting, -and C libraries vary widely in quality here. :: +platforms, because Python defers to the platform C library for some +floating-point calculations, and C libraries vary widely in quality here. :: - >>> 1./7 # risky - 0.14285714285714285 - >>> print(1./7) # safer - 0.142857142857 - >>> print(round(1./7, 6)) # much safer - 0.142857 + >>> 1000**0.1 # risky + 1.9952623149688797 + >>> round(1000**0.1, 9) # safer + 1.995262315 + >>> print(f'{1000**0.1:.4f}') # much safer + 1.9953 Numbers of the form ``I/2.**J`` are safe across all platforms, and I often contrive doctest examples to produce numbers of that form:: @@ -938,13 +952,13 @@ and :ref:`doctest-simple-testfile`. Optional argument *verbose* prints lots of stuff if true, and prints only failures if false; by default, or if ``None``, it's true if and only if ``'-v'`` - is in ``sys.argv``. + is in :data:`sys.argv`. Optional argument *report* prints a summary at the end when true, else prints nothing at the end. In verbose mode, the summary is detailed, else the summary is very brief (in fact, empty if all tests passed). - Optional argument *optionflags* (default value 0) takes the + Optional argument *optionflags* (default value ``0``) takes the :ref:`bitwise OR ` of option flags. See section :ref:`doctest-options`. @@ -1045,7 +1059,7 @@ from text files and modules with doctests: The returned :class:`unittest.TestSuite` is to be run by the unittest framework and runs the interactive examples in each file. If an example in any file - fails, then the synthesized unit test fails, and a :exc:`failureException` + fails, then the synthesized unit test fails, and a :exc:`~unittest.TestCase.failureException` exception is raised showing the name of the file containing the test and a (sometimes approximate) line number. If all the examples in a file are skipped, then the synthesized unit test is also marked as skipped. @@ -1078,13 +1092,14 @@ from text files and modules with doctests: Optional argument *setUp* specifies a set-up function for the test suite. This is called before running the tests in each file. The *setUp* function - will be passed a :class:`DocTest` object. The setUp function can access the - test globals as the *globs* attribute of the test passed. + will be passed a :class:`DocTest` object. The *setUp* function can access the + test globals as the :attr:`~DocTest.globs` attribute of the test passed. Optional argument *tearDown* specifies a tear-down function for the test suite. This is called after running the tests in each file. The *tearDown* - function will be passed a :class:`DocTest` object. The setUp function can - access the test globals as the *globs* attribute of the test passed. + function will be passed a :class:`DocTest` object. The *tearDown* function can + access the test globals as the :attr:`~DocTest.globs` attribute of the test + passed. Optional argument *globs* is a dictionary containing the initial global variables for the tests. A new copy of this dictionary is created for each @@ -1111,11 +1126,12 @@ from text files and modules with doctests: Convert doctest tests for a module to a :class:`unittest.TestSuite`. The returned :class:`unittest.TestSuite` is to be run by the unittest framework - and runs each doctest in the module. If any of the doctests fail, then the - synthesized unit test fails, and a :exc:`failureException` exception is raised + and runs each doctest in the module. + Each docstring is run as a separate unit test. + If any of the doctests fail, then the synthesized unit test fails, + and a :exc:`unittest.TestCase.failureException` exception is raised showing the name of the file containing the test and a (sometimes approximate) line number. If all the examples in a docstring are skipped, then the - synthesized unit test is also marked as skipped. Optional argument *module* provides the module to be tested. It can be a module object or a (possibly dotted) module name. If not specified, the module calling @@ -1123,7 +1139,7 @@ from text files and modules with doctests: Optional argument *globs* is a dictionary containing the initial global variables for the tests. A new copy of this dictionary is created for each - test. By default, *globs* is a new empty dictionary. + test. By default, *globs* is the module's :attr:`~module.__dict__`. Optional argument *extraglobs* specifies an extra set of global variables, which is merged into *globs*. By default, no extra globals are used. @@ -1132,7 +1148,7 @@ from text files and modules with doctests: drop-in replacement) that is used to extract doctests from the module. Optional arguments *setUp*, *tearDown*, and *optionflags* are the same as for - function :func:`DocFileSuite` above. + function :func:`DocFileSuite` above, but they are called for each docstring. This function uses the same search technique as :func:`testmod`. @@ -1140,12 +1156,6 @@ from text files and modules with doctests: :func:`DocTestSuite` returns an empty :class:`unittest.TestSuite` if *module* contains no docstrings instead of raising :exc:`ValueError`. -.. exception:: failureException - - When doctests which have been converted to unit tests by :func:`DocFileSuite` - or :func:`DocTestSuite` fail, this exception is raised showing the name of - the file containing the test and a (sometimes approximate) line number. - Under the covers, :func:`DocTestSuite` creates a :class:`unittest.TestSuite` out of :class:`!doctest.DocTestCase` instances, and :class:`!DocTestCase` is a subclass of :class:`unittest.TestCase`. :class:`!DocTestCase` isn't documented @@ -1158,15 +1168,15 @@ of :class:`!DocTestCase`. So both ways of creating a :class:`unittest.TestSuite` run instances of :class:`!DocTestCase`. This is important for a subtle reason: when you run -:mod:`doctest` functions yourself, you can control the :mod:`doctest` options in -use directly, by passing option flags to :mod:`doctest` functions. However, if -you're writing a :mod:`unittest` framework, :mod:`unittest` ultimately controls +:mod:`doctest` functions yourself, you can control the :mod:`!doctest` options in +use directly, by passing option flags to :mod:`!doctest` functions. However, if +you're writing a :mod:`unittest` framework, :mod:`!unittest` ultimately controls when and how tests get run. The framework author typically wants to control -:mod:`doctest` reporting options (perhaps, e.g., specified by command line -options), but there's no way to pass options through :mod:`unittest` to -:mod:`doctest` test runners. +:mod:`!doctest` reporting options (perhaps, e.g., specified by command line +options), but there's no way to pass options through :mod:`!unittest` to +:mod:`!doctest` test runners. -For this reason, :mod:`doctest` also supports a notion of :mod:`doctest` +For this reason, :mod:`doctest` also supports a notion of :mod:`!doctest` reporting flags specific to :mod:`unittest` support, via this function: @@ -1181,12 +1191,12 @@ reporting flags specific to :mod:`unittest` support, via this function: :mod:`unittest`: the :meth:`!runTest` method of :class:`!DocTestCase` looks at the option flags specified for the test case when the :class:`!DocTestCase` instance was constructed. If no reporting flags were specified (which is the - typical and expected case), :mod:`!doctest`'s :mod:`unittest` reporting flags are + typical and expected case), :mod:`!doctest`'s :mod:`!unittest` reporting flags are :ref:`bitwise ORed ` into the option flags, and the option flags so augmented are passed to the :class:`DocTestRunner` instance created to run the doctest. If any reporting flags were specified when the :class:`!DocTestCase` instance was constructed, :mod:`!doctest`'s - :mod:`unittest` reporting flags are ignored. + :mod:`!unittest` reporting flags are ignored. The value of the :mod:`unittest` reporting flags in effect before the function was called is returned by the function. @@ -1279,7 +1289,7 @@ DocTest Objects .. attribute:: filename The name of the file that this :class:`DocTest` was extracted from; or - ``None`` if the filename is unknown, or if the :class:`DocTest` was not + ``None`` if the filename is unknown, or if the :class:`!DocTest` was not extracted from a file. @@ -1419,10 +1429,10 @@ DocTestFinder objects The globals for each :class:`DocTest` is formed by combining *globs* and *extraglobs* (bindings in *extraglobs* override bindings in *globs*). A new - shallow copy of the globals dictionary is created for each :class:`DocTest`. - If *globs* is not specified, then it defaults to the module's *__dict__*, if - specified, or ``{}`` otherwise. If *extraglobs* is not specified, then it - defaults to ``{}``. + shallow copy of the globals dictionary is created for each :class:`!DocTest`. + If *globs* is not specified, then it defaults to the module's + :attr:`~module.__dict__`, if specified, or ``{}`` otherwise. + If *extraglobs* is not specified, then it defaults to ``{}``. .. _doctest-doctestparser: @@ -1446,7 +1456,7 @@ DocTestParser objects :class:`DocTest` object. *globs*, *name*, *filename*, and *lineno* are attributes for the new - :class:`DocTest` object. See the documentation for :class:`DocTest` for more + :class:`!DocTest` object. See the documentation for :class:`DocTest` for more information. @@ -1461,7 +1471,7 @@ DocTestParser objects Divide the given string into examples and intervening text, and return them as a list of alternating :class:`Example`\ s and strings. Line numbers for the - :class:`Example`\ s are 0-based. The optional argument *name* is a name + :class:`!Example`\ s are 0-based. The optional argument *name* is a name identifying this string, and is only used for error messages. @@ -1501,7 +1511,7 @@ DocTestRunner objects :class:`OutputChecker`. This comparison may be customized with a number of option flags; see section :ref:`doctest-options` for more information. If the option flags are insufficient, then the comparison may also be customized by - passing a subclass of :class:`OutputChecker` to the constructor. + passing a subclass of :class:`!OutputChecker` to the constructor. The test runner's display output can be controlled in two ways. First, an output function can be passed to :meth:`run`; this function will be called @@ -1540,7 +1550,7 @@ DocTestRunner objects output; it should not be called directly. *example* is the example about to be processed. *test* is the test - *containing example*. *out* is the output function that was passed to + containing *example*. *out* is the output function that was passed to :meth:`DocTestRunner.run`. @@ -1940,7 +1950,7 @@ several options for organizing tests: containing test cases for the named topics. These functions can be included in the same file as the module, or separated out into a separate test file. -* Define a ``__test__`` dictionary mapping from regression test topics to +* Define a :attr:`~module.__test__` dictionary mapping from regression test topics to docstrings containing test cases. When you have placed your tests in a module, the module can itself be the test diff --git a/Doc/library/email.compat32-message.rst b/Doc/library/email.compat32-message.rst index 4285c436e8da80..5754c2b65b239f 100644 --- a/Doc/library/email.compat32-message.rst +++ b/Doc/library/email.compat32-message.rst @@ -181,7 +181,7 @@ Here are the methods of the :class:`Message` class: :meth:`set_payload` instead. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.set_content` and the related ``make`` and ``add`` methods. @@ -224,7 +224,7 @@ Here are the methods of the :class:`Message` class: ASCII charset. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.get_content` and :meth:`~email.message.EmailMessage.iter_parts`. @@ -236,7 +236,7 @@ Here are the methods of the :class:`Message` class: the message's default character set; see :meth:`set_charset` for details. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.set_content`. @@ -265,9 +265,9 @@ Here are the methods of the :class:`Message` class: using that :mailheader:`Content-Transfer-Encoding` and is not modified. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *charset* parameter of the - :meth:`email.emailmessage.EmailMessage.set_content` method. + :meth:`email.message.EmailMessage.set_content` method. .. method:: get_charset() @@ -276,7 +276,7 @@ Here are the methods of the :class:`Message` class: message's payload. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class it always returns + :class:`~email.message.EmailMessage` class it always returns ``None``. @@ -486,7 +486,7 @@ Here are the methods of the :class:`Message` class: search instead of :mailheader:`Content-Type`. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *params* property of the individual header objects returned by the header access methods. @@ -524,7 +524,7 @@ Here are the methods of the :class:`Message` class: to ``False``. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *params* property of the individual header objects returned by the header access methods. @@ -579,7 +579,7 @@ Here are the methods of the :class:`Message` class: header is also added. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the ``make_`` and ``add_`` methods. diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index 219fad0d2f6745..f49885b8785235 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -178,16 +178,36 @@ The :mod:`email.header` module also provides the following convenient functions. Decode a message header value without converting the character set. The header value is in *header*. - This function returns a list of ``(decoded_string, charset)`` pairs containing - each of the decoded parts of the header. *charset* is ``None`` for non-encoded - parts of the header, otherwise a lower case string containing the name of the - character set specified in the encoded string. + For historical reasons, this function may return either: - Here's an example:: + 1. A list of pairs containing each of the decoded parts of the header, + ``(decoded_bytes, charset)``, where *decoded_bytes* is always an instance of + :class:`bytes`, and *charset* is either: + + - A lower case string containing the name of the character set specified. + + - ``None`` for non-encoded parts of the header. + + 2. A list of length 1 containing a pair ``(string, None)``, where + *string* is always an instance of :class:`str`. + + An :exc:`email.errors.HeaderParseError` may be raised when certain decoding + errors occur (e.g. a base64 decoding exception). + + Here are examples: >>> from email.header import decode_header >>> decode_header('=?iso-8859-1?q?p=F6stal?=') [(b'p\xf6stal', 'iso-8859-1')] + >>> decode_header('unencoded_string') + [('unencoded_string', None)] + >>> decode_header('bar =?utf-8?B?ZsOzbw==?=') + [(b'bar ', None), (b'f\xc3\xb3o', 'utf-8')] + + .. note:: + + This function exists for backwards compatibility only. For + new code, we recommend using :class:`email.headerregistry.HeaderRegistry`. .. function:: make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' ') @@ -203,3 +223,7 @@ The :mod:`email.header` module also provides the following convenient functions. :class:`Header` instance. Optional *maxlinelen*, *header_name*, and *continuation_ws* are as in the :class:`Header` constructor. + .. note:: + + This function exists for backwards compatibility only, and is + not recommended for use in new code. diff --git a/Doc/library/email.headerregistry.rst b/Doc/library/email.headerregistry.rst index 7f8044932fae99..ff8b601fe3d1bb 100644 --- a/Doc/library/email.headerregistry.rst +++ b/Doc/library/email.headerregistry.rst @@ -294,7 +294,7 @@ variant, :attr:`~.BaseHeader.max_count` is set to 1. ``inline`` and ``attachment`` are the only valid values in common use. -.. class:: ContentTransferEncoding +.. class:: ContentTransferEncodingHeader Handles the :mailheader:`Content-Transfer-Encoding` header. diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst index 439b5c8f34b65a..6a70714dc3ee42 100644 --- a/Doc/library/email.parser.rst +++ b/Doc/library/email.parser.rst @@ -116,7 +116,7 @@ Here is the API for the :class:`BytesFeedParser`: Works like :class:`BytesFeedParser` except that the input to the :meth:`~BytesFeedParser.feed` method must be a string. This is of limited utility, since the only way for such a message to be valid is for it to - contain only ASCII text or, if :attr:`~email.policy.Policy.utf8` is + contain only ASCII text or, if :attr:`~email.policy.EmailPolicy.utf8` is ``True``, no binary attachments. .. versionchanged:: 3.3 Added the *policy* keyword. @@ -159,7 +159,7 @@ message body, instead setting the payload to the raw body. methods. The bytes contained in *fp* must be formatted as a block of :rfc:`5322` - (or, if :attr:`~email.policy.Policy.utf8` is ``True``, :rfc:`6532`) + (or, if :attr:`~email.policy.EmailPolicy.utf8` is ``True``, :rfc:`6532`) style headers and header continuation lines, optionally preceded by an envelope header. The header block is terminated either by the end of the data or by a blank line. Following the header block is the body of the diff --git a/Doc/library/ensurepip.rst b/Doc/library/ensurepip.rst index fa102c4a080103..32b92c01570004 100644 --- a/Doc/library/ensurepip.rst +++ b/Doc/library/ensurepip.rst @@ -30,6 +30,8 @@ when creating a virtual environment) or after explicitly uninstalling needed to bootstrap ``pip`` are included as internal parts of the package. +.. include:: ../includes/optional-module.rst + .. seealso:: :ref:`installing-index` @@ -40,7 +42,9 @@ when creating a virtual environment) or after explicitly uninstalling .. include:: ../includes/wasm-mobile-notavail.rst -Command line interface +.. _ensurepip-cli: + +Command-line interface ---------------------- .. program:: ensurepip diff --git a/Doc/library/enum.rst b/Doc/library/enum.rst index c9b2c7d76b6746..a8a7e671aadca2 100644 --- a/Doc/library/enum.rst +++ b/Doc/library/enum.rst @@ -175,6 +175,10 @@ Data Types final *enum*, as well as creating the enum members, properly handling duplicates, providing iteration over the enum class, etc. + .. versionadded:: 3.11 + + Before 3.11 ``EnumType`` was called ``EnumMeta``, which is still available as an alias. + .. method:: EnumType.__call__(cls, value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None) This method is called in two different ways: @@ -206,7 +210,7 @@ Data Types >>> Color.RED.value in Color True - .. versionchanged:: 3.12 + .. versionchanged:: 3.12 Before Python 3.12, a ``TypeError`` is raised if a non-Enum-member is used in a containment check. @@ -251,20 +255,6 @@ Data Types >>> list(reversed(Color)) [, , ] - .. method:: EnumType._add_alias_ - - Adds a new name as an alias to an existing member. Raises a - :exc:`NameError` if the name is already assigned to a different member. - - .. method:: EnumType._add_value_alias_ - - Adds a new value as an alias to an existing member. Raises a - :exc:`ValueError` if the value is already linked with a different member. - - .. versionadded:: 3.11 - - Before 3.11 ``EnumType`` was called ``EnumMeta``, which is still available as an alias. - .. class:: Enum @@ -325,6 +315,7 @@ Data Types Returns ``['__class__', '__doc__', '__module__', 'name', 'value']`` and any public methods defined on *self.__class__*:: + >>> from enum import Enum >>> from datetime import date >>> class Weekday(Enum): ... MONDAY = 1 @@ -351,7 +342,7 @@ Data Types A *staticmethod* that is used to determine the next value returned by :class:`auto`:: - >>> from enum import auto + >>> from enum import auto, Enum >>> class PowersOfThree(Enum): ... @staticmethod ... def _generate_next_value_(name, start, count, last_values): @@ -383,7 +374,7 @@ Data Types A *classmethod* for looking up values not found in *cls*. By default it does nothing, but can be overridden to implement custom search behavior:: - >>> from enum import StrEnum + >>> from enum import auto, StrEnum >>> class Build(StrEnum): ... DEBUG = auto() ... OPTIMIZED = auto() @@ -422,6 +413,7 @@ Data Types Returns the string used for *repr()* calls. By default, returns the *Enum* name, member name, and value, but can be overridden:: + >>> from enum import auto, Enum >>> class OtherStyle(Enum): ... ALTERNATE = auto() ... OTHER = auto() @@ -438,6 +430,7 @@ Data Types Returns the string used for *str()* calls. By default, returns the *Enum* name and member name, but can be overridden:: + >>> from enum import auto, Enum >>> class OtherStyle(Enum): ... ALTERNATE = auto() ... OTHER = auto() @@ -453,6 +446,7 @@ Data Types Returns the string used for *format()* and *f-string* calls. By default, returns :meth:`__str__` return value, but can be overridden:: + >>> from enum import auto, Enum >>> class OtherStyle(Enum): ... ALTERNATE = auto() ... OTHER = auto() @@ -470,6 +464,30 @@ Data Types .. versionchanged:: 3.12 Added :ref:`enum-dataclass-support` + .. method:: Enum._add_alias_ + + Adds a new name as an alias to an existing member:: + + >>> Color.RED._add_alias_("ERROR") + >>> Color.ERROR + + + Raises a :exc:`NameError` if the name is already assigned to a different member. + + .. versionadded:: 3.13 + + .. method:: Enum._add_value_alias_ + + Adds a new value as an alias to an existing member:: + + >>> Color.RED._add_value_alias_(42) + >>> Color(42) + + + Raises a :exc:`ValueError` if the value is already linked with a different member. + + .. versionadded:: 3.13 + .. class:: IntEnum @@ -504,16 +522,31 @@ Data Types .. class:: StrEnum - ``StrEnum`` is the same as :class:`Enum`, but its members are also strings and can be used - in most of the same places that a string can be used. The result of any string - operation performed on or with a *StrEnum* member is not part of the enumeration. + *StrEnum* is the same as :class:`Enum`, but its members are also strings and + can be used in most of the same places that a string can be used. The result + of any string operation performed on or with a *StrEnum* member is not part + of the enumeration. + + >>> from enum import StrEnum, auto + >>> class Color(StrEnum): + ... RED = 'r' + ... GREEN = 'g' + ... BLUE = 'b' + ... UNKNOWN = auto() + ... + >>> Color.RED + + >>> Color.UNKNOWN + + >>> str(Color.UNKNOWN) + 'unknown' .. note:: There are places in the stdlib that check for an exact :class:`str` instead of a :class:`str` subclass (i.e. ``type(unknown) == str`` instead of ``isinstance(unknown, str)``), and in those locations you - will need to use ``str(StrEnum.member)``. + will need to use ``str(MyStrEnum.MY_MEMBER)``. .. note:: @@ -864,10 +897,6 @@ Once all the members are created it is no longer used. Supported ``_sunder_`` names """""""""""""""""""""""""""" -- :meth:`~EnumType._add_alias_` -- adds a new name as an alias to an existing - member. -- :meth:`~EnumType._add_value_alias_` -- adds a new value as an alias to an - existing member. - :attr:`~Enum._name_` -- name of the member - :attr:`~Enum._value_` -- value of the member; can be set in ``__new__`` - :meth:`~Enum._missing_` -- a lookup function used when a value is not found; @@ -888,6 +917,11 @@ Supported ``_sunder_`` names For :class:`Flag` classes the next value chosen will be the next highest power-of-two. +- :meth:`~Enum._add_alias_` -- adds a new name as an alias to an existing + member. +- :meth:`~Enum._add_value_alias_` -- adds a new value as an alias to an + existing member. + - While ``_sunder_`` names are generally reserved for the further development of the :class:`Enum` class and can not be used, some are explicitly allowed: diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index bb72032891ea98..9d3e0a4c20f9eb 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -204,10 +204,16 @@ The following exceptions are the exceptions that are usually raised. assignment fails. (When an object does not support attribute references or attribute assignments at all, :exc:`TypeError` is raised.) - The :attr:`name` and :attr:`obj` attributes can be set using keyword-only - arguments to the constructor. When set they represent the name of the attribute - that was attempted to be accessed and the object that was accessed for said - attribute, respectively. + The optional *name* and *obj* keyword-only arguments + set the corresponding attributes: + + .. attribute:: name + + The name of the attribute that was attempted to be accessed. + + .. attribute:: obj + + The object that was accessed for the named attribute. .. versionchanged:: 3.10 Added the :attr:`name` and :attr:`obj` attributes. @@ -215,7 +221,7 @@ The following exceptions are the exceptions that are usually raised. .. exception:: EOFError Raised when the :func:`input` function hits an end-of-file condition (EOF) - without reading any data. (N.B.: the :meth:`io.IOBase.read` and + without reading any data. (Note: the :meth:`!io.IOBase.read` and :meth:`io.IOBase.readline` methods return an empty string when they hit EOF.) @@ -312,9 +318,11 @@ The following exceptions are the exceptions that are usually raised. unqualified names. The associated value is an error message that includes the name that could not be found. - The :attr:`name` attribute can be set using a keyword-only argument to the - constructor. When set it represent the name of the variable that was attempted - to be accessed. + The optional *name* keyword-only argument sets the attribute: + + .. attribute:: name + + The name of the variable that was attempted to be accessed. .. versionchanged:: 3.10 Added the :attr:`name` attribute. @@ -382,7 +390,7 @@ The following exceptions are the exceptions that are usually raised. The corresponding error message, as provided by the operating system. It is formatted by the C - functions :c:func:`perror` under POSIX, and :c:func:`FormatMessage` + functions :c:func:`!perror` under POSIX, and :c:func:`!FormatMessage` under Windows. .. attribute:: filename @@ -398,7 +406,7 @@ The following exceptions are the exceptions that are usually raised. .. versionchanged:: 3.3 :exc:`EnvironmentError`, :exc:`IOError`, :exc:`WindowsError`, :exc:`socket.error`, :exc:`select.error` and - :exc:`mmap.error` have been merged into :exc:`OSError`, and the + :exc:`!mmap.error` have been merged into :exc:`OSError`, and the constructor may return a subclass. .. versionchanged:: 3.4 @@ -590,7 +598,7 @@ The following exceptions are the exceptions that are usually raised. handled, the Python interpreter exits; no stack traceback is printed. The constructor accepts the same optional argument passed to :func:`sys.exit`. If the value is an integer, it specifies the system exit status (passed to - C's :c:func:`exit` function); if it is ``None``, the exit status is zero; if + C's :c:func:`!exit` function); if it is ``None``, the exit status is zero; if it has another type (such as a string), the object's value is printed and the exit status is one. @@ -882,6 +890,9 @@ The following exceptions are used as warning categories; see the Base class for warnings about dubious syntax. + This warning is typically emitted when compiling Python source code, and usually won't be reported + when running already compiled code. + .. exception:: RuntimeWarning @@ -1048,7 +1059,7 @@ their subgroups based on the types of the contained exceptions. subclasses that need a different constructor signature need to override that rather than :meth:`~object.__init__`. For example, the following defines an exception group subclass which accepts an exit_code and - and constructs the group's message from it. :: + constructs the group's message from it. :: class Errors(ExceptionGroup): def __new__(cls, errors, exit_code): diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst index 5058b85bffb15c..f81e6bf5810f86 100644 --- a/Doc/library/faulthandler.rst +++ b/Doc/library/faulthandler.rst @@ -90,7 +90,7 @@ An error will be printed instead of the stack. Additionally, some compilers do not support :term:`CPython's ` implementation of C stack dumps. As a result, a different error may be printed -instead of the stack, even if the the operating system supports dumping stacks. +instead of the stack, even if the operating system supports dumping stacks. .. note:: @@ -228,6 +228,41 @@ handler: Fatal Python error: Segmentation fault Current thread 0x00007fb899f39700 (most recent call first): - File "/home/python/cpython/Lib/ctypes/__init__.py", line 486 in string_at + File "/opt/python/Lib/ctypes/__init__.py", line 486 in string_at File "", line 1 in + + Current thread's C stack trace (most recent call first): + Binary file "/opt/python/python", at _Py_DumpStack+0x42 [0x5b27f7d7147e] + Binary file "/opt/python/python", at +0x32dcbd [0x5b27f7d85cbd] + Binary file "/opt/python/python", at +0x32df8a [0x5b27f7d85f8a] + Binary file "/usr/lib/libc.so.6", at +0x3def0 [0x77b73226bef0] + Binary file "/usr/lib/libc.so.6", at +0x17ef9c [0x77b7323acf9c] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0xcdf6 [0x77b7315dddf6] + Binary file "/usr/lib/libffi.so.8", at +0x7976 [0x77b73158f976] + Binary file "/usr/lib/libffi.so.8", at +0x413c [0x77b73158c13c] + Binary file "/usr/lib/libffi.so.8", at ffi_call+0x12e [0x77b73158ef0e] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0x15a33 [0x77b7315e6a33] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0x164fa [0x77b7315e74fa] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0xc624 [0x77b7315dd624] + Binary file "/opt/python/python", at _PyObject_MakeTpCall+0xce [0x5b27f7b73883] + Binary file "/opt/python/python", at +0x11bab6 [0x5b27f7b73ab6] + Binary file "/opt/python/python", at PyObject_Vectorcall+0x23 [0x5b27f7b73b04] + Binary file "/opt/python/python", at _PyEval_EvalFrameDefault+0x490c [0x5b27f7cbb302] + Binary file "/opt/python/python", at +0x2818e6 [0x5b27f7cd98e6] + Binary file "/opt/python/python", at +0x281aab [0x5b27f7cd9aab] + Binary file "/opt/python/python", at PyEval_EvalCode+0xc5 [0x5b27f7cd9ba3] + Binary file "/opt/python/python", at +0x255957 [0x5b27f7cad957] + Binary file "/opt/python/python", at +0x255ab4 [0x5b27f7cadab4] + Binary file "/opt/python/python", at _PyEval_EvalFrameDefault+0x6c3e [0x5b27f7cbd634] + Binary file "/opt/python/python", at +0x2818e6 [0x5b27f7cd98e6] + Binary file "/opt/python/python", at +0x281aab [0x5b27f7cd9aab] + Binary file "/opt/python/python", at +0x11b6e1 [0x5b27f7b736e1] + Binary file "/opt/python/python", at +0x11d348 [0x5b27f7b75348] + Binary file "/opt/python/python", at +0x11d626 [0x5b27f7b75626] + Binary file "/opt/python/python", at PyObject_Call+0x20 [0x5b27f7b7565e] + Binary file "/opt/python/python", at +0x32a67a [0x5b27f7d8267a] + Binary file "/opt/python/python", at +0x32a7f8 [0x5b27f7d827f8] + Binary file "/opt/python/python", at +0x32ac1b [0x5b27f7d82c1b] + Binary file "/opt/python/python", at Py_RunMain+0x31 [0x5b27f7d82ebe] + Segmentation fault diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst index 12e61bc36f5db0..ee654b7a83e203 100644 --- a/Doc/library/fnmatch.rst +++ b/Doc/library/fnmatch.rst @@ -53,7 +53,7 @@ a :class:`!str` filename, and vice-versa. Finally, note that :func:`functools.lru_cache` with a *maxsize* of 32768 is used to cache the (typed) compiled regex patterns in the following -functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`. +functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`, :func:`.filterfalse`. .. function:: fnmatch(name, pat) diff --git a/Doc/library/fractions.rst b/Doc/library/fractions.rst index fc7f9a6301a915..d6d1c7a461c51c 100644 --- a/Doc/library/fractions.rst +++ b/Doc/library/fractions.rst @@ -14,8 +14,8 @@ The :mod:`fractions` module provides support for rational number arithmetic. -A Fraction instance can be constructed from a pair of integers, from -another rational number, or from a string. +A Fraction instance can be constructed from a pair of rational numbers, from +a single number, or from a string. .. index:: single: as_integer_ratio() @@ -25,8 +25,8 @@ another rational number, or from a string. The first version requires that *numerator* and *denominator* are instances of :class:`numbers.Rational` and returns a new :class:`Fraction` instance - with value ``numerator/denominator``. If *denominator* is ``0``, it - raises a :exc:`ZeroDivisionError`. + with a value equal to ``numerator/denominator``. + If *denominator* is zero, it raises a :exc:`ZeroDivisionError`. The second version requires that *number* is an instance of :class:`numbers.Rational` or has the :meth:`!as_integer_ratio` method @@ -125,7 +125,8 @@ another rational number, or from a string. .. attribute:: denominator - Denominator of the Fraction in lowest term. + Denominator of the Fraction in lowest terms. + Guaranteed to be positive. .. method:: as_integer_ratio() @@ -142,7 +143,7 @@ another rational number, or from a string. .. versionadded:: 3.12 - .. classmethod:: from_float(flt) + .. classmethod:: from_float(f) Alternative constructor which only accepts instances of :class:`float` or :class:`numbers.Integral`. Beware that diff --git a/Doc/library/frameworks.rst b/Doc/library/frameworks.rst index 15ceeec9c255ed..f8e2f6bb18cb1c 100644 --- a/Doc/library/frameworks.rst +++ b/Doc/library/frameworks.rst @@ -1,18 +1,13 @@ +:orphan: + .. _frameworks: ****************** -Program Frameworks +Program frameworks ****************** -The modules described in this chapter are frameworks that will largely dictate -the structure of your program. Currently the modules described here are all -oriented toward writing command-line interfaces. - -The full list of modules described in this chapter is: - - -.. toctree:: +This chapter is no longer maintained, and the modules it contained have been moved to their respective topical documentation. - turtle.rst - cmd.rst - shlex.rst +* :mod:`cmd` — :doc:`Command Line Interface Libraries <./cmdlinelibs>` +* :mod:`shlex` — :doc:`Unix Specific Services <./unix>` +* :mod:`turtle` — :doc:`Graphical User Interfaces with Tk <./tk>` diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7e367a0f2b6b25..c66486d3908aa7 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -54,7 +54,7 @@ are always available. They are listed here in alphabetical order. .. |func-bytearray| replace:: ``bytearray()`` .. |func-bytes| replace:: ``bytes()`` -.. function:: abs(x) +.. function:: abs(number, /) Return the absolute value of a number. The argument may be an integer, a floating-point number, or an object implementing @@ -62,7 +62,7 @@ are always available. They are listed here in alphabetical order. If the argument is a complex number, its magnitude is returned. -.. function:: aiter(async_iterable) +.. function:: aiter(async_iterable, /) Return an :term:`asynchronous iterator` for an :term:`asynchronous iterable`. Equivalent to calling ``x.__aiter__()``. @@ -71,7 +71,7 @@ are always available. They are listed here in alphabetical order. .. versionadded:: 3.10 -.. function:: all(iterable) +.. function:: all(iterable, /) Return ``True`` if all elements of the *iterable* are true (or if the iterable is empty). Equivalent to:: @@ -83,8 +83,8 @@ are always available. They are listed here in alphabetical order. return True -.. awaitablefunction:: anext(async_iterator) - anext(async_iterator, default) +.. awaitablefunction:: anext(async_iterator, /) + anext(async_iterator, default, /) When awaited, return the next item from the given :term:`asynchronous iterator`, or *default* if given and the iterator is exhausted. @@ -99,7 +99,7 @@ are always available. They are listed here in alphabetical order. .. versionadded:: 3.10 -.. function:: any(iterable) +.. function:: any(iterable, /) Return ``True`` if any element of the *iterable* is true. If the iterable is empty, return ``False``. Equivalent to:: @@ -111,7 +111,7 @@ are always available. They are listed here in alphabetical order. return False -.. function:: ascii(object) +.. function:: ascii(object, /) As :func:`repr`, return a string containing a printable representation of an object, but escape the non-ASCII characters in the string returned by @@ -119,10 +119,10 @@ are always available. They are listed here in alphabetical order. similar to that returned by :func:`repr` in Python 2. -.. function:: bin(x) +.. function:: bin(integer, /) Convert an integer number to a binary string prefixed with "0b". The result - is a valid Python expression. If *x* is not a Python :class:`int` object, it + is a valid Python expression. If *integer* is not a Python :class:`int` object, it has to define an :meth:`~object.__index__` method that returns an integer. Some examples: @@ -183,8 +183,7 @@ are always available. They are listed here in alphabetical order. .. _func-bytearray: .. class:: bytearray(source=b'') - bytearray(source, encoding) - bytearray(source, encoding, errors) + bytearray(source, encoding, errors='strict') :noindex: Return a new array of bytes. The :class:`bytearray` class is a mutable @@ -215,8 +214,7 @@ are always available. They are listed here in alphabetical order. .. _func-bytes: .. class:: bytes(source=b'') - bytes(source, encoding) - bytes(source, encoding, errors) + bytes(source, encoding, errors='strict') :noindex: Return a new "bytes" object which is an immutable sequence of integers in @@ -231,7 +229,7 @@ are always available. They are listed here in alphabetical order. See also :ref:`binaryseq`, :ref:`typebytes`, and :ref:`bytes-methods`. -.. function:: callable(object) +.. function:: callable(object, /) Return :const:`True` if the *object* argument appears callable, :const:`False` if not. If this returns ``True``, it is still possible that a @@ -244,14 +242,14 @@ are always available. They are listed here in alphabetical order. in Python 3.2. -.. function:: chr(i) +.. function:: chr(codepoint, /) - Return the string representing a character whose Unicode code point is the - integer *i*. For example, ``chr(97)`` returns the string ``'a'``, while + Return the string representing a character with the specified Unicode code point. + For example, ``chr(97)`` returns the string ``'a'``, while ``chr(8364)`` returns the string ``'€'``. This is the inverse of :func:`ord`. The valid range for the argument is from 0 through 1,114,111 (0x10FFFF in - base 16). :exc:`ValueError` will be raised if *i* is outside that range. + base 16). :exc:`ValueError` will be raised if it is outside that range. .. decorator:: classmethod @@ -458,7 +456,7 @@ are always available. They are listed here in alphabetical order. deprecated; it should only be passed as a single positional argument. -.. function:: delattr(object, name) +.. function:: delattr(object, name, /) This is a relative of :func:`setattr`. The arguments are an object and a string. The string must be the name of one of the object's attributes. The @@ -468,9 +466,9 @@ are always available. They are listed here in alphabetical order. .. _func-dict: -.. class:: dict(**kwarg) - dict(mapping, **kwarg) - dict(iterable, **kwarg) +.. class:: dict(**kwargs) + dict(mapping, /, **kwargs) + dict(iterable, /, **kwargs) :noindex: Create a new dictionary. The :class:`dict` object is the dictionary class. @@ -481,7 +479,7 @@ are always available. They are listed here in alphabetical order. .. function:: dir() - dir(object) + dir(object, /) Without arguments, return the list of names in the current local scope. With an argument, attempt to return a list of valid attributes for that object. @@ -541,7 +539,7 @@ are always available. They are listed here in alphabetical order. class. -.. function:: divmod(a, b) +.. function:: divmod(a, b, /) Take two (non-complex) numbers as arguments and return a pair of numbers consisting of their quotient and remainder when using integer division. With @@ -729,7 +727,7 @@ are always available. They are listed here in alphabetical order. described for the :func:`locals` builtin. -.. function:: filter(function, iterable) +.. function:: filter(function, iterable, /) Construct an iterator from those elements of *iterable* for which *function* is true. *iterable* may be either a sequence, a container which @@ -823,7 +821,7 @@ are always available. They are listed here in alphabetical order. single: __format__ single: string; format() (built-in function) -.. function:: format(value, format_spec="") +.. function:: format(value, format_spec="", /) Convert a *value* to a "formatted" representation, as controlled by *format_spec*. The interpretation of *format_spec* will depend on the type @@ -846,7 +844,7 @@ are always available. They are listed here in alphabetical order. .. _func-frozenset: -.. class:: frozenset(iterable=set()) +.. class:: frozenset(iterable=(), /) :noindex: Return a new :class:`frozenset` object, optionally with elements taken from @@ -858,8 +856,8 @@ are always available. They are listed here in alphabetical order. module. -.. function:: getattr(object, name) - getattr(object, name, default) +.. function:: getattr(object, name, /) + getattr(object, name, default, /) Return the value of the named attribute of *object*. *name* must be a string. If the string is the name of one of the object's attributes, the result is the @@ -883,7 +881,7 @@ are always available. They are listed here in alphabetical order. regardless of where the function is called. -.. function:: hasattr(object, name) +.. function:: hasattr(object, name, /) The arguments are an object and a string. The result is ``True`` if the string is the name of one of the object's attributes, ``False`` if not. (This @@ -891,7 +889,7 @@ are always available. They are listed here in alphabetical order. raises an :exc:`AttributeError` or not.) -.. function:: hash(object) +.. function:: hash(object, /) Return the hash value of the object (if it has one). Hash values are integers. They are used to quickly compare dictionary keys during a @@ -926,10 +924,10 @@ are always available. They are listed here in alphabetical order. signatures for callables are now more comprehensive and consistent. -.. function:: hex(x) +.. function:: hex(integer, /) Convert an integer number to a lowercase hexadecimal string prefixed with - "0x". If *x* is not a Python :class:`int` object, it has to define an + "0x". If *integer* is not a Python :class:`int` object, it has to define an :meth:`~object.__index__` method that returns an integer. Some examples: >>> hex(255) @@ -958,7 +956,7 @@ are always available. They are listed here in alphabetical order. :meth:`float.hex` method. -.. function:: id(object) +.. function:: id(object, /) Return the "identity" of an object. This is an integer which is guaranteed to be unique and constant for this object during its lifetime. @@ -971,7 +969,7 @@ are always available. They are listed here in alphabetical order. .. function:: input() - input(prompt) + input(prompt, /) If the *prompt* argument is present, it is written to standard output without a trailing newline. The function then reads a line from input, converts it @@ -1071,7 +1069,7 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.14 :func:`int` no longer delegates to the :meth:`~object.__trunc__` method. -.. function:: isinstance(object, classinfo) +.. function:: isinstance(object, classinfo, /) Return ``True`` if the *object* argument is an instance of the *classinfo* argument, or of a (direct, indirect, or :term:`virtual `) of *classinfo*. A @@ -1102,19 +1100,19 @@ are always available. They are listed here in alphabetical order. *classinfo* can be a :ref:`types-union`. -.. function:: iter(object) - iter(object, sentinel) +.. function:: iter(iterable, /) + iter(callable, sentinel, /) Return an :term:`iterator` object. The first argument is interpreted very differently depending on the presence of the second argument. Without a - second argument, *object* must be a collection object which supports the + second argument, the single argument must be a collection object which supports the :term:`iterable` protocol (the :meth:`~object.__iter__` method), or it must support the sequence protocol (the :meth:`~object.__getitem__` method with integer arguments starting at ``0``). If it does not support either of those protocols, :exc:`TypeError` is raised. If the second argument, *sentinel*, is given, - then *object* must be a callable object. The iterator created in this case - will call *object* with no arguments for each call to its + then the first argument must be a callable object. The iterator created in this case + will call *callable* with no arguments for each call to its :meth:`~iterator.__next__` method; if the value returned is equal to *sentinel*, :exc:`StopIteration` will be raised, otherwise the value will be returned. @@ -1131,7 +1129,7 @@ are always available. They are listed here in alphabetical order. process_block(block) -.. function:: len(s) +.. function:: len(object, /) Return the length (the number of items) of an object. The argument may be a sequence (such as a string, bytes, tuple, list, or range) or a collection @@ -1144,8 +1142,7 @@ are always available. They are listed here in alphabetical order. .. _func-list: -.. class:: list() - list(iterable) +.. class:: list(iterable=(), /) :noindex: Rather than being a function, :class:`list` is actually a mutable @@ -1154,44 +1151,44 @@ are always available. They are listed here in alphabetical order. .. function:: locals() - Return a mapping object representing the current local symbol table, with - variable names as the keys, and their currently bound references as the - values. - - At module scope, as well as when using :func:`exec` or :func:`eval` with - a single namespace, this function returns the same namespace as - :func:`globals`. - - At class scope, it returns the namespace that will be passed to the - metaclass constructor. - - When using ``exec()`` or ``eval()`` with separate local and global - arguments, it returns the local namespace passed in to the function call. - - In all of the above cases, each call to ``locals()`` in a given frame of - execution will return the *same* mapping object. Changes made through - the mapping object returned from ``locals()`` will be visible as assigned, - reassigned, or deleted local variables, and assigning, reassigning, or - deleting local variables will immediately affect the contents of the - returned mapping object. - - In an :term:`optimized scope` (including functions, generators, and - coroutines), each call to ``locals()`` instead returns a fresh dictionary - containing the current bindings of the function's local variables and any - nonlocal cell references. In this case, name binding changes made via the - returned dict are *not* written back to the corresponding local variables - or nonlocal cell references, and assigning, reassigning, or deleting local - variables and nonlocal cell references does *not* affect the contents - of previously returned dictionaries. - - Calling ``locals()`` as part of a comprehension in a function, generator, or - coroutine is equivalent to calling it in the containing scope, except that - the comprehension's initialised iteration variables will be included. In - other scopes, it behaves as if the comprehension were running as a nested - function. - - Calling ``locals()`` as part of a generator expression is equivalent to - calling it in a nested generator function. + Return a mapping object representing the current local symbol table, with + variable names as the keys, and their currently bound references as the + values. + + At module scope, as well as when using :func:`exec` or :func:`eval` with + a single namespace, this function returns the same namespace as + :func:`globals`. + + At class scope, it returns the namespace that will be passed to the + metaclass constructor. + + When using ``exec()`` or ``eval()`` with separate local and global + arguments, it returns the local namespace passed in to the function call. + + In all of the above cases, each call to ``locals()`` in a given frame of + execution will return the *same* mapping object. Changes made through + the mapping object returned from ``locals()`` will be visible as assigned, + reassigned, or deleted local variables, and assigning, reassigning, or + deleting local variables will immediately affect the contents of the + returned mapping object. + + In an :term:`optimized scope` (including functions, generators, and + coroutines), each call to ``locals()`` instead returns a fresh dictionary + containing the current bindings of the function's local variables and any + nonlocal cell references. In this case, name binding changes made via the + returned dict are *not* written back to the corresponding local variables + or nonlocal cell references, and assigning, reassigning, or deleting local + variables and nonlocal cell references does *not* affect the contents + of previously returned dictionaries. + + Calling ``locals()`` as part of a comprehension in a function, generator, or + coroutine is equivalent to calling it in the containing scope, except that + the comprehension's initialised iteration variables will be included. In + other scopes, it behaves as if the comprehension were running as a nested + function. + + Calling ``locals()`` as part of a generator expression is equivalent to + calling it in a nested generator function. .. versionchanged:: 3.12 The behaviour of ``locals()`` in a comprehension has been updated as @@ -1220,9 +1217,9 @@ are always available. They are listed here in alphabetical order. Added the *strict* parameter. -.. function:: max(iterable, *, key=None) - max(iterable, *, default, key=None) - max(arg1, arg2, *args, key=None) +.. function:: max(iterable, /, *, key=None) + max(iterable, /, *, default, key=None) + max(arg1, arg2, /, *args, key=None) Return the largest item in an iterable or the largest of two or more arguments. @@ -1258,9 +1255,9 @@ are always available. They are listed here in alphabetical order. :ref:`typememoryview` for more information. -.. function:: min(iterable, *, key=None) - min(iterable, *, default, key=None) - min(arg1, arg2, *args, key=None) +.. function:: min(iterable, /, *, key=None) + min(iterable, /, *, default, key=None) + min(arg1, arg2, /, *args, key=None) Return the smallest item in an iterable or the smallest of two or more arguments. @@ -1288,8 +1285,8 @@ are always available. They are listed here in alphabetical order. The *key* can be ``None``. -.. function:: next(iterator) - next(iterator, default) +.. function:: next(iterator, /) + next(iterator, default, /) Retrieve the next item from the :term:`iterator` by calling its :meth:`~iterator.__next__` method. If *default* is given, it is returned @@ -1310,10 +1307,10 @@ are always available. They are listed here in alphabetical order. :class:`object`. -.. function:: oct(x) +.. function:: oct(integer, /) Convert an integer number to an octal string prefixed with "0o". The result - is a valid Python expression. If *x* is not a Python :class:`int` object, it + is a valid Python expression. If *integer* is not a Python :class:`int` object, it has to define an :meth:`~object.__index__` method that returns an integer. For example: @@ -1562,13 +1559,19 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.11 The ``'U'`` mode has been removed. -.. function:: ord(c) +.. function:: ord(character, /) - Given a string representing one Unicode character, return an integer - representing the Unicode code point of that character. For example, + Return the ordinal value of a character. + + If the argument is a one-character string, return the Unicode code point + of that character. For example, ``ord('a')`` returns the integer ``97`` and ``ord('€')`` (Euro sign) returns ``8364``. This is the inverse of :func:`chr`. + If the argument is a :class:`bytes` or :class:`bytearray` object of + length 1, return its single byte value. + For example, ``ord(b'a')`` returns the integer ``97``. + .. function:: pow(base, exp, mod=None) @@ -1577,7 +1580,7 @@ are always available. They are listed here in alphabetical order. ``pow(base, exp) % mod``). The two-argument form ``pow(base, exp)`` is equivalent to using the power operator: ``base**exp``. - The arguments must have numeric types. With mixed operand types, the + When arguments are builtin numeric types with mixed operand types, the coercion rules for binary arithmetic operators apply. For :class:`int` operands, the result has the same type as the operands (after coercion) unless the second argument is negative; in that case, all arguments are @@ -1729,15 +1732,15 @@ are always available. They are listed here in alphabetical order. .. _func-range: -.. class:: range(stop) - range(start, stop, step=1) +.. class:: range(stop, /) + range(start, stop, step=1, /) :noindex: Rather than being a function, :class:`range` is actually an immutable sequence type, as documented in :ref:`typesseq-range` and :ref:`typesseq`. -.. function:: repr(object) +.. function:: repr(object, /) Return a string containing a printable representation of an object. For many types, this function makes an attempt to return a string that would yield an @@ -1761,9 +1764,9 @@ are always available. They are listed here in alphabetical order. return f"Person('{self.name}', {self.age})" -.. function:: reversed(seq) +.. function:: reversed(object, /) - Return a reverse :term:`iterator`. *seq* must be an object which has + Return a reverse :term:`iterator`. The argument must be an object which has a :meth:`~object.__reversed__` method or supports the sequence protocol (the :meth:`~object.__len__` method and the :meth:`~object.__getitem__` method with integer arguments starting at ``0``). @@ -1797,8 +1800,7 @@ are always available. They are listed here in alphabetical order. .. _func-set: -.. class:: set() - set(iterable) +.. class:: set(iterable=(), /) :noindex: Return a new :class:`set` object, optionally with elements taken from @@ -1810,7 +1812,7 @@ are always available. They are listed here in alphabetical order. module. -.. function:: setattr(object, name, value) +.. function:: setattr(object, name, value, /) This is the counterpart of :func:`getattr`. The arguments are an object, a string, and an arbitrary value. The string may name an existing attribute or a @@ -1832,22 +1834,22 @@ are always available. They are listed here in alphabetical order. :func:`setattr`. -.. class:: slice(stop) - slice(start, stop, step=None) +.. class:: slice(stop, /) + slice(start, stop, step=None, /) Return a :term:`slice` object representing the set of indices specified by ``range(start, stop, step)``. The *start* and *step* arguments default to ``None``. + Slice objects have read-only data attributes :attr:`!start`, + :attr:`!stop`, and :attr:`!step` which merely return the argument + values (or their default). They have no other explicit functionality; + however, they are used by NumPy and other third-party packages. + .. attribute:: slice.start .. attribute:: slice.stop .. attribute:: slice.step - Slice objects have read-only data attributes :attr:`!start`, - :attr:`!stop`, and :attr:`!step` which merely return the argument - values (or their default). They have no other explicit functionality; - however, they are used by NumPy and other third-party packages. - Slice objects are also generated when extended indexing syntax is used. For example: ``a[start:stop:step]`` or ``a[start:stop, i]``. See :func:`itertools.islice` for an alternate version that returns an @@ -1885,7 +1887,7 @@ are always available. They are listed here in alphabetical order. the same data with other ordering tools such as :func:`max` that rely on a different underlying method. Implementing all six comparisons also helps avoid confusion for mixed type comparisons which can call - reflected the :meth:`~object.__gt__` method. + the reflected :meth:`~object.__gt__` method. For sorting examples and a brief sorting tutorial, see :ref:`sortinghowto`. @@ -1938,8 +1940,10 @@ are always available. They are listed here in alphabetical order. single: string; str() (built-in function) .. _func-str: -.. class:: str(object='') - str(object=b'', encoding='utf-8', errors='strict') +.. class:: str(*, encoding='utf-8', errors='strict') + str(object) + str(object, encoding, errors='strict') + str(object, *, errors) :noindex: Return a :class:`str` version of *object*. See :func:`str` for details. @@ -1972,7 +1976,7 @@ are always available. They are listed here in alphabetical order. .. class:: super() - super(type, object_or_type=None) + super(type, object_or_type=None, /) Return a proxy object that delegates method calls to a parent or sibling class of *type*. This is useful for accessing inherited methods that have @@ -2054,16 +2058,15 @@ are always available. They are listed here in alphabetical order. .. _func-tuple: -.. class:: tuple() - tuple(iterable) +.. class:: tuple(iterable=(), /) :noindex: Rather than being a function, :class:`tuple` is actually an immutable sequence type, as documented in :ref:`typesseq-tuple` and :ref:`typesseq`. -.. class:: type(object) - type(name, bases, dict, **kwds) +.. class:: type(object, /) + type(name, bases, dict, /, **kwargs) .. index:: pair: object; type @@ -2106,7 +2109,7 @@ are always available. They are listed here in alphabetical order. longer use the one-argument form to get the type of an object. .. function:: vars() - vars(object) + vars(object, /) Return the :attr:`~object.__dict__` attribute for a module, class, instance, or any other object with a :attr:`!__dict__` attribute. diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 3a933dff057bbb..01db54bbb86c4f 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -42,11 +42,11 @@ The :mod:`functools` module defines the following functions: def factorial(n): return n * factorial(n-1) if n else 1 - >>> factorial(10) # no previously cached result, makes 11 recursive calls + >>> factorial(10) # no previously cached result, makes 11 recursive calls 3628800 - >>> factorial(5) # just looks up cached value result + >>> factorial(5) # no new calls, just returns the cached result 120 - >>> factorial(12) # makes two new recursive calls, the other 10 are cached + >>> factorial(12) # two new recursive calls, factorial(10) is cached 479001600 The cache is threadsafe so that the wrapped function can be used in @@ -199,12 +199,18 @@ The :mod:`functools` module defines the following functions: and *typed*. This is for information purposes only. Mutating the values has no effect. + .. method:: lru_cache.cache_info() + :no-typesetting: + To help measure the effectiveness of the cache and tune the *maxsize* - parameter, the wrapped function is instrumented with a :func:`cache_info` + parameter, the wrapped function is instrumented with a :func:`!cache_info` function that returns a :term:`named tuple` showing *hits*, *misses*, *maxsize* and *currsize*. - The decorator also provides a :func:`cache_clear` function for clearing or + .. method:: lru_cache.cache_clear() + :no-typesetting: + + The decorator also provides a :func:`!cache_clear` function for clearing or invalidating the cache. The original underlying function is accessible through the @@ -284,9 +290,9 @@ The :mod:`functools` module defines the following functions: class decorator supplies the rest. This simplifies the effort involved in specifying all of the possible rich comparison operations: - The class must define one of :meth:`__lt__`, :meth:`__le__`, - :meth:`__gt__`, or :meth:`__ge__`. - In addition, the class should supply an :meth:`__eq__` method. + The class must define one of :meth:`~object.__lt__`, :meth:`~object.__le__`, + :meth:`~object.__gt__`, or :meth:`~object.__ge__`. + In addition, the class should supply an :meth:`~object.__eq__` method. For example:: @@ -403,8 +409,7 @@ The :mod:`functools` module defines the following functions: >>> remove_first_dear(message) 'Hello, dear world!' - :data:`!Placeholder` has no special treatment when used in a keyword - argument to :func:`!partial`. + :data:`!Placeholder` cannot be passed to :func:`!partial` as a keyword argument. .. versionchanged:: 3.14 Added support for :data:`Placeholder` in positional arguments. @@ -419,7 +424,7 @@ The :mod:`functools` module defines the following functions: like normal functions, are handled as descriptors). When *func* is a descriptor (such as a normal Python function, - :func:`classmethod`, :func:`staticmethod`, :func:`abstractmethod` or + :func:`classmethod`, :func:`staticmethod`, :func:`~abc.abstractmethod` or another instance of :class:`partialmethod`), calls to ``__get__`` are delegated to the underlying descriptor, and an appropriate :ref:`partial object` returned as the result. @@ -500,7 +505,10 @@ The :mod:`functools` module defines the following functions: ... print("Let me just say,", end=" ") ... print(arg) - To add overloaded implementations to the function, use the :func:`register` + .. method:: singledispatch.register() + :no-typesetting: + + To add overloaded implementations to the function, use the :func:`!register` attribute of the generic function, which can be used as a decorator. For functions annotated with types, the decorator will infer the type of the first argument automatically:: @@ -566,14 +574,14 @@ The :mod:`functools` module defines the following functions: runtime impact. To enable registering :term:`lambdas` and pre-existing functions, - the :func:`register` attribute can also be used in a functional form:: + the :func:`~singledispatch.register` attribute can also be used in a functional form:: >>> def nothing(arg, verbose=False): ... print("Nothing.") ... >>> fun.register(type(None), nothing) - The :func:`register` attribute returns the undecorated function. This + The :func:`~singledispatch.register` attribute returns the undecorated function. This enables decorator stacking, :mod:`pickling`, and the creation of unit tests for each variant independently:: @@ -651,10 +659,10 @@ The :mod:`functools` module defines the following functions: .. versionadded:: 3.4 .. versionchanged:: 3.7 - The :func:`register` attribute now supports using type annotations. + The :func:`~singledispatch.register` attribute now supports using type annotations. .. versionchanged:: 3.11 - The :func:`register` attribute now supports + The :func:`~singledispatch.register` attribute now supports :class:`typing.Union` as a type annotation. @@ -664,7 +672,7 @@ The :mod:`functools` module defines the following functions: dispatch>` :term:`generic function`. To define a generic method, decorate it with the ``@singledispatchmethod`` - decorator. When defining a function using ``@singledispatchmethod``, note + decorator. When defining a method using ``@singledispatchmethod``, note that the dispatch happens on the type of the first non-*self* or non-*cls* argument:: @@ -682,7 +690,7 @@ The :mod:`functools` module defines the following functions: return not arg ``@singledispatchmethod`` supports nesting with other decorators such as - :func:`@classmethod`. Note that to allow for + :deco:`classmethod`. Note that to allow for ``dispatcher.register``, ``singledispatchmethod`` must be the *outer most* decorator. Here is the ``Negator`` class with the ``neg`` methods bound to the class, rather than an instance of the class:: @@ -704,8 +712,7 @@ The :mod:`functools` module defines the following functions: return not arg The same pattern can be used for other similar decorators: - :func:`@staticmethod`, - :func:`@abstractmethod`, and others. + :deco:`staticmethod`, :deco:`~abc.abstractmethod`, and others. .. versionadded:: 3.8 @@ -784,7 +791,7 @@ The :mod:`functools` module defines the following functions: 'Docstring' Without the use of this decorator factory, the name of the example function - would have been ``'wrapper'``, and the docstring of the original :func:`example` + would have been ``'wrapper'``, and the docstring of the original :func:`!example` would have been lost. diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 7ccb0e6bdf9406..35c5c7350cf8ed 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -60,7 +60,7 @@ The :mod:`gc` module provides the following functions: The effect of calling ``gc.collect()`` while the interpreter is already performing a collection is undefined. - .. versionchanged:: 3.13 + .. versionchanged:: 3.14 ``generation=1`` performs an increment of collection. @@ -83,13 +83,13 @@ The :mod:`gc` module provides the following functions: returned. If *generation* is not ``None``, return only the objects as follows: * 0: All objects in the young generation - * 1: No objects, as there is no generation 1 (as of Python 3.13) + * 1: No objects, as there is no generation 1 (as of Python 3.14) * 2: All objects in the old generation .. versionchanged:: 3.8 New *generation* parameter. - .. versionchanged:: 3.13 + .. versionchanged:: 3.14 Generation 1 is removed .. audit-event:: gc.get_objects generation gc.get_objects @@ -140,9 +140,9 @@ The :mod:`gc` module provides the following functions: *threshold2* is ignored. - See `Garbage collector design `_ for more information. + See `Garbage collector design `_ for more information. - .. versionchanged:: 3.13 + .. versionchanged:: 3.14 *threshold2* is ignored diff --git a/Doc/library/getpass.rst b/Doc/library/getpass.rst index 0fb0fc88683c03..a0c0c6dee2d513 100644 --- a/Doc/library/getpass.rst +++ b/Doc/library/getpass.rst @@ -27,9 +27,9 @@ The :mod:`getpass` module provides two functions: The *echo_char* argument controls how user input is displayed while typing. If *echo_char* is ``None`` (default), input remains hidden. Otherwise, - *echo_char* must be a printable ASCII string and each typed character - is replaced by it. For example, ``echo_char='*'`` will display - asterisks instead of the actual input. + *echo_char* must be a single printable ASCII character and each + typed character is replaced by it. For example, ``echo_char='*'`` will + display asterisks instead of the actual input. If echo free input is unavailable getpass() falls back to printing a warning message to *stream* and reading from ``sys.stdin`` and @@ -39,6 +39,14 @@ The :mod:`getpass` module provides two functions: If you call getpass from within IDLE, the input may be done in the terminal you launched IDLE from rather than the idle window itself. + .. note:: + On Unix systems, when *echo_char* is set, the terminal will be + configured to operate in + :manpage:`noncanonical mode `. + In particular, this means that line editing shortcuts such as + :kbd:`Ctrl+U` will not work and may insert unexpected characters into + the input. + .. versionchanged:: 3.14 Added the *echo_char* parameter for keyboard feedback. diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 59ad1b07f27338..52c44928153337 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -18,23 +18,27 @@ single: - (minus); in glob-style wildcards single: . (dot); in glob-style wildcards -The :mod:`glob` module finds all the pathnames matching a specified pattern -according to the rules used by the Unix shell, although results are returned in -arbitrary order. No tilde expansion is done, but ``*``, ``?``, and character +The :mod:`!glob` module finds pathnames +using pattern matching rules similar to the Unix shell. +No tilde expansion is done, but ``*``, ``?``, and character ranges expressed with ``[]`` will be correctly matched. This is done by using the :func:`os.scandir` and :func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a subshell. -Note that files beginning with a dot (``.``) can only be matched by +.. note:: + The pathnames are returned in no particular order. If you need a specific + order, sort the results. + +Files beginning with a dot (``.``) can only be matched by patterns that also start with a dot, unlike :func:`fnmatch.fnmatch` or :func:`pathlib.Path.glob`. -(For tilde and shell variable expansion, use :func:`os.path.expanduser` and -:func:`os.path.expandvars`.) +For tilde and shell variable expansion, use :func:`os.path.expanduser` and +:func:`os.path.expandvars`. For a literal match, wrap the meta-characters in brackets. For example, ``'[?]'`` matches the character ``'?'``. -The :mod:`glob` module defines the following functions: +The :mod:`!glob` module defines the following functions: .. function:: glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, \ @@ -51,7 +55,7 @@ The :mod:`glob` module defines the following functions: If *root_dir* is not ``None``, it should be a :term:`path-like object` specifying the root directory for searching. It has the same effect on - :func:`glob` as changing the current directory before calling it. If + :func:`!glob` as changing the current directory before calling it. If *pathname* is relative, the result will contain paths relative to *root_dir*. diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index c9d96085ef739d..76de92b30692b3 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -11,6 +11,8 @@ This module provides a simple interface to compress and decompress files just like the GNU programs :program:`gzip` and :program:`gunzip` would. +.. include:: ../includes/optional-module.rst + The data compression is provided by the :mod:`zlib` module. The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the @@ -267,7 +269,7 @@ Example of how to GZIP compress a binary string:: .. _gzip-cli: -Command Line Interface +Command-line interface ---------------------- The :mod:`gzip` module provides a simple command line interface to compress or @@ -280,7 +282,7 @@ Once executed the :mod:`gzip` module keeps the input file(s). Add a new command line interface with a usage. By default, when you will execute the CLI, the default compression level is 6. -Command line options +Command-line options ^^^^^^^^^^^^^^^^^^^^ .. option:: file diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index bb2d2fad23bdb8..855a6efc9f781c 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -284,7 +284,7 @@ a file or file-like object. Example: >>> import io, hashlib, hmac - >>> with open(hashlib.__file__, "rb") as f: + >>> with open("library/hashlib.rst", "rb") as f: ... digest = hashlib.file_digest(f, "sha256") ... >>> digest.hexdigest() # doctest: +ELLIPSIS @@ -303,7 +303,7 @@ a file or file-like object. .. versionadded:: 3.11 .. versionchanged:: 3.14 - Now raises a :exc:`BlockingIOError` if the file is opened in blocking + Now raises a :exc:`BlockingIOError` if the file is opened in non-blocking mode. Previously, spurious null bytes were added to the digest. diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 922ba0c8aa4214..4b3ef6df56adca 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -58,6 +58,11 @@ functions, respectively. The following functions are provided for min-heaps: +.. function:: heapify(x) + + Transform list *x* into a min-heap, in-place, in linear time. + + .. function:: heappush(heap, item) Push the value *item* onto the *heap*, maintaining the min-heap invariant. @@ -77,11 +82,6 @@ The following functions are provided for min-heaps: followed by a separate call to :func:`heappop`. -.. function:: heapify(x) - - Transform list *x* into a min-heap, in-place, in linear time. - - .. function:: heapreplace(heap, item) Pop and return the smallest item from the *heap*, and also push the new *item*. diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst index d6692033b2d4c3..d5608bd7543eb1 100644 --- a/Doc/library/hmac.rst +++ b/Doc/library/hmac.rst @@ -12,6 +12,9 @@ -------------- This module implements the HMAC algorithm as described by :rfc:`2104`. +The interface allows to use any hash function with a *fixed* digest size. +In particular, extendable output functions such as SHAKE-128 or SHAKE-256 +cannot be used with HMAC. .. function:: new(key, msg=None, digestmod) @@ -47,7 +50,9 @@ This module implements the HMAC algorithm as described by :rfc:`2104`. .. versionadded:: 3.7 -An HMAC object has the following methods: +.. class:: HMAC + + An HMAC object has the following methods: .. method:: HMAC.update(msg) diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst index 6d433b5a04fc4a..341a8337ba2ceb 100644 --- a/Doc/library/html.parser.rst +++ b/Doc/library/html.parser.rst @@ -15,14 +15,18 @@ This module defines a class :class:`HTMLParser` which serves as the basis for parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML. -.. class:: HTMLParser(*, convert_charrefs=True) +.. class:: HTMLParser(*, convert_charrefs=True, scripting=False) Create a parser instance able to parse invalid markup. - If *convert_charrefs* is ``True`` (the default), all character - references (except the ones in ``script``/``style`` elements) are + If *convert_charrefs* is true (the default), all character + references (except the ones in elements like ``script`` and ``style``) are automatically converted to the corresponding Unicode characters. + If *scripting* is false (the default), the content of the ``noscript`` + element is parsed normally; if it's true, it's returned as is without + being parsed. + An :class:`.HTMLParser` instance is fed HTML data and calls handler methods when start tags, end tags, text, comments, and other markup elements are encountered. The user should subclass :class:`.HTMLParser` and override its @@ -37,13 +41,18 @@ parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML. .. versionchanged:: 3.5 The default value for argument *convert_charrefs* is now ``True``. + .. versionchanged:: 3.14.1 + Added the *scripting* parameter. + Example HTML Parser Application ------------------------------- As a basic example, below is a simple HTML parser that uses the :class:`HTMLParser` class to print out start tags, end tags, and data -as they are encountered:: +as they are encountered: + +.. testcode:: from html.parser import HTMLParser @@ -63,7 +72,7 @@ as they are encountered:: The output will then be: -.. code-block:: none +.. testoutput:: Encountered a start tag: html Encountered a start tag: head @@ -159,15 +168,15 @@ implementations do nothing (except for :meth:`~HTMLParser.handle_startendtag`): .. method:: HTMLParser.handle_data(data) This method is called to process arbitrary data (e.g. text nodes and the - content of ```` and ````). + content of elements like ``script`` and ``style``). .. method:: HTMLParser.handle_entityref(name) This method is called to process a named character reference of the form ``&name;`` (e.g. ``>``), where *name* is a general entity reference - (e.g. ``'gt'``). This method is never called if *convert_charrefs* is - ``True``. + (e.g. ``'gt'``). + This method is only called if *convert_charrefs* is false. .. method:: HTMLParser.handle_charref(name) @@ -175,8 +184,8 @@ implementations do nothing (except for :meth:`~HTMLParser.handle_startendtag`): This method is called to process decimal and hexadecimal numeric character references of the form :samp:`&#{NNN};` and :samp:`&#x{NNN};`. For example, the decimal equivalent for ``>`` is ``>``, whereas the hexadecimal is ``>``; - in this case the method will receive ``'62'`` or ``'x3E'``. This method - is never called if *convert_charrefs* is ``True``. + in this case the method will receive ``'62'`` or ``'x3E'``. + This method is only called if *convert_charrefs* is false. .. method:: HTMLParser.handle_comment(data) @@ -230,7 +239,9 @@ Examples -------- The following class implements a parser that will be used to illustrate more -examples:: +examples: + +.. testcode:: from html.parser import HTMLParser from html.entities import name2codepoint @@ -266,13 +277,17 @@ examples:: parser = MyHTMLParser() -Parsing a doctype:: +Parsing a doctype: + +.. doctest:: >>> parser.feed('') Decl : DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" -Parsing an element with a few attributes and a title:: +Parsing an element with a few attributes and a title: + +.. doctest:: >>> parser.feed('The Python logo') Start tag: img @@ -284,8 +299,10 @@ Parsing an element with a few attributes and a title:: Data : Python End tag : h1 -The content of ``script`` and ``style`` elements is returned as is, without -further parsing:: +The content of elements like ``script`` and ``style`` is returned as is, +without further parsing: + +.. doctest:: >>> parser.feed('') Start tag: style @@ -294,22 +311,31 @@ further parsing:: End tag : style >>> parser.feed('') + ... 'alert("hello! ☺");') Start tag: script attr: ('type', 'text/javascript') - Data : alert("hello!"); + Data : alert("hello! ☺"); End tag : script -Parsing comments:: +Parsing comments: - >>> parser.feed('' +.. doctest:: + + >>> parser.feed('' ... '') - Comment : a comment + Comment : a comment Comment : [if IE 9]>IE-specific content'``):: +correct char (note: these 3 references are all equivalent to ``'>'``): + +.. doctest:: + + >>> parser = MyHTMLParser() + >>> parser.feed('>>>') + Data : >>> + >>> parser = MyHTMLParser(convert_charrefs=False) >>> parser.feed('>>>') Named ent: > Num ent : > @@ -317,18 +343,22 @@ correct char (note: these 3 references are all equivalent to ``'>'``):: Feeding incomplete chunks to :meth:`~HTMLParser.feed` works, but :meth:`~HTMLParser.handle_data` might be called more than once -(unless *convert_charrefs* is set to ``True``):: +if *convert_charrefs* is false: + +.. doctest:: - >>> for chunk in ['buff', 'ered ', 'text']: + >>> for chunk in ['buff', 'ered', ' text']: ... parser.feed(chunk) ... Start tag: span Data : buff Data : ered - Data : text + Data : text End tag : span -Parsing invalid HTML (e.g. unquoted attributes) also works:: +Parsing invalid HTML (e.g. unquoted attributes) also works: + +.. doctest:: >>> parser.feed('

tag soup

') Start tag: p diff --git a/Doc/library/html.rst b/Doc/library/html.rst index 9aa39ba9a42b0f..65c49a4107a048 100644 --- a/Doc/library/html.rst +++ b/Doc/library/html.rst @@ -14,9 +14,12 @@ This module defines utilities to manipulate HTML. Convert the characters ``&``, ``<`` and ``>`` in string *s* to HTML-safe sequences. Use this if you need to display text that might contain such - characters in HTML. If the optional flag *quote* is true, the characters - (``"``) and (``'``) are also translated; this helps for inclusion in an HTML - attribute value delimited by quotes, as in ````. + characters in HTML. If the optional flag *quote* is true (the default), the + characters (``"``) and (``'``) are also translated; this helps for inclusion + in an HTML attribute value delimited by quotes, as in ````. + If *quote* is set to false, the characters (``"``) and (``'``) are not + translated. + .. versionadded:: 3.2 diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index 2835c8d0eb711e..75aa818ab9f5d7 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -125,7 +125,7 @@ This module provides the following function: Parse the headers from a file pointer *fp* representing a HTTP request/response. The file has to be a :class:`~io.BufferedIOBase` reader - (i.e. not text) and must provide a valid :rfc:`2822` style header. + (i.e. not text) and must provide a valid :rfc:`5322` style header. This function returns an instance of :class:`http.client.HTTPMessage` that holds the header fields, but no payload diff --git a/Doc/library/http.cookiejar.rst b/Doc/library/http.cookiejar.rst index 23ddecf873876d..fcb0069b760e59 100644 --- a/Doc/library/http.cookiejar.rst +++ b/Doc/library/http.cookiejar.rst @@ -12,7 +12,7 @@ -------------- The :mod:`http.cookiejar` module defines classes for automatic handling of HTTP -cookies. It is useful for accessing web sites that require small pieces of data +cookies. It is useful for accessing websites that require small pieces of data -- :dfn:`cookies` -- to be set on the client machine by an HTTP response from a web server, and then returned to the server in later HTTP requests. @@ -570,7 +570,7 @@ Netscape protocol strictness switches: Don't allow setting cookies whose path doesn't path-match request URI. -:attr:`strict_ns_domain` is a collection of flags. Its value is constructed by +:attr:`~DefaultCookiePolicy.strict_ns_domain` is a collection of flags. Its value is constructed by or-ing together (for example, ``DomainStrictNoDots|DomainStrictNonDomain`` means both flags are set). diff --git a/Doc/library/http.cookies.rst b/Doc/library/http.cookies.rst index eb196320721194..46efc45c5e7d96 100644 --- a/Doc/library/http.cookies.rst +++ b/Doc/library/http.cookies.rst @@ -148,9 +148,12 @@ Morsel Objects in HTTP requests, and is not accessible through JavaScript. This is intended to mitigate some forms of cross-site scripting. - The attribute :attr:`samesite` specifies that the browser is not allowed to - send the cookie along with cross-site requests. This helps to mitigate CSRF - attacks. Valid values for this attribute are "Strict" and "Lax". + The attribute :attr:`samesite` controls when the browser sends the cookie with + cross-site requests. This helps to mitigate CSRF attacks. Valid values are + "Strict" (only sent with same-site requests), "Lax" (sent with same-site + requests and top-level navigations), and "None" (sent with same-site and + cross-site requests). When using "None", the "secure" attribute must also + be set, as required by modern browsers. The attribute :attr:`partitioned` indicates to user agents that these cross-site cookies *should* only be available in the same top-level context diff --git a/Doc/library/http.rst b/Doc/library/http.rst index ce3fb9f8120502..b0bdfc65e4508d 100644 --- a/Doc/library/http.rst +++ b/Doc/library/http.rst @@ -139,7 +139,8 @@ equal to the constant name (i.e. ``http.HTTPStatus.OK`` is also available as .. versionchanged:: 3.13 Implemented RFC9110 naming for status constants. Old constant names are preserved for - backwards compatibility. + backwards compatibility: ``413 REQUEST_ENTITY_TOO_LARGE``, ``414 REQUEST_URI_TOO_LONG``, + ``416 REQUESTED_RANGE_NOT_SATISFIABLE`` and ``422 UNPROCESSABLE_ENTITY``. HTTP status category -------------------- diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 54df4a7e804d50..41026e2b303645 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -154,7 +154,7 @@ instantiation, of which this module provides three different variants: variable. This instance parses and manages the headers in the HTTP request. The :func:`~http.client.parse_headers` function from :mod:`http.client` is used to parse the headers and it requires that the - HTTP request provide a valid :rfc:`2822` style header. + HTTP request provide a valid :rfc:`5322` style header. .. attribute:: rfile @@ -429,8 +429,7 @@ instantiation, of which this module provides three different variants: ``'Last-Modified:'`` header with the file's modification time. Then follows a blank line signifying the end of the headers, and then the - contents of the file are output. If the file's MIME type starts with - ``text/`` the file is opened in text mode; otherwise binary mode is used. + contents of the file are output. For example usage, see the implementation of the ``test`` function in :source:`Lib/http/server.py`. diff --git a/Doc/library/idle.rst b/Doc/library/idle.rst index fabea611e0ebcd..a16f46ef812400 100644 --- a/Doc/library/idle.rst +++ b/Doc/library/idle.rst @@ -13,7 +13,7 @@ IDLE --- Python editor and shell single: Integrated Development Environment .. - Remember to update Lib/idlelib/help.html with idlelib.help.copy_source() when modifying this file. + Remember to update Lib/idlelib/help.html with idlelib.help.copy_strip() when modifying this file. -------------- @@ -37,6 +37,10 @@ IDLE has the following features: * configuration, browsers, and other dialogs +The IDLE application is implemented in the :mod:`idlelib` package. + +.. include:: ../includes/optional-module.rst + Menus ----- @@ -88,7 +92,7 @@ Save Save As... Save the current window with a Save As dialog. The file saved becomes the - new associated file for the window. (If your file namager is set to hide + new associated file for the window. (If your file manager is set to hide extensions, the current extension will be omitted in the file name box. If the new filename has no '.', '.py' and '.txt' will be added for Python and text files, except that on macOS Aqua,'.py' is added for all files.) @@ -204,9 +208,9 @@ New Indent Width Open a dialog to change indent width. The accepted default by the Python community is 4 spaces. -Strip Trailing Chitespace +Strip Trailing Whitespace Remove trailing space and other whitespace characters after the last - non-whitespace character of a line by applying str.rstrip to each line, + non-whitespace character of a line by applying :meth:`str.rstrip` to each line, including lines within multiline strings. Except for Shell windows, remove extra newlines at the end of the file. @@ -657,7 +661,9 @@ looked for in the user's home directory. Statements in this file will be executed in the Tk namespace, so this file is not useful for importing functions to be used from IDLE's Python shell. -Command line usage +.. _idlelib-cli: + +Command-line usage ^^^^^^^^^^^^^^^^^^ .. program:: idle diff --git a/Doc/library/imaplib.rst b/Doc/library/imaplib.rst index 9f198aebcb66b0..3eee2b9e607297 100644 --- a/Doc/library/imaplib.rst +++ b/Doc/library/imaplib.rst @@ -413,6 +413,9 @@ An :class:`IMAP4` instance has the following methods: the password. Will only work if the server ``CAPABILITY`` response includes the phrase ``AUTH=CRAM-MD5``. + .. versionchanged:: 3.14 + An :exc:`IMAP4.error` is raised if MD5 support is not available. + .. method:: IMAP4.logout() diff --git a/Doc/library/importlib.resources.abc.rst b/Doc/library/importlib.resources.abc.rst index 7a77466bcbaf27..8253a33f591a0b 100644 --- a/Doc/library/importlib.resources.abc.rst +++ b/Doc/library/importlib.resources.abc.rst @@ -49,44 +49,44 @@ .. method:: open_resource(resource) :abstractmethod: - Returns an opened, :term:`file-like object` for binary reading - of the *resource*. + Returns an opened, :term:`file-like object` for binary reading + of the *resource*. - If the resource cannot be found, :exc:`FileNotFoundError` is - raised. + If the resource cannot be found, :exc:`FileNotFoundError` is + raised. .. method:: resource_path(resource) :abstractmethod: - Returns the file system path to the *resource*. + Returns the file system path to the *resource*. - If the resource does not concretely exist on the file system, - raise :exc:`FileNotFoundError`. + If the resource does not concretely exist on the file system, + raise :exc:`FileNotFoundError`. .. method:: is_resource(name) :abstractmethod: - Returns ``True`` if the named *name* is considered a resource. - :exc:`FileNotFoundError` is raised if *name* does not exist. + Returns ``True`` if the named *name* is considered a resource. + :exc:`FileNotFoundError` is raised if *name* does not exist. .. method:: contents() :abstractmethod: - Returns an :term:`iterable` of strings over the contents of - the package. Do note that it is not required that all names - returned by the iterator be actual resources, e.g. it is - acceptable to return names for which :meth:`is_resource` would - be false. - - Allowing non-resource names to be returned is to allow for - situations where how a package and its resources are stored - are known a priori and the non-resource names would be useful. - For instance, returning subdirectory names is allowed so that - when it is known that the package and resources are stored on - the file system then those subdirectory names can be used - directly. - - The abstract method returns an iterable of no items. + Returns an :term:`iterable` of strings over the contents of + the package. Do note that it is not required that all names + returned by the iterator be actual resources, e.g. it is + acceptable to return names for which :meth:`is_resource` would + be false. + + Allowing non-resource names to be returned is to allow for + situations where how a package and its resources are stored + are known a priori and the non-resource names would be useful. + For instance, returning subdirectory names is allowed so that + when it is known that the package and resources are stored on + the file system then those subdirectory names can be used + directly. + + The abstract method returns an iterable of no items. .. class:: Traversable diff --git a/Doc/library/importlib.resources.rst b/Doc/library/importlib.resources.rst index e002198899c8b8..7a11f4fe069004 100644 --- a/Doc/library/importlib.resources.rst +++ b/Doc/library/importlib.resources.rst @@ -16,11 +16,12 @@ within *packages*. "Resources" are file-like resources associated with a module or package in Python. The resources may be contained directly in a package, within a subdirectory contained in that package, or adjacent to modules outside a -package. Resources may be text or binary. As a result, Python module sources -(.py) of a package and compilation artifacts (pycache) are technically -de-facto resources of that package. In practice, however, resources are -primarily those non-Python artifacts exposed specifically by the package -author. +package. Resources may be text or binary. As a result, a package's Python +module sources (.py), compilation artifacts (pycache), and installation +artifacts (like :func:`reserved filenames ` +in directories) are technically de-facto resources of that package. +In practice, however, resources are primarily those non-Python artifacts +exposed specifically by the package author. Resources can be opened or read in either binary or text mode. diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index ea5a77028683b3..743d617fbe77a0 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -206,6 +206,10 @@ Functions :exc:`ModuleNotFoundError` is raised when the module being reloaded lacks a :class:`~importlib.machinery.ModuleSpec`. + .. warning:: + This function is not thread-safe. Calling it from multiple threads can result + in unexpected behavior. It's recommended to use the :class:`threading.Lock` + or other synchronization primitives for thread-safe module reloading. :mod:`importlib.abc` -- Abstract base classes related to import --------------------------------------------------------------- @@ -389,6 +393,8 @@ ABC hierarchy:: .. deprecated:: 3.7 This ABC is deprecated in favour of supporting resource loading through :class:`importlib.resources.abc.TraversableResources`. + This class exists for backwards compatibility only with other ABCs in + this module. .. method:: get_data(path) :abstractmethod: @@ -1247,7 +1253,7 @@ find and load modules. To accommodate this requirement, when running on iOS, extension module binaries are *not* packaged as ``.so`` files on ``sys.path``, but as individual standalone frameworks. To discover those frameworks, this loader - is be registered against the ``.fwork`` file extension, with a ``.fwork`` + is registered against the ``.fwork`` file extension, with a ``.fwork`` file acting as a placeholder in the original location of the binary on ``sys.path``. The ``.fwork`` file contains the path of the actual binary in the ``Frameworks`` folder, relative to the app bundle. To allow for diff --git a/Doc/library/index.rst b/Doc/library/index.rst index 44b218948d07e1..163e1679c65ef8 100644 --- a/Doc/library/index.rst +++ b/Doc/library/index.rst @@ -63,7 +63,6 @@ the `Python Package Index `_. internet.rst mm.rst i18n.rst - frameworks.rst tk.rst development.rst debug.rst diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index e8d1176f477866..c5af7d5de2837c 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -253,6 +253,9 @@ attributes (see :ref:`import-mod-attrs` for module attributes): +-----------------+-------------------+---------------------------+ | | gi_running | is the generator running? | +-----------------+-------------------+---------------------------+ +| | gi_suspended | is the generator | +| | | suspended? | ++-----------------+-------------------+---------------------------+ | | gi_code | code | +-----------------+-------------------+---------------------------+ | | gi_yieldfrom | object being iterated by | @@ -1179,7 +1182,7 @@ Classes and functions :func:`signature` in Python 3.5, but that decision has been reversed in order to restore a clearly supported standard interface for single-source Python 2/3 code migrating away from the legacy - :func:`getargspec` API. + :func:`!getargspec` API. .. versionchanged:: 3.7 Python only explicitly guaranteed that it preserved the declaration @@ -1289,6 +1292,11 @@ Classes and functions This is an alias for :func:`annotationlib.get_annotations`; see the documentation of that function for more information. + .. caution:: + + This function may execute arbitrary code contained in annotations. + See :ref:`annotationlib-security` for more information. + .. versionadded:: 3.10 .. versionchanged:: 3.14 @@ -1768,7 +1776,7 @@ Buffer flags .. _inspect-module-cli: -Command Line Interface +Command-line interface ---------------------- The :mod:`inspect` module also provides a basic introspection capability diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 3aa2f35f05eba0..de5cab5aee649f 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -528,14 +528,13 @@ I/O Base Classes It inherits from :class:`IOBase`. The main difference with :class:`RawIOBase` is that methods :meth:`read`, - :meth:`readinto` and :meth:`write` will try (respectively) to read as much - input as requested or to consume all given output, at the expense of - making perhaps more than one system call. + :meth:`readinto` and :meth:`write` will try (respectively) to read + as much input as requested or to emit all provided data. - In addition, those methods can raise :exc:`BlockingIOError` if the - underlying raw stream is in non-blocking mode and cannot take or give - enough data; unlike their :class:`RawIOBase` counterparts, they will - never return ``None``. + In addition, if the underlying raw stream is in non-blocking mode, when the + system returns would block :meth:`write` will raise :exc:`BlockingIOError` + with :attr:`BlockingIOError.characters_written` and :meth:`read` will return + data read so far or ``None`` if no data is available. Besides, the :meth:`read` method does not have a default implementation that defers to :meth:`readinto`. @@ -568,29 +567,40 @@ I/O Base Classes .. method:: read(size=-1, /) - Read and return up to *size* bytes. If the argument is omitted, ``None``, - or negative, data is read and returned until EOF is reached. An empty - :class:`bytes` object is returned if the stream is already at EOF. + Read and return up to *size* bytes. If the argument is omitted, ``None``, + or negative read as much as possible. - If the argument is positive, and the underlying raw stream is not - interactive, multiple raw reads may be issued to satisfy the byte count - (unless EOF is reached first). But for interactive raw streams, at most - one raw read will be issued, and a short result does not imply that EOF is - imminent. + Fewer bytes may be returned than requested. An empty :class:`bytes` object + is returned if the stream is already at EOF. More than one read may be + made and calls may be retried if specific errors are encountered, see + :meth:`os.read` and :pep:`475` for more details. Less than size bytes + being returned does not imply that EOF is imminent. - A :exc:`BlockingIOError` is raised if the underlying raw stream is in - non blocking-mode, and has no data available at the moment. + When reading as much as possible the default implementation will use + ``raw.readall`` if available (which should implement + :meth:`RawIOBase.readall`), otherwise will read in a loop until read + returns ``None``, an empty :class:`bytes`, or a non-retryable error. For + most streams this is to EOF, but for non-blocking streams more data may + become available. + + .. note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: read1(size=-1, /) - Read and return up to *size* bytes, with at most one call to the - underlying raw stream's :meth:`~RawIOBase.read` (or - :meth:`~RawIOBase.readinto`) method. This can be useful if you are - implementing your own buffering on top of a :class:`BufferedIOBase` - object. + Read and return up to *size* bytes, calling :meth:`~RawIOBase.readinto` + which may retry if :py:const:`~errno.EINTR` is encountered per + :pep:`475`. If *size* is ``-1`` or not provided, the implementation will + choose an arbitrary value for *size*. - If *size* is ``-1`` (the default), an arbitrary number of bytes are - returned (more than zero unless EOF is reached). + .. note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: readinto(b, /) @@ -767,34 +777,21 @@ than raw I/O does. .. method:: peek(size=0, /) - Return bytes from the stream without advancing the position. At most one - single read on the raw stream is done to satisfy the call. The number of - bytes returned may be less or more than requested. + Return bytes from the stream without advancing the position. The number of + bytes returned may be less or more than requested. If the underlying raw + stream is non-blocking and the operation would block, returns empty bytes. .. method:: read(size=-1, /) - Read and return *size* bytes, or if *size* is not given or negative, until - EOF or if the read call would block in non-blocking mode. - - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. + In :class:`BufferedReader` this is the same as :meth:`io.BufferedIOBase.read` .. method:: read1(size=-1, /) - Read and return up to *size* bytes with only one call on the raw stream. - If at least one byte is buffered, only buffered bytes are returned. - Otherwise, one raw stream read call is made. + In :class:`BufferedReader` this is the same as :meth:`io.BufferedIOBase.read1` .. versionchanged:: 3.7 The *size* argument is now optional. - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. - .. class:: BufferedWriter(raw, buffer_size=DEFAULT_BUFFER_SIZE) A buffered binary stream providing higher-level access to a writeable, non @@ -826,8 +823,8 @@ than raw I/O does. Write the :term:`bytes-like object`, *b*, and return the number of bytes written. When in non-blocking mode, a - :exc:`BlockingIOError` is raised if the buffer needs to be written out but - the raw stream blocks. + :exc:`BlockingIOError` with :attr:`BlockingIOError.characters_written` set + is raised if the buffer needs to be written out but the raw stream blocks. .. class:: BufferedRandom(raw, buffer_size=DEFAULT_BUFFER_SIZE) @@ -894,9 +891,10 @@ Text I/O .. attribute:: buffer - The underlying binary buffer (a :class:`BufferedIOBase` instance) that - :class:`TextIOBase` deals with. This is not part of the - :class:`TextIOBase` API and may not exist in some implementations. + The underlying binary buffer (a :class:`BufferedIOBase` + or :class:`RawIOBase` instance) that :class:`TextIOBase` deals with. + This is not part of the :class:`TextIOBase` API and may not exist + in some implementations. .. method:: detach() diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst index e5bdfbb144b65a..9e887d8e65741b 100644 --- a/Doc/library/ipaddress.rst +++ b/Doc/library/ipaddress.rst @@ -240,7 +240,16 @@ write code that handles both IP versions correctly. Address objects are .. attribute:: is_reserved - ``True`` if the address is otherwise IETF reserved. + ``True`` if the address is noted as reserved by the IETF. + For IPv4, this is only ``240.0.0.0/4``, the ``Reserved`` address block. + For IPv6, this is all addresses `allocated `__ as + ``Reserved by IETF`` for future use. + + .. note:: For IPv4, ``is_reserved`` is not related to the address block value of the + ``Reserved-by-Protocol`` column in iana-ipv4-special-registry_. + + .. caution:: For IPv6, ``fec0::/10`` a former Site-Local scoped address prefix is + currently excluded from that list (see :attr:`~IPv6Address.is_site_local` & :rfc:`3879`). .. attribute:: is_loopback @@ -261,6 +270,7 @@ write code that handles both IP versions correctly. Address objects are .. _iana-ipv4-special-registry: https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml .. _iana-ipv6-special-registry: https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml +.. _iana-ipv6-address-space: https://www.iana.org/assignments/ipv6-address-space/ipv6-address-space.xhtml .. method:: IPv4Address.__format__(fmt) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 00925ae920aad9..aa46920d3526f0 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -47,7 +47,7 @@ Iterator Arguments Results Iterator Arguments Results Example ============================ ============================ ================================================= ============================================================= :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) → 1 3 6 10 15`` -:func:`batched` p, n (p0, p1, ..., p_n-1), ... ``batched('ABCDEFG', n=3) → ABC DEF G`` +:func:`batched` p, n (p0, p1, ..., p_n-1), ... ``batched('ABCDEFG', n=2) → AB CD EF G`` :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') → A B C D E F`` :func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) → A B C D E F`` :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) → A C E F`` @@ -181,7 +181,7 @@ loops that truncate the stream. Roughly equivalent to:: def batched(iterable, n, *, strict=False): - # batched('ABCDEFG', 3) → ABC DEF G + # batched('ABCDEFG', 2) → AB CD EF G if n < 1: raise ValueError('n must be at least one') iterator = iter(iterable) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 26579ec6328860..8b4217c210d5b3 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -18,12 +18,17 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. note:: + The term "object" in the context of JSON processing in Python can be + ambiguous. All values in Python are objects. In JSON, an object refers to + any data wrapped in curly braces, similar to a Python dictionary. + .. warning:: Be cautious when parsing JSON data from untrusted sources. A malicious JSON string may cause the decoder to consume considerable CPU and memory resources. Limiting the size of data to be parsed is recommended. -:mod:`json` exposes an API familiar to users of the standard library +This module exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. Encoding basic Python object hierarchies:: @@ -60,7 +65,7 @@ Pretty printing:: "6": 7 } -Specializing JSON object encoding:: +Customizing JSON object encoding:: >>> import json >>> def custom_json(obj): @@ -83,7 +88,7 @@ Decoding JSON:: >>> json.load(io) ['streaming API'] -Specializing JSON object decoding:: +Customizing JSON object decoding:: >>> import json >>> def as_complex(dct): @@ -178,8 +183,10 @@ Basic Usage :param bool ensure_ascii: If ``True`` (the default), the output is guaranteed to - have all incoming non-ASCII characters escaped. - If ``False``, these characters will be outputted as-is. + have all incoming non-ASCII and non-printable characters escaped. + If ``False``, all characters will be outputted as-is, except for + the characters that must be escaped: quotation mark, reverse solidus, + and the control characters U+0000 through U+001F. :param bool check_circular: If ``False``, the circular reference check for container types is skipped @@ -279,7 +286,7 @@ Basic Usage :param object_hook: If set, a function that is called with the result of - any object literal decoded (a :class:`dict`). + any JSON object literal decoded (a :class:`dict`). The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders, @@ -289,7 +296,7 @@ Basic Usage :param object_pairs_hook: If set, a function that is called with the result of - any object literal decoded with an ordered list of pairs. + any JSON object literal decoded with an ordered list of pairs. The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders. @@ -490,8 +497,10 @@ Encoders and Decoders :class:`bool` or ``None``. If *skipkeys* is true, such items are simply skipped. If *ensure_ascii* is true (the default), the output is guaranteed to - have all incoming non-ASCII characters escaped. If *ensure_ascii* is - false, these characters will be output as-is. + have all incoming non-ASCII and non-printable characters escaped. + If *ensure_ascii* is false, all characters will be output as-is, except for + the characters that must be escaped: quotation mark, reverse solidus, + and the control characters U+0000 through U+001F. If *check_circular* is true (the default), then lists, dicts, and custom encoded objects will be checked for circular references during encoding to @@ -631,7 +640,7 @@ UTF-32, with UTF-8 being the recommended default for maximum interoperability. As permitted, though not required, by the RFC, this module's serializer sets *ensure_ascii=True* by default, thus escaping the output so that the resulting -strings only contain ASCII characters. +strings only contain printable ASCII characters. Other than the *ensure_ascii* parameter, this module is defined strictly in terms of conversion between Python objects and diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 426e3a06e1ef11..790bd3a5836423 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -34,12 +34,17 @@ The :mod:`locale` module defines the following exception and functions: If *locale* is given and not ``None``, :func:`setlocale` modifies the locale setting for the *category*. The available categories are listed in the data - description below. *locale* may be a string, or an iterable of two strings - (language code and encoding). If it's an iterable, it's converted to a locale - name using the locale aliasing engine. An empty string specifies the user's + description below. *locale* may be a :ref:`string `, or a pair, + language code and encoding. An empty string specifies the user's default settings. If the modification of the locale fails, the exception :exc:`Error` is raised. If successful, the new locale setting is returned. + If *locale* is a pair, it is converted to a locale name using + the locale aliasing engine. + The language code has the same format as a :ref:`locale name `, + but without encoding and ``@``-modifier. + The language code and encoding can be ``None``. + If *locale* is omitted or ``None``, the current setting for *category* is returned. @@ -345,22 +350,26 @@ The :mod:`locale` module defines the following exception and functions: ``'LANG'``. The GNU gettext search path contains ``'LC_ALL'``, ``'LC_CTYPE'``, ``'LANG'`` and ``'LANGUAGE'``, in that order. - Except for the code ``'C'``, the language code corresponds to :rfc:`1766`. - *language code* and *encoding* may be ``None`` if their values cannot be + The language code has the same format as a :ref:`locale name `, + but without encoding and ``@``-modifier. + The language code and encoding may be ``None`` if their values cannot be determined. + The "C" locale is represented as ``(None, None)``. .. deprecated-removed:: 3.11 3.15 .. function:: getlocale(category=LC_CTYPE) - Returns the current setting for the given locale category as sequence containing - *language code*, *encoding*. *category* may be one of the :const:`!LC_\*` values - except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`. + Returns the current setting for the given locale category as a tuple containing + the language code and encoding. *category* may be one of the :const:`!LC_\*` + values except :const:`LC_ALL`. It defaults to :const:`LC_CTYPE`. - Except for the code ``'C'``, the language code corresponds to :rfc:`1766`. - *language code* and *encoding* may be ``None`` if their values cannot be + The language code has the same format as a :ref:`locale name `, + but without encoding and ``@``-modifier. + The language code and encoding may be ``None`` if their values cannot be determined. + The "C" locale is represented as ``(None, None)``. .. function:: getpreferredencoding(do_setlocale=True) @@ -508,8 +517,8 @@ The :mod:`locale` module defines the following exception and functions: SSH connections. Python doesn't internally use locale-dependent character transformation functions - from ``ctype.h``. Instead, an internal ``pyctype.h`` provides locale-independent - equivalents like :c:macro:`!Py_TOLOWER`. + from ``ctype.h``. Instead, ``pyctype.h`` provides locale-independent + equivalents like :c:macro:`Py_TOLOWER`. .. data:: LC_COLLATE @@ -615,6 +624,61 @@ whose high bit is set (i.e., non-ASCII bytes) are never converted or considered part of a character class such as letter or whitespace. +.. _locale_name: + +Locale names +------------ + +The format of the locale name is platform dependent, and the set of supported +locales can depend on the system configuration. + +On Posix platforms, it usually has the format [1]_: + +.. productionlist:: locale_name + : language ["_" territory] ["." charset] ["@" modifier] + +where *language* is a two- or three-letter language code from `ISO 639`_, +*territory* is a two-letter country or region code from `ISO 3166`_, +*charset* is a locale encoding, and *modifier* is a script name, +a language subtag, a sort order identifier, or other locale modifier +(for example, "latin", "valencia", "stroke" and "euro"). + +On Windows, several formats are supported. [2]_ [3]_ +A subset of `IETF BCP 47`_ tags: + +.. productionlist:: locale_name + : language ["-" script] ["-" territory] ["." charset] + : language ["-" script] "-" territory "-" modifier + +where *language* and *territory* have the same meaning as in Posix, +*script* is a four-letter script code from `ISO 15924`_, +and *modifier* is a language subtag, a sort order identifier +or custom modifier (for example, "valencia", "stroke" or "x-python"). +Both hyphen (``'-'``) and underscore (``'_'``) separators are supported. +Only UTF-8 encoding is allowed for BCP 47 tags. + +Windows also supports locale names in the format: + +.. productionlist:: locale_name + : language ["_" territory] ["." charset] + +where *language* and *territory* are full names, such as "English" and +"United States", and *charset* is either a code page number (for example, "1252") +or UTF-8. +Only the underscore separator is supported in this format. + +The "C" locale is supported on all platforms. + +.. _ISO 639: https://www.iso.org/iso-639-language-code +.. _ISO 3166: https://www.iso.org/iso-3166-country-codes.html +.. _IETF BCP 47: https://www.rfc-editor.org/info/bcp47 +.. _ISO 15924: https://www.unicode.org/iso15924/ + +.. [1] `IEEE Std 1003.1-2024; 8.2 Internationalization Variables `_ +.. [2] `UCRT Locale names, Languages, and Country/Region strings `_ +.. [3] `Locale Names `_ + + .. _embedding-locale: For extension writers and programs that embed Python diff --git a/Doc/library/logging.config.rst b/Doc/library/logging.config.rst index 0e9dc33ae2123a..96cca3073fec7e 100644 --- a/Doc/library/logging.config.rst +++ b/Doc/library/logging.config.rst @@ -548,7 +548,7 @@ mnemonic that the corresponding value is a callable. The ``filters`` member of ``handlers`` and ``loggers`` can take filter instances in addition to ids. -You can also specify a special key ``'.'`` whose value is a dictionary is a +You can also specify a special key ``'.'`` whose value is a mapping of attribute names to values. If found, the specified attributes will be set on the user-defined object before it is returned. Thus, with the following configuration:: @@ -586,7 +586,7 @@ configuration dictionary for the handler named ``foo``, and later (once that handler has been configured) it points to the configured handler instance. Thus, ``cfg://handlers.foo`` could resolve to either a dictionary or a handler instance. In general, it is wise to name handlers in a way such that dependent -handlers are configured _after_ any handlers they depend on; that allows +handlers are configured *after* any handlers they depend on; that allows something like ``cfg://handlers.foo`` to be used in configuring a handler that depends on handler ``foo``. If that dependent handler were named ``bar``, problems would result, because the configuration of ``bar`` would be attempted diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 63ef533e82c658..c9cfbdb4126fda 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -352,6 +352,10 @@ module, supports rotation of disk log files. Outputs the record to the file, catering for rollover as described previously. + .. method:: shouldRollover(record) + + See if the supplied record would cause the file to exceed the configured size limit. + .. _timed-rotating-file-handler: TimedRotatingFileHandler @@ -461,6 +465,11 @@ timed intervals. Returns a list of filenames which should be deleted as part of rollover. These are the absolute paths of the oldest backup log files written by the handler. + .. method:: shouldRollover(record) + + See if enough time has passed for a rollover to occur and if it has, compute + the next rollover time. + .. _socket-handler: SocketHandler @@ -1051,6 +1060,15 @@ possible, while any potentially slow operations (such as sending an email via .. note:: If you are using :mod:`multiprocessing`, you should avoid using :class:`~queue.SimpleQueue` and instead use :class:`multiprocessing.Queue`. + .. warning:: + + The :mod:`multiprocessing` module uses an internal logger created and + accessed via :meth:`~multiprocessing.get_logger`. + :class:`multiprocessing.Queue` will log ``DEBUG`` level messages upon + items being queued. If those log messages are processed by a + :class:`QueueHandler` using the same :class:`multiprocessing.Queue` instance, + it will cause a deadlock or infinite recursion. + .. method:: emit(record) Enqueues the result of preparing the LogRecord. Should an exception diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst index 72190e97240514..8c8f50692fa1fa 100644 --- a/Doc/library/logging.rst +++ b/Doc/library/logging.rst @@ -671,8 +671,7 @@ Formatter Objects which is just the logged message. :type fmt: str - :param datefmt: A format string in the given *style* for - the date/time portion of the logged output. + :param datefmt: A format string for the date/time portion of the logged output. If not specified, the default described in :meth:`formatTime` is used. :type datefmt: str @@ -680,7 +679,7 @@ Formatter Objects how the format string will be merged with its data: using one of :ref:`old-string-formatting` (``%``), :meth:`str.format` (``{``) or :class:`string.Template` (``$``). This only applies to - *fmt* and *datefmt* (e.g. ``'%(message)s'`` versus ``'{message}'``), + *fmt* (e.g. ``'%(message)s'`` versus ``'{message}'``), not to the actual log messages passed to the logging methods. However, there are :ref:`other ways ` to use ``{``- and ``$``-formatting for log messages. @@ -1083,12 +1082,13 @@ LoggerAdapter Objects information into logging calls. For a usage example, see the section on :ref:`adding contextual information to your logging output `. -.. class:: LoggerAdapter(logger, extra, merge_extra=False) +.. class:: LoggerAdapter(logger, extra=None, merge_extra=False) Returns an instance of :class:`LoggerAdapter` initialized with an - underlying :class:`Logger` instance, a dict-like object (*extra*), and a - boolean (*merge_extra*) indicating whether or not the *extra* argument of - individual log calls should be merged with the :class:`LoggerAdapter` extra. + underlying :class:`Logger` instance, an optional dict-like object (*extra*), + and an optional boolean (*merge_extra*) indicating whether or not + the *extra* argument of individual log calls should be merged with + the :class:`LoggerAdapter` extra. The default behavior is to ignore the *extra* argument of individual log calls and only use the one of the :class:`LoggerAdapter` instance @@ -1128,16 +1128,20 @@ information into logging calls. For a usage example, see the section on Attribute :attr:`!manager` and method :meth:`!_log` were added, which delegate to the underlying logger and allow adapters to be nested. + .. versionchanged:: 3.10 + + The *extra* argument is now optional. + .. versionchanged:: 3.13 - The *merge_extra* argument was added. + The *merge_extra* parameter was added. Thread Safety ------------- The logging module is intended to be thread-safe without any special work -needing to be done by its clients. It achieves this though using threading +needing to be done by its clients. It achieves this through using threading locks; there is one lock to serialize access to the module's shared data, and each handler also creates a lock to serialize access to its underlying I/O. diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst index 69f7cb8d48d7ae..8a4f68f3502521 100644 --- a/Doc/library/lzma.rst +++ b/Doc/library/lzma.rst @@ -23,6 +23,8 @@ module. Note that :class:`LZMAFile` and :class:`bz2.BZ2File` are *not* thread-safe, so if you need to use a single :class:`LZMAFile` instance from multiple threads, it is necessary to protect it with a lock. +.. include:: ../includes/optional-module.rst + .. exception:: LZMAError diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst index e8a96f29ea185e..62e289573c0c7e 100644 --- a/Doc/library/mailbox.rst +++ b/Doc/library/mailbox.rst @@ -917,7 +917,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. copied; furthermore, any format-specific information is converted insofar as possible if *message* is a :class:`!Message` instance. If *message* is a string, a byte string, - or a file, it should contain an :rfc:`2822`\ -compliant message, which is read + or a file, it should contain an :rfc:`5322`\ -compliant message, which is read and parsed. Files should be open in binary mode, but text mode files are accepted for backward compatibility. diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 0749367045dfa9..8dad20bc83bcf3 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -10,8 +10,8 @@ -------------- -This module provides access to the mathematical functions defined by the C -standard. +This module provides access to common mathematical functions and constants, +including those defined by the C standard. These functions cannot be used with complex numbers; use the functions of the same name from the :mod:`cmath` module if you require support for complex @@ -144,8 +144,7 @@ Number-theoretic functions .. function:: factorial(n) - Return *n* factorial as an integer. Raises :exc:`ValueError` if *n* is not integral or - is negative. + Return factorial of the nonnegative integer *n*. .. versionchanged:: 3.10 Floats with integral values (like ``5.0``) are no longer accepted. @@ -775,7 +774,7 @@ Constants The mathematical constant *τ* = 6.283185..., to available precision. Tau is a circle constant equal to 2\ *π*, the ratio of a circle's circumference to its radius. To learn more about Tau, check out Vi Hart's video `Pi is (still) - Wrong `_, and start celebrating + Wrong `_, and start celebrating `Tau day `_ by eating twice as much pie! .. versionadded:: 3.6 diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst index 4e20c07331a220..f78518ac2232a0 100644 --- a/Doc/library/mmap.rst +++ b/Doc/library/mmap.rst @@ -200,7 +200,8 @@ To map anonymous memory, -1 should be passed as the fileno along with the length Writable :term:`bytes-like object` is now accepted. - .. method:: flush([offset[, size]]) + .. method:: flush() + flush(offset, size, /) Flushes changes made to the in-memory copy of a file back to disk. Without use of this call there is no guarantee that changes are written back before @@ -269,7 +270,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length Resizing a map created with *access* of :const:`ACCESS_READ` or :const:`ACCESS_COPY`, will raise a :exc:`TypeError` exception. - Resizing a map created with with *trackfd* set to ``False``, + Resizing a map created with *trackfd* set to ``False``, will raise a :exc:`ValueError` exception. **On Windows**: Resizing the map will raise an :exc:`OSError` if there are other diff --git a/Doc/library/msvcrt.rst b/Doc/library/msvcrt.rst index 327cc3602b1a77..a2c5e375d2cc4f 100644 --- a/Doc/library/msvcrt.rst +++ b/Doc/library/msvcrt.rst @@ -22,6 +22,8 @@ api. The normal API deals only with ASCII characters and is of limited use for internationalized applications. The wide char API should be used where ever possible. +.. availability:: Windows. + .. versionchanged:: 3.3 Operations in this module now raise :exc:`OSError` where :exc:`IOError` was raised. diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 6c43d5fe166e2f..92605c57527887 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -22,8 +22,7 @@ to this, the :mod:`multiprocessing` module allows the programmer to fully leverage multiple processors on a given machine. It runs on both POSIX and Windows. -The :mod:`multiprocessing` module also introduces APIs which do not have -analogs in the :mod:`threading` module. A prime example of this is the +The :mod:`multiprocessing` module also introduces the :class:`~multiprocessing.pool.Pool` object which offers a convenient means of parallelizing the execution of a function across multiple input values, distributing the input data across processes (data parallelism). The following @@ -44,6 +43,10 @@ will print to standard output :: [1, 4, 9] +The :mod:`multiprocessing` module also introduces APIs which do not have +analogs in the :mod:`threading` module, like the ability to :meth:`terminate +`, :meth:`interrupt ` or :meth:`kill +` a running process. .. seealso:: @@ -97,6 +100,10 @@ To show the individual process IDs involved, here is an expanded example:: For an explanation of why the ``if __name__ == '__main__'`` part is necessary, see :ref:`multiprocessing-programming`. +The arguments to :class:`Process` usually need to be unpickleable from within +the child process. If you tried typing the above example directly into a REPL it +could lead to an :exc:`AttributeError` in the child process trying to locate the +*f* function in the ``__main__`` module. .. _multiprocessing-start-methods: @@ -233,9 +240,12 @@ processes for a different context. In particular, locks created using the *fork* context cannot be passed to processes started using the *spawn* or *forkserver* start methods. -A library which wants to use a particular start method should probably -use :func:`get_context` to avoid interfering with the choice of the -library user. +Libraries using :mod:`multiprocessing` or +:class:`~concurrent.futures.ProcessPoolExecutor` should be designed to allow +their users to provide their own multiprocessing context. Using a specific +context of your own within a library can lead to incompatibilities with the +rest of the library user's application. Always document if your library +requires a specific start method. .. warning:: @@ -538,9 +548,42 @@ The :mod:`multiprocessing` package mostly replicates the API of the to pass to *target*. If a subclass overrides the constructor, it must make sure it invokes the - base class constructor (:meth:`Process.__init__`) before doing anything else + base class constructor (``super().__init__()``) before doing anything else to the process. + .. note:: + + In general, all arguments to :class:`Process` must be picklable. This is + frequently observed when trying to create a :class:`Process` or use a + :class:`concurrent.futures.ProcessPoolExecutor` from a REPL with a + locally defined *target* function. + + Passing a callable object defined in the current REPL session causes the + child process to die via an uncaught :exc:`AttributeError` exception when + starting as *target* must have been defined within an importable module + in order to be loaded during unpickling. + + Example of this uncatchable error from the child:: + + >>> import multiprocessing as mp + >>> def knigit(): + ... print("Ni!") + ... + >>> process = mp.Process(target=knigit) + >>> process.start() + >>> Traceback (most recent call last): + File ".../multiprocessing/spawn.py", line ..., in spawn_main + File ".../multiprocessing/spawn.py", line ..., in _main + AttributeError: module '__main__' has no attribute 'knigit' + >>> process + + + See :ref:`multiprocessing-programming-spawn`. While this restriction is + not true if using the ``"fork"`` start method, as of Python ``3.14`` that + is no longer the default on any platform. See + :ref:`multiprocessing-start-methods`. + See also :gh:`132898`. + .. versionchanged:: 3.3 Added the *daemon* parameter. @@ -789,8 +832,8 @@ raising an exception. One difference from other Python queue implementations, is that :mod:`multiprocessing` queues serializes all objects that are put into them using :mod:`pickle`. -The object return by the get method is a re-created object that does not share memory -with the original object. +The object returned by the get method is a re-created object that does not share +memory with the original object. Note that one can also create a shared queue by using a manager object -- see :ref:`multiprocessing-managers`. @@ -847,7 +890,7 @@ For an example of the usage of queues for interprocess communication see :ref:`multiprocessing-examples`. -.. function:: Pipe([duplex]) +.. function:: Pipe(duplex=True) Returns a pair ``(conn1, conn2)`` of :class:`~multiprocessing.connection.Connection` objects representing the @@ -936,8 +979,13 @@ For an example of the usage of queues for interprocess communication see .. method:: close() - Indicate that no more data will be put on this queue by the current - process. The background thread will quit once it has flushed all buffered + Close the queue: release internal resources. + + A queue must not be used anymore after it is closed. For example, + :meth:`~Queue.get`, :meth:`~Queue.put` and :meth:`~Queue.empty` + methods must no longer be called. + + The background thread will quit once it has flushed all buffered data to the pipe. This is called automatically when the queue is garbage collected. @@ -1081,7 +1129,7 @@ Miscellaneous .. function:: freeze_support() Add support for when a program which uses :mod:`multiprocessing` has been - frozen to produce a Windows executable. (Has been tested with **py2exe**, + frozen to produce an executable. (Has been tested with **py2exe**, **PyInstaller** and **cx_Freeze**.) One needs to call this function straight after the ``if __name__ == @@ -1099,10 +1147,10 @@ Miscellaneous If the ``freeze_support()`` line is omitted then trying to run the frozen executable will raise :exc:`RuntimeError`. - Calling ``freeze_support()`` has no effect when invoked on any operating - system other than Windows. In addition, if the module is being run - normally by the Python interpreter on Windows (the program has not been - frozen), then ``freeze_support()`` has no effect. + Calling ``freeze_support()`` has no effect when the start method is not + *spawn*. In addition, if the module is being run normally by the Python + interpreter (the program has not been frozen), then ``freeze_support()`` + has no effect. .. function:: get_all_start_methods() @@ -1118,7 +1166,9 @@ Miscellaneous Return a context object which has the same attributes as the :mod:`multiprocessing` module. - If *method* is ``None`` then the default context is returned. + If *method* is ``None`` then the default context is returned. Note that if + the global start method has not been set, this will set it to the + default method. Otherwise *method* should be ``'fork'``, ``'spawn'``, ``'forkserver'``. :exc:`ValueError` is raised if the specified start method is not available. See :ref:`multiprocessing-start-methods`. @@ -1129,10 +1179,10 @@ Miscellaneous Return the name of start method used for starting processes. - If the start method has not been fixed and *allow_none* is false, - then the start method is fixed to the default and the name is - returned. If the start method has not been fixed and *allow_none* - is true then ``None`` is returned. + If the global start method has not been set and *allow_none* is + ``False``, then the start method is set to the default and the name + is returned. If the start method has not been set and *allow_none* is + ``True`` then ``None`` is returned. The return value can be ``'fork'``, ``'spawn'``, ``'forkserver'`` or ``None``. See :ref:`multiprocessing-start-methods`. @@ -1369,6 +1419,12 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + .. method:: locked() + + Return a boolean indicating whether this object is locked right now. + + .. versionadded:: 3.14 + .. note:: On macOS, this is indistinguishable from :class:`Semaphore` because ``sem_getvalue()`` is not implemented on that platform. @@ -1521,6 +1577,22 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + + .. method:: get_value() + + Return the current value of semaphore. + + Note that this may raise :exc:`NotImplementedError` on platforms like + macOS where ``sem_getvalue()`` is not implemented. + + + .. method:: locked() + + Return a boolean indicating whether this object is locked right now. + + .. versionadded:: 3.14 + + .. note:: On macOS, ``sem_timedwait`` is unsupported, so calling ``acquire()`` with @@ -3051,10 +3123,10 @@ start method. More picklability - Ensure that all arguments to :meth:`Process.__init__` are picklable. - Also, if you subclass :class:`~multiprocessing.Process` then make sure that - instances will be picklable when the :meth:`Process.start - ` method is called. + Ensure that all arguments to :class:`~multiprocessing.Process` are + picklable. Also, if you subclass ``Process.__init__``, you must make sure + that instances will be picklable when the + :meth:`Process.start ` method is called. Global variables diff --git a/Doc/library/netrc.rst b/Doc/library/netrc.rst index f6260383b2b057..74c97e8c9a9759 100644 --- a/Doc/library/netrc.rst +++ b/Doc/library/netrc.rst @@ -24,12 +24,14 @@ the Unix :program:`ftp` program and other FTP clients. a :exc:`FileNotFoundError` exception will be raised. Parse errors will raise :exc:`NetrcParseError` with diagnostic information including the file name, line number, and terminating token. + If no argument is specified on a POSIX system, the presence of passwords in the :file:`.netrc` file will raise a :exc:`NetrcParseError` if the file ownership or permissions are insecure (owned by a user other than the user running the process, or accessible for read or write by any other user). This implements security behavior equivalent to that of ftp and other - programs that use :file:`.netrc`. + programs that use :file:`.netrc`. Such security checks are not available + on platforms that do not support :func:`os.getuid`. .. versionchanged:: 3.4 Added the POSIX permission check. diff --git a/Doc/library/numbers.rst b/Doc/library/numbers.rst index 681d0b76f2a14b..57b35017072c97 100644 --- a/Doc/library/numbers.rst +++ b/Doc/library/numbers.rst @@ -69,11 +69,11 @@ The numeric tower .. attribute:: numerator - Abstract. + Abstract. The numerator of this rational number. .. attribute:: denominator - Abstract. + Abstract. The denominator of this rational number. .. class:: Integral diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index ecbbc1d7605f9f..f18ca92bb206f3 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -42,8 +42,8 @@ the :mod:`glob` module.) a path that is *always* in one of the different formats. They all have the same interface: - * :mod:`posixpath` for UNIX-style paths - * :mod:`ntpath` for Windows paths + * :mod:`!posixpath` for UNIX-style paths + * :mod:`!ntpath` for Windows paths .. versionchanged:: 3.8 @@ -64,7 +64,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: basename(path) +.. function:: basename(path, /) Return the base name of pathname *path*. This is the second element of the pair returned by passing *path* to the function :func:`split`. Note that @@ -94,7 +94,7 @@ the :mod:`glob` module.) Any iterable can now be passed, rather than just sequences. -.. function:: commonprefix(list) +.. function:: commonprefix(list, /) Return the longest path prefix (taken character-by-character) that is a prefix of all paths in *list*. If *list* is empty, return the empty string @@ -118,7 +118,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: dirname(path) +.. function:: dirname(path, /) Return the directory name of pathname *path*. This is the first element of the pair returned by passing *path* to the function :func:`split`. @@ -199,14 +199,14 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: getatime(path) +.. function:: getatime(path, /) Return the time of last access of *path*. The return value is a floating-point number giving the number of seconds since the epoch (see the :mod:`time` module). Raise :exc:`OSError` if the file does not exist or is inaccessible. -.. function:: getmtime(path) +.. function:: getmtime(path, /) Return the time of last modification of *path*. The return value is a floating-point number giving the number of seconds since the epoch (see the :mod:`time` module). @@ -216,7 +216,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: getctime(path) +.. function:: getctime(path, /) Return the system's ctime which, on some systems (like Unix) is the time of the last metadata change, and, on others (like Windows), is the creation time for *path*. @@ -228,7 +228,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: getsize(path) +.. function:: getsize(path, /) Return the size, in bytes, of *path*. Raise :exc:`OSError` if the file does not exist or is inaccessible. @@ -237,7 +237,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: isabs(path) +.. function:: isabs(path, /) Return ``True`` if *path* is an absolute pathname. On Unix, that means it begins with a slash, on Windows that it begins with two (back)slashes, or a @@ -261,7 +261,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: isdir(path) +.. function:: isdir(path, /) Return ``True`` if *path* is an :func:`existing ` directory. This follows symbolic links, so both :func:`islink` and :func:`isdir` can be true @@ -298,9 +298,10 @@ the :mod:`glob` module.) device than *path*, or whether :file:`{path}/..` and *path* point to the same i-node on the same device --- this should detect mount points for all Unix and POSIX variants. It is not able to reliably detect bind mounts on the - same filesystem. On Windows, a drive letter root and a share UNC are - always mount points, and for any other path ``GetVolumePathName`` is called - to see if it is different from the input path. + same filesystem. On Linux systems, it will always return ``True`` for btrfs + subvolumes, even if they aren't mount points. On Windows, a drive letter root + and a share UNC are always mount points, and for any other path + ``GetVolumePathName`` is called to see if it is different from the input path. .. versionchanged:: 3.4 Added support for detecting non-root mount points on Windows. @@ -350,7 +351,7 @@ the :mod:`glob` module.) .. versionadded:: 3.13 -.. function:: join(path, *paths) +.. function:: join(path, /, *paths) Join one or more path segments intelligently. The return value is the concatenation of *path* and all members of *\*paths*, with exactly one @@ -371,7 +372,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object` for *path* and *paths*. -.. function:: normcase(path) +.. function:: normcase(path, /) Normalize the case of a pathname. On Windows, convert all characters in the pathname to lowercase, and also convert forward slashes to backward slashes. @@ -401,16 +402,33 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: realpath(path, *, strict=False) +.. function:: realpath(path, /, *, strict=False) Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path (if they are supported by the operating system). On Windows, this function will also resolve MS-DOS (also called 8.3) style names such as ``C:\\PROGRA~1`` to ``C:\\Program Files``. - If a path doesn't exist or a symlink loop is encountered, and *strict* is - ``True``, :exc:`OSError` is raised. If *strict* is ``False`` these errors - are ignored, and so the result might be missing or otherwise inaccessible. + By default, the path is evaluated up to the first component that does not + exist, is a symlink loop, or whose evaluation raises :exc:`OSError`. + All such components are appended unchanged to the existing part of the path. + + Some errors that are handled this way include "access denied", "not a + directory", or "bad argument to internal function". Thus, the + resulting path may be missing or inaccessible, may still contain + links or loops, and may traverse non-directories. + + This behavior can be modified by keyword arguments: + + If *strict* is ``True``, the first error encountered when evaluating the path is + re-raised. + In particular, :exc:`FileNotFoundError` is raised if *path* does not exist, + or another :exc:`OSError` if it is otherwise inaccessible. + + If *strict* is :py:data:`os.path.ALLOW_MISSING`, errors other than + :exc:`FileNotFoundError` are re-raised (as with ``strict=True``). + Thus, the returned path will not contain any symbolic links, but the named + file and some of its parent directories may be missing. .. note:: This function emulates the operating system's procedure for making a path @@ -429,6 +447,15 @@ the :mod:`glob` module.) .. versionchanged:: 3.10 The *strict* parameter was added. + .. versionchanged:: 3.14 + The :py:data:`~os.path.ALLOW_MISSING` value for the *strict* parameter + was added. + +.. data:: ALLOW_MISSING + + Special value used for the *strict* argument in :func:`realpath`. + + .. versionadded:: 3.14 .. function:: relpath(path, start=os.curdir) @@ -444,7 +471,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: samefile(path1, path2) +.. function:: samefile(path1, path2, /) Return ``True`` if both pathname arguments refer to the same file or directory. This is determined by the device number and i-node number and raises an @@ -471,7 +498,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: samestat(stat1, stat2) +.. function:: samestat(stat1, stat2, /) Return ``True`` if the stat tuples *stat1* and *stat2* refer to the same file. These structures may have been returned by :func:`os.fstat`, @@ -481,11 +508,8 @@ the :mod:`glob` module.) .. versionchanged:: 3.4 Added Windows support. - .. versionchanged:: 3.6 - Accepts a :term:`path-like object`. - -.. function:: split(path) +.. function:: split(path, /) Split the pathname *path* into a pair, ``(head, tail)`` where *tail* is the last pathname component and *head* is everything leading up to that. The @@ -501,7 +525,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: splitdrive(path) +.. function:: splitdrive(path, /) Split the pathname *path* into a pair ``(drive, tail)`` where *drive* is either a mount point or the empty string. On systems which do not use drive @@ -526,7 +550,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: splitroot(path) +.. function:: splitroot(path, /) Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where *drive* is a device name or mount point, *root* is a string of separators @@ -559,7 +583,7 @@ the :mod:`glob` module.) .. versionadded:: 3.12 -.. function:: splitext(path) +.. function:: splitext(path, /) Split the pathname *path* into a pair ``(root, ext)`` such that ``root + ext == path``, and the extension, *ext*, is empty or begins with a period and contains at diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 1e54cfec609bd2..1e109264d3dd1d 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -561,7 +561,7 @@ process and user. .. function:: initgroups(username, gid, /) - Call the system initgroups() to initialize the group access list with all of + Call the system ``initgroups()`` to initialize the group access list with all of the groups of which the specified username is a member, plus the specified group id. @@ -2009,8 +2009,8 @@ features: must be a string specifying a file path. However, some functions now alternatively accept an open file descriptor for their *path* argument. The function will then operate on the file referred to by the descriptor. - (For POSIX systems, Python will call the variant of the function prefixed - with ``f`` (e.g. call ``fchdir`` instead of ``chdir``).) + For POSIX systems, Python will call the variant of the function prefixed + with ``f`` (e.g. call ``fchdir`` instead of ``chdir``). You can check whether or not *path* can be specified as a file descriptor for a particular function on your platform using :data:`os.supports_fd`. @@ -2025,7 +2025,7 @@ features: * **paths relative to directory descriptors:** If *dir_fd* is not ``None``, it should be a file descriptor referring to a directory, and the path to operate on should be relative; path will then be relative to that directory. If the - path is absolute, *dir_fd* is ignored. (For POSIX systems, Python will call + path is absolute, *dir_fd* is ignored. For POSIX systems, Python will call the variant of the function with an ``at`` suffix and possibly prefixed with ``f`` (e.g. call ``faccessat`` instead of ``access``). @@ -2038,8 +2038,8 @@ features: * **not following symlinks:** If *follow_symlinks* is ``False``, and the last element of the path to operate on is a symbolic link, the function will operate on the symbolic link itself rather than the file - pointed to by the link. (For POSIX systems, Python will call the ``l...`` - variant of the function.) + pointed to by the link. For POSIX systems, Python will call the ``l...`` + variant of the function. You can check whether or not *follow_symlinks* is supported for a particular function on your platform using :data:`os.supports_follow_symlinks`. @@ -3505,6 +3505,9 @@ features: Create a symbolic link pointing to *src* named *dst*. + The *src* parameter refers to the target of the link (the file or directory being linked to), + and *dst* is the name of the link being created. + On Windows, a symlink represents either a file or a directory, and does not morph to the target dynamically. If the target is present, the type of the symlink will be created to match. Otherwise, the symlink will be created @@ -3760,9 +3763,9 @@ features: import os for root, dirs, files, rootfd in os.fwalk('python/Lib/xml'): - print(root, "consumes", end="") + print(root, "consumes", end=" ") print(sum([os.stat(name, dir_fd=rootfd).st_size for name in files]), - end="") + end=" ") print("bytes in", len(files), "non-directory files") if '__pycache__' in dirs: dirs.remove('__pycache__') # don't visit __pycache__ directories diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 7d7692dea5c38c..8d16009bc809a1 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -311,7 +311,7 @@ Pure paths provide the following methods and properties: .. attribute:: PurePath.parser The implementation of the :mod:`os.path` module used for low-level path - parsing and joining: either :mod:`posixpath` or :mod:`ntpath`. + parsing and joining: either :mod:`!posixpath` or :mod:`!ntpath`. .. versionadded:: 3.13 @@ -1781,9 +1781,12 @@ The following wildcards are supported in patterns for ``?`` Matches one non-separator character. ``[seq]`` - Matches one character in *seq*. + Matches one character in *seq*, where *seq* is a sequence of characters. + Range expressions are supported; for example, ``[a-z]`` matches any lowercase ASCII letter. + Multiple ranges can be combined: ``[a-zA-Z0-9_]`` matches any ASCII letter, digit, or underscore. + ``[!seq]`` - Matches one character not in *seq*. + Matches one character not in *seq*, where *seq* follows the same rules as above. For a literal match, wrap the meta-characters in brackets. For example, ``"[?]"`` matches the character ``"?"``. @@ -1982,7 +1985,7 @@ The :mod:`pathlib.types` module provides types for static type checking. If *follow_symlinks* is ``False``, return ``True`` only if the path is a file (without following symlinks); return ``False`` if the path - is a directory or other other non-file, or if it doesn't exist. + is a directory or other non-file, or if it doesn't exist. .. method:: is_symlink() diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index a0304edddf6478..0bbdc42535290a 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -75,12 +75,17 @@ The debugger's prompt is ``(Pdb)``, which is the indicator that you are in debug arguments of the ``p`` command. +.. _pdb-cli: + +Command-line interface +---------------------- + .. program:: pdb You can also invoke :mod:`pdb` from the command line to debug other scripts. For example:: - python -m pdb [-c command] (-m module | pyfile) [args ...] + python -m pdb [-c command] (-m module | -p pid | pyfile) [args ...] When invoked as a module, pdb will automatically enter post-mortem debugging if the program being debugged exits abnormally. After post-mortem debugging (or @@ -104,6 +109,24 @@ useful than quitting the debugger upon program's exit. .. versionchanged:: 3.7 Added the ``-m`` option. +.. option:: -p, --pid + + Attach to the process with the specified PID. + + .. versionadded:: 3.14 + + +To attach to a running Python process for remote debugging, use the ``-p`` or +``--pid`` option with the target process's PID:: + + python -m pdb -p 1234 + +.. note:: + + Attaching to a process that is blocked in a system call or waiting for I/O + will only work once the next bytecode instruction is executed or when the + process receives a signal. + Typical usage to execute a statement under control of the debugger is:: >>> import pdb @@ -315,7 +338,7 @@ access further features, you have to do this yourself: .. _debugger-commands: -Debugger Commands +Debugger commands ----------------- The commands recognized by the debugger are listed below. Most commands can be diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst index 007c9fe1b950cf..3a9b66ec7e7088 100644 --- a/Doc/library/pickle.rst +++ b/Doc/library/pickle.rst @@ -732,8 +732,8 @@ or both. These items will be appended to the object either using ``obj.append(item)`` or, in batch, using ``obj.extend(list_of_items)``. This is primarily used for list subclasses, but may be used by other - classes as long as they have - :ref:`append and extend methods ` with + classes as long as they have :meth:`~sequence.append` + and :meth:`~sequence.extend` methods with the appropriate signature. (Whether :meth:`!append` or :meth:`!extend` is used depends on which pickle protocol version is used as well as the number of items to append, so both must be supported.) diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index 20b8f6bcf19117..47d24b6f7d06bb 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -69,8 +69,8 @@ support. Yield :term:`finder` objects for the given module name. - If fullname contains a ``'.'``, the finders will be for the package - containing fullname, otherwise they will be all registered top level + If *fullname* contains a ``'.'``, the finders will be for the package + containing *fullname*, otherwise they will be all registered top level finders (i.e. those on both :data:`sys.meta_path` and :data:`sys.path_hooks`). If the named module is in a package, that package is imported as a side diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst index 5c999054323be5..256baa53ed59c7 100644 --- a/Doc/library/platform.rst +++ b/Doc/library/platform.rst @@ -56,6 +56,8 @@ Cross platform Returns the machine type, e.g. ``'AMD64'``. An empty string is returned if the value cannot be determined. + The output is platform-dependent and may differ in casing and naming conventions. + .. function:: node() @@ -176,8 +178,8 @@ Cross platform :attr:`processor` is resolved late, on demand. Note: the first two attribute names differ from the names presented by - :func:`os.uname`, where they are named :attr:`sysname` and - :attr:`nodename`. + :func:`os.uname`, where they are named :attr:`!sysname` and + :attr:`!nodename`. Entries which cannot be determined are set to ``''``. @@ -187,6 +189,14 @@ Cross platform .. versionchanged:: 3.9 :attr:`processor` is resolved late instead of immediately. +.. function:: invalidate_caches() + + Clear out the internal cache of information, such as the :func:`uname`. + This is typically useful when the platform's :func:`node` is changed + by an external process and one needs to retrieve the updated value. + + .. versionadded:: 3.14 + Java platform ------------- @@ -388,14 +398,3 @@ The following options are accepted: You can also pass one or more positional arguments (``terse``, ``nonaliased``) to explicitly control the output format. These behave similarly to their corresponding options. - -Miscellaneous -------------- - -.. function:: invalidate_caches() - - Clear out the internal cache of information, such as the :func:`uname`. - This is typically useful when the platform's :func:`node` is changed - by an external process and one needs to retrieve the updated value. - - .. versionadded:: 3.14 diff --git a/Doc/library/pprint.rst b/Doc/library/pprint.rst index 2985f31bacb47a..f51892450798ae 100644 --- a/Doc/library/pprint.rst +++ b/Doc/library/pprint.rst @@ -22,8 +22,6 @@ The formatted representation keeps objects on a single line if it can, and breaks them onto multiple lines if they don't fit within the allowed width, adjustable by the *width* parameter defaulting to 80 characters. -Dictionaries are sorted by key before the display is computed. - .. versionchanged:: 3.9 Added support for pretty-printing :class:`types.SimpleNamespace`. diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst index b6e51dffc40157..89433af5d879da 100644 --- a/Doc/library/profile.rst +++ b/Doc/library/profile.rst @@ -499,7 +499,7 @@ Analysis of the profiler data is done using the :class:`~pstats.Stats` class. significant entries. Initially, the list is taken to be the complete set of profiled functions. Each restriction is either an integer (to select a count of lines), or a decimal fraction between 0.0 and 1.0 inclusive (to - select a percentage of lines), or a string that will interpreted as a + select a percentage of lines), or a string that will be interpreted as a regular expression (to pattern match the standard name that is printed). If several restrictions are provided, then they are applied sequentially. For example:: diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 2d57cff10a9278..b2dd92f7ba2b9d 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -16,11 +16,10 @@ references to these attributes should be marked using the :member: role. -.. warning:: +.. note:: - The :mod:`pyexpat` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. index:: single: Expat @@ -73,6 +72,13 @@ The :mod:`xml.parsers.expat` module contains two functions: *encoding* [1]_ is given it will override the implicit or explicit encoding of the document. + .. _xmlparser-non-root: + + Parsers created through :func:`!ParserCreate` are called "root" parsers, + in the sense that they do not have any parent parser attached. Non-root + parsers are created by :meth:`parser.ExternalEntityParserCreate + `. + Expat can optionally do XML namespace processing for you, enabled by providing a value for *namespace_separator*. The value must be a one-character string; a :exc:`ValueError` will be raised if the string has an illegal length (``None`` @@ -232,6 +238,55 @@ XMLParser Objects .. versionadded:: 3.13 +:class:`!xmlparser` objects have the following methods to mitigate some +common XML vulnerabilities. + +.. method:: xmlparser.SetAllocTrackerActivationThreshold(threshold, /) + + Sets the number of allocated bytes of dynamic memory needed to activate + protection against disproportionate use of RAM. + + By default, parser objects have an allocation activation threshold of 64 MiB, + or equivalently 67,108,864 bytes. + + An :exc:`ExpatError` is raised if this method is called on a + |xml-non-root-parser| parser. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` + should not be used as they may have no special meaning. + + .. versionadded:: 3.14.1 + +.. method:: xmlparser.SetAllocTrackerMaximumAmplification(max_factor, /) + + Sets the maximum amplification factor between direct input and bytes + of dynamic memory allocated. + + The amplification factor is calculated as ``allocated / direct`` + while parsing, where ``direct`` is the number of bytes read from + the primary document in parsing and ``allocated`` is the number + of bytes of dynamic memory allocated in the parser hierarchy. + + The *max_factor* value must be a non-NaN :class:`float` value greater than + or equal to 1.0. Amplification factors greater than 100.0 can be observed + near the start of parsing even with benign files in practice. In particular, + the activation threshold should be carefully chosen to avoid false positives. + + By default, parser objects have a maximum amplification factor of 100.0. + + An :exc:`ExpatError` is raised if this method is called on a + |xml-non-root-parser| parser or if *max_factor* is outside the valid range. + The corresponding :attr:`~ExpatError.lineno` and :attr:`~ExpatError.offset` + should not be used as they may have no special meaning. + + .. note:: + + The maximum amplification factor is only considered if the threshold + that can be adjusted by :meth:`.SetAllocTrackerActivationThreshold` + is exceeded. + + .. versionadded:: 3.14.1 + + :class:`xmlparser` objects have the following attributes: @@ -503,6 +558,15 @@ otherwise stated. .. method:: xmlparser.ExternalEntityRefHandler(context, base, systemId, publicId) + .. warning:: + + Implementing a handler that accesses local files and/or the network + may create a vulnerability to + `external entity attacks `_ + if :class:`xmlparser` is used with user-provided XML content. + Please reflect on your `threat model `_ + before implementing this handler. + Called for references to external entities. *base* is the current base, as set by a previous call to :meth:`SetBase`. The public and system identifiers, *systemId* and *publicId*, are strings if given; if the public identifier is not @@ -955,3 +1019,4 @@ The ``errors`` module has the following attributes: not. See https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EncodingDecl and https://www.iana.org/assignments/character-sets/character-sets.xhtml. +.. |xml-non-root-parser| replace:: :ref:`non-root ` diff --git a/Doc/library/python.rst b/Doc/library/python.rst index c2c231af7c3033..c5c762e11b99e5 100644 --- a/Doc/library/python.rst +++ b/Doc/library/python.rst @@ -27,3 +27,8 @@ overview: inspect.rst annotationlib.rst site.rst + +.. seealso:: + + * See the :mod:`concurrent.interpreters` module, which similarly + exposes core runtime functionality. diff --git a/Doc/library/queue.rst b/Doc/library/queue.rst index fbbebcf4ed8f92..1b75582f0cf45b 100644 --- a/Doc/library/queue.rst +++ b/Doc/library/queue.rst @@ -187,9 +187,6 @@ fully processed by daemon consumer threads. processed (meaning that a :meth:`task_done` call was received for every item that had been :meth:`put` into the queue). - ``shutdown(immediate=True)`` calls :meth:`task_done` for each remaining item - in the queue. - Raises a :exc:`ValueError` if called more times than there were items placed in the queue. @@ -204,6 +201,9 @@ fully processed by daemon consumer threads. count of unfinished tasks drops to zero, :meth:`join` unblocks. +Waiting for task completion +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + Example of how to wait for enqueued tasks to be completed:: import threading @@ -233,22 +233,39 @@ Example of how to wait for enqueued tasks to be completed:: Terminating queues ^^^^^^^^^^^^^^^^^^ -:class:`Queue` objects can be made to prevent further interaction by shutting -them down. +When no longer needed, :class:`Queue` objects can be wound down +until empty or terminated immediately with a hard shutdown. .. method:: Queue.shutdown(immediate=False) - Shut down the queue, making :meth:`~Queue.get` and :meth:`~Queue.put` raise - :exc:`ShutDown`. + Put a :class:`Queue` instance into a shutdown mode. + + The queue can no longer grow. + Future calls to :meth:`~Queue.put` raise :exc:`ShutDown`. + Currently blocked callers of :meth:`~Queue.put` will be unblocked + and will raise :exc:`ShutDown` in the formerly blocked thread. + + If *immediate* is false (the default), the queue can be wound + down normally with :meth:`~Queue.get` calls to extract tasks + that have already been loaded. + + And if :meth:`~Queue.task_done` is called for each remaining task, a + pending :meth:`~Queue.join` will be unblocked normally. + + Once the queue is empty, future calls to :meth:`~Queue.get` will + raise :exc:`ShutDown`. - By default, :meth:`~Queue.get` on a shut down queue will only raise once the - queue is empty. Set *immediate* to true to make :meth:`~Queue.get` raise - immediately instead. + If *immediate* is true, the queue is terminated immediately. + The queue is drained to be completely empty and the count + of unfinished tasks is reduced by the number of tasks drained. + If unfinished tasks is zero, callers of :meth:`~Queue.join` + are unblocked. Also, blocked callers of :meth:`~Queue.get` + are unblocked and will raise :exc:`ShutDown` because the + queue is empty. - All blocked callers of :meth:`~Queue.put` and :meth:`~Queue.get` will be - unblocked. If *immediate* is true, a task will be marked as done for each - remaining item in the queue, which may unblock callers of - :meth:`~Queue.join`. + Use caution when using :meth:`~Queue.join` with *immediate* set + to true. This unblocks the join even when no work has been done + on the tasks, violating the usual invariant for joining a queue. .. versionadded:: 3.13 diff --git a/Doc/library/random.rst b/Doc/library/random.rst index ef0cfb0e76cef6..b1120b3a4d8eb4 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -447,6 +447,11 @@ Alternative Generator Override this method in subclasses to customise the :meth:`~random.getrandbits` behaviour of :class:`!Random` instances. + .. method:: Random.randbytes(n) + + Override this method in subclasses to customise the + :meth:`~random.randbytes` behaviour of :class:`!Random` instances. + .. class:: SystemRandom([seed]) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index eb3b1e5549cc98..75ebbf11c8e47c 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -991,8 +991,8 @@ Functions That way, separator components are always found at the same relative indices within the result list. - Empty matches for the pattern split the string only when not adjacent - to a previous empty match. + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. .. code:: pycon @@ -1095,9 +1095,12 @@ Functions The optional argument *count* is the maximum number of pattern occurrences to be replaced; *count* must be a non-negative integer. If omitted or zero, all - occurrences will be replaced. Empty matches for the pattern are replaced only - when not adjacent to a previous empty match, so ``sub('x*', '-', 'abxd')`` returns - ``'-a-b--d-'``. + occurrences will be replaced. + + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. + As a result, ``sub('x*', '-', 'abxd')`` returns ``'-a-b--d-'`` + instead of ``'-a-b-d-'``. .. index:: single: \g; in regular expressions @@ -1128,8 +1131,7 @@ Functions .. versionchanged:: 3.7 Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter now are errors. - Empty matches for the pattern are replaced when adjacent to a previous - non-empty match. + An empty match can occur immediately after a non-empty match. .. versionchanged:: 3.12 Group *id* can only contain ASCII digits. diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst index f649fce5efc377..75db832c546b64 100644 --- a/Doc/library/readline.rst +++ b/Doc/library/readline.rst @@ -26,6 +26,8 @@ Readline library in general. .. include:: ../includes/wasm-mobile-notavail.rst +.. include:: ../includes/optional-module.rst + .. note:: The underlying Readline library API may be implemented by diff --git a/Doc/library/resource.rst b/Doc/library/resource.rst index 0515d205bbca0b..512f0852dd5333 100644 --- a/Doc/library/resource.rst +++ b/Doc/library/resource.rst @@ -63,12 +63,12 @@ this module for those platforms. Sets new limits of consumption of *resource*. The *limits* argument must be a tuple ``(soft, hard)`` of two integers describing the new limits. A value of - :data:`~resource.RLIM_INFINITY` can be used to request a limit that is + :const:`~resource.RLIM_INFINITY` can be used to request a limit that is unlimited. Raises :exc:`ValueError` if an invalid resource is specified, if the new soft limit exceeds the hard limit, or if a process tries to raise its hard limit. - Specifying a limit of :data:`~resource.RLIM_INFINITY` when the hard or + Specifying a limit of :const:`~resource.RLIM_INFINITY` when the hard or system limit for that resource is not unlimited will result in a :exc:`ValueError`. A process with the effective UID of super-user can request any valid limit value, including unlimited, but :exc:`ValueError` @@ -78,7 +78,7 @@ this module for those platforms. ``setrlimit`` may also raise :exc:`error` if the underlying system call fails. - VxWorks only supports setting :data:`RLIMIT_NOFILE`. + VxWorks only supports setting :const:`RLIMIT_NOFILE`. .. audit-event:: resource.setrlimit resource,limits resource.setrlimit @@ -127,7 +127,7 @@ platform. .. data:: RLIMIT_CPU The maximum amount of processor time (in seconds) that a process can use. If - this limit is exceeded, a :const:`SIGXCPU` signal is sent to the process. (See + this limit is exceeded, a :const:`~signal.SIGXCPU` signal is sent to the process. (See the :mod:`signal` module documentation for information about how to catch this signal and do something useful, e.g. flush open files to disk.) @@ -176,8 +176,9 @@ platform. .. data:: RLIMIT_VMEM The largest area of mapped memory which the process may occupy. + Usually an alias of :const:`RLIMIT_AS`. - .. availability:: FreeBSD >= 11. + .. availability:: Solaris, FreeBSD, NetBSD. .. data:: RLIMIT_AS @@ -230,16 +231,18 @@ platform. .. versionadded:: 3.4 + .. data:: RLIMIT_SBSIZE The maximum size (in bytes) of socket buffer usage for this user. This limits the amount of network memory, and hence the amount of mbufs, that this user may hold at any time. - .. availability:: FreeBSD. + .. availability:: FreeBSD, NetBSD. .. versionadded:: 3.4 + .. data:: RLIMIT_SWAP The maximum size (in bytes) of the swap space that may be reserved or @@ -249,18 +252,20 @@ platform. `tuning(7) `__ for a complete description of this sysctl. - .. availability:: FreeBSD. + .. availability:: FreeBSD >= 8. .. versionadded:: 3.4 + .. data:: RLIMIT_NPTS The maximum number of pseudo-terminals created by this user id. - .. availability:: FreeBSD. + .. availability:: FreeBSD >= 8. .. versionadded:: 3.4 + .. data:: RLIMIT_KQUEUES The maximum number of kqueues this user id is allowed to create. @@ -269,6 +274,7 @@ platform. .. versionadded:: 3.10 + Resource Usage -------------- @@ -304,47 +310,47 @@ These functions are used to retrieve resource usage information: For backward compatibility, the return value is also accessible as a tuple of 16 elements. - The fields :attr:`ru_utime` and :attr:`ru_stime` of the return value are + The fields :attr:`!ru_utime` and :attr:`!ru_stime` of the return value are floating-point values representing the amount of time spent executing in user mode and the amount of time spent executing in system mode, respectively. The remaining values are integers. Consult the :manpage:`getrusage(2)` man page for detailed information about these values. A brief summary is presented here: - +--------+---------------------+---------------------------------------+ - | Index | Field | Resource | - +========+=====================+=======================================+ - | ``0`` | :attr:`ru_utime` | time in user mode (float seconds) | - +--------+---------------------+---------------------------------------+ - | ``1`` | :attr:`ru_stime` | time in system mode (float seconds) | - +--------+---------------------+---------------------------------------+ - | ``2`` | :attr:`ru_maxrss` | maximum resident set size | - +--------+---------------------+---------------------------------------+ - | ``3`` | :attr:`ru_ixrss` | shared memory size | - +--------+---------------------+---------------------------------------+ - | ``4`` | :attr:`ru_idrss` | unshared memory size | - +--------+---------------------+---------------------------------------+ - | ``5`` | :attr:`ru_isrss` | unshared stack size | - +--------+---------------------+---------------------------------------+ - | ``6`` | :attr:`ru_minflt` | page faults not requiring I/O | - +--------+---------------------+---------------------------------------+ - | ``7`` | :attr:`ru_majflt` | page faults requiring I/O | - +--------+---------------------+---------------------------------------+ - | ``8`` | :attr:`ru_nswap` | number of swap outs | - +--------+---------------------+---------------------------------------+ - | ``9`` | :attr:`ru_inblock` | block input operations | - +--------+---------------------+---------------------------------------+ - | ``10`` | :attr:`ru_oublock` | block output operations | - +--------+---------------------+---------------------------------------+ - | ``11`` | :attr:`ru_msgsnd` | messages sent | - +--------+---------------------+---------------------------------------+ - | ``12`` | :attr:`ru_msgrcv` | messages received | - +--------+---------------------+---------------------------------------+ - | ``13`` | :attr:`ru_nsignals` | signals received | - +--------+---------------------+---------------------------------------+ - | ``14`` | :attr:`ru_nvcsw` | voluntary context switches | - +--------+---------------------+---------------------------------------+ - | ``15`` | :attr:`ru_nivcsw` | involuntary context switches | - +--------+---------------------+---------------------------------------+ + +--------+----------------------+---------------------------------------+ + | Index | Field | Resource | + +========+======================+=======================================+ + | ``0`` | :attr:`!ru_utime` | time in user mode (float seconds) | + +--------+----------------------+---------------------------------------+ + | ``1`` | :attr:`!ru_stime` | time in system mode (float seconds) | + +--------+----------------------+---------------------------------------+ + | ``2`` | :attr:`!ru_maxrss` | maximum resident set size | + +--------+----------------------+---------------------------------------+ + | ``3`` | :attr:`!ru_ixrss` | shared memory size | + +--------+----------------------+---------------------------------------+ + | ``4`` | :attr:`!ru_idrss` | unshared memory size | + +--------+----------------------+---------------------------------------+ + | ``5`` | :attr:`!ru_isrss` | unshared stack size | + +--------+----------------------+---------------------------------------+ + | ``6`` | :attr:`!ru_minflt` | page faults not requiring I/O | + +--------+----------------------+---------------------------------------+ + | ``7`` | :attr:`!ru_majflt` | page faults requiring I/O | + +--------+----------------------+---------------------------------------+ + | ``8`` | :attr:`!ru_nswap` | number of swap outs | + +--------+----------------------+---------------------------------------+ + | ``9`` | :attr:`!ru_inblock` | block input operations | + +--------+----------------------+---------------------------------------+ + | ``10`` | :attr:`!ru_oublock` | block output operations | + +--------+----------------------+---------------------------------------+ + | ``11`` | :attr:`!ru_msgsnd` | messages sent | + +--------+----------------------+---------------------------------------+ + | ``12`` | :attr:`!ru_msgrcv` | messages received | + +--------+----------------------+---------------------------------------+ + | ``13`` | :attr:`!ru_nsignals` | signals received | + +--------+----------------------+---------------------------------------+ + | ``14`` | :attr:`!ru_nvcsw` | voluntary context switches | + +--------+----------------------+---------------------------------------+ + | ``15`` | :attr:`!ru_nivcsw` | involuntary context switches | + +--------+----------------------+---------------------------------------+ This function will raise a :exc:`ValueError` if an invalid *who* parameter is specified. It may also raise :exc:`error` exception in unusual circumstances. diff --git a/Doc/library/security_warnings.rst b/Doc/library/security_warnings.rst index a573c98f73eb0a..70c359cc1c0fc3 100644 --- a/Doc/library/security_warnings.rst +++ b/Doc/library/security_warnings.rst @@ -28,7 +28,7 @@ The following modules have specific security considerations: ` * :mod:`tempfile`: :ref:`mktemp is deprecated due to vulnerability to race conditions ` -* :mod:`xml`: :ref:`XML vulnerabilities ` +* :mod:`xml`: :ref:`XML security ` * :mod:`zipfile`: :ref:`maliciously prepared .zip files can cause disk volume exhaustion ` diff --git a/Doc/library/select.rst b/Doc/library/select.rst index d2094283d54736..0f0c76060df733 100644 --- a/Doc/library/select.rst +++ b/Doc/library/select.rst @@ -115,7 +115,7 @@ The module defines the following: :ref:`kevent-objects` below for the methods supported by kevent objects. -.. function:: select(rlist, wlist, xlist[, timeout]) +.. function:: select(rlist, wlist, xlist, timeout=None) This is a straightforward interface to the Unix :c:func:`!select` system call. The first three arguments are iterables of 'waitable objects': either @@ -130,7 +130,8 @@ The module defines the following: Empty iterables are allowed, but acceptance of three empty iterables is platform-dependent. (It is known to work on Unix but not on Windows.) The optional *timeout* argument specifies a time-out as a floating-point number - in seconds. When the *timeout* argument is omitted the function blocks until + in seconds. + When the *timeout* argument is omitted or ``None``, the function blocks until at least one file descriptor is ready. A time-out value of zero specifies a poll and never blocks. diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 2cbf95bcf535e4..3a4631e7c657fe 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -47,6 +47,13 @@ Directory and files operations 0, only the contents from the current file position to the end of the file will be copied. + :func:`copyfileobj` will *not* guarantee that the destination stream has + been flushed on completion of the copy. If you want to read from the + destination at the completion of the copy operation (for example, reading + the contents of a temporary file that has been copied from a HTTP stream), + you must ensure that you have called :func:`~io.IOBase.flush` or + :func:`~io.IOBase.close` on the file-like object before attempting to read + the destination file. .. function:: copyfile(src, dst, *, follow_symlinks=True) @@ -83,6 +90,13 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. +.. exception:: SpecialFileError + + This exception is raised when :func:`copyfile` or :func:`copytree` attempt + to copy a named pipe. + + .. versionadded:: 2.7 + .. exception:: SameFileError This exception is raised if source and destination in :func:`copyfile` @@ -327,6 +341,10 @@ Directory and files operations The deprecated *onerror* is similar to *onexc*, except that the third parameter it receives is the tuple returned from :func:`sys.exc_info`. + .. seealso:: + :ref:`shutil-rmtree-example` for an example of handling the removal + of a directory tree that contains read-only files. + .. audit-event:: shutil.rmtree path,dir_fd shutil.rmtree .. versionchanged:: 3.3 @@ -454,6 +472,10 @@ Directory and files operations :envvar:`PATH` environment variable is read from :data:`os.environ`, falling back to :data:`os.defpath` if it is not set. + If *cmd* contains a directory component, :func:`!which` only checks the + specified path directly and does not search the directories listed in + *path* or in the system's :envvar:`PATH` environment variable. + On Windows, the current directory is prepended to the *path* if *mode* does not include ``os.X_OK``. When the *mode* does include ``os.X_OK``, the Windows API ``NeedCurrentDirectoryForExePathW`` will be consulted to @@ -603,7 +625,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *format* is the archive format: one of "zip" (if the :mod:`zlib` module is available), "tar", "gztar" (if the :mod:`zlib` module is available), "bztar" (if the :mod:`bz2` module is - available), or "xztar" (if the :mod:`lzma` module is available). + available), "xztar" (if the :mod:`lzma` module is available), or "zstdtar" + (if the :mod:`compression.zstd` module is available). *root_dir* is a directory that will be the root directory of the archive, all paths in the archive will be relative to it; for example, @@ -658,6 +681,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). + - *zstdtar*: Zstandard compressed tar-file (if the :mod:`compression.zstd` + module is available). You can register new formats or provide your own archiver for any existing formats, by using :func:`register_archive_format`. @@ -701,8 +726,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *extract_dir* is the name of the target directory where the archive is unpacked. If not provided, the current working directory is used. - *format* is the archive format: one of "zip", "tar", "gztar", "bztar", or - "xztar". Or any other format registered with + *format* is the archive format: one of "zip", "tar", "gztar", "bztar", + "xztar", or "zstdtar". Or any other format registered with :func:`register_unpack_format`. If not provided, :func:`unpack_archive` will use the archive file name extension and see if an unpacker was registered for that extension. In case none is found, @@ -774,6 +799,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). + - *zstdtar*: Zstandard compressed tar-file (if the :mod:`compression.zstd` + module is available). You can register new formats or provide your own unpacker for any existing formats, by using :func:`register_unpack_format`. @@ -840,7 +867,7 @@ In the final archive, :file:`please_add.txt` should be included, but ... root_dir='tmp/root', ... base_dir='structure/content', ... ) - '/Users/tarek/my_archive.tar' + '/Users/tarek/myarchive.tar' Listing the files in the resulting archive gives us: diff --git a/Doc/library/signal.rst b/Doc/library/signal.rst index c28841dbb8cfc8..1a2a555f5c0fc5 100644 --- a/Doc/library/signal.rst +++ b/Doc/library/signal.rst @@ -205,14 +205,30 @@ The variables defined in the :mod:`signal` module are: .. availability:: Unix. +.. data:: SIGPROF + + Profiling timer expired. + + .. availability:: Unix. + +.. data:: SIGQUIT + + Terminal quit signal. + + .. availability:: Unix. + .. data:: SIGSEGV Segmentation fault: invalid memory reference. +.. data:: SIGSTOP + + Stop executing (cannot be caught or ignored). + .. data:: SIGSTKFLT - Stack fault on coprocessor. The Linux kernel does not raise this signal: it - can only be raised in user space. + Stack fault on coprocessor. The Linux kernel does not raise this signal: it + can only be raised in user space. .. availability:: Linux. @@ -237,18 +253,30 @@ The variables defined in the :mod:`signal` module are: .. availability:: Unix. +.. data:: SIGVTALRM + + Virtual timer expired. + + .. availability:: Unix. + .. data:: SIGWINCH Window resize signal. .. availability:: Unix. +.. data:: SIGXCPU + + CPU time limit exceeded. + + .. availability:: Unix. + .. data:: SIG* All the signal numbers are defined symbolically. For example, the hangup signal is defined as :const:`signal.SIGHUP`; the variable names are identical to the names used in C programs, as found in ````. The Unix man page for - ':c:func:`signal`' lists the existing signals (on some systems this is + '``signal``' lists the existing signals (on some systems this is :manpage:`signal(2)`, on others the list is in :manpage:`signal(7)`). Note that not all systems define the same set of signal names; only those names defined by the system are defined by this module. @@ -639,9 +667,8 @@ The :mod:`signal` module defines the following functions: *sigset*. The return value is an object representing the data contained in the - :c:type:`siginfo_t` structure, namely: :attr:`si_signo`, :attr:`si_code`, - :attr:`si_errno`, :attr:`si_pid`, :attr:`si_uid`, :attr:`si_status`, - :attr:`si_band`. + ``siginfo_t`` structure, namely: ``si_signo``, ``si_code``, + ``si_errno``, ``si_pid``, ``si_uid``, ``si_status``, ``si_band``. .. availability:: Unix. diff --git a/Doc/library/site.rst b/Doc/library/site.rst index e98dd83b60eb60..d93e4dc7c75f1a 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -270,7 +270,7 @@ Module contents .. _site-commandline: -Command Line Interface +Command-line interface ---------------------- .. program:: site diff --git a/Doc/library/smtplib.rst b/Doc/library/smtplib.rst index c5f8516f768a68..c5a3de52090cee 100644 --- a/Doc/library/smtplib.rst +++ b/Doc/library/smtplib.rst @@ -80,8 +80,8 @@ Protocol) and :rfc:`1869` (SMTP Service Extensions). An :class:`SMTP_SSL` instance behaves exactly the same as instances of :class:`SMTP`. :class:`SMTP_SSL` should be used for situations where SSL is - required from the beginning of the connection and using :meth:`starttls` is - not appropriate. If *host* is not specified, the local host is used. If + required from the beginning of the connection and using :meth:`~SMTP.starttls` + is not appropriate. If *host* is not specified, the local host is used. If *port* is zero, the standard SMTP-over-SSL port (465) is used. The optional arguments *local_hostname*, *timeout* and *source_address* have the same meaning as they do in the :class:`SMTP` class. *context*, also optional, @@ -112,7 +112,7 @@ Protocol) and :rfc:`1869` (SMTP Service Extensions). The LMTP protocol, which is very similar to ESMTP, is heavily based on the standard SMTP client. It's common to use Unix sockets for LMTP, so our - :meth:`connect` method must support that as well as a regular host:port + :meth:`~SMTP.connect` method must support that as well as a regular host:port server. The optional arguments *local_hostname* and *source_address* have the same meaning as they do in the :class:`SMTP` class. To specify a Unix socket, you must use an absolute path for *host*, starting with a '/'. @@ -147,9 +147,15 @@ A nice selection of exceptions is defined as well: .. exception:: SMTPResponseException Base class for all exceptions that include an SMTP error code. These exceptions - are generated in some instances when the SMTP server returns an error code. The - error code is stored in the :attr:`smtp_code` attribute of the error, and the - :attr:`smtp_error` attribute is set to the error message. + are generated in some instances when the SMTP server returns an error code. + + .. attribute:: smtp_code + + The error code. + + .. attribute:: smtp_error + + The error message. .. exception:: SMTPSenderRefused @@ -161,9 +167,13 @@ A nice selection of exceptions is defined as well: .. exception:: SMTPRecipientsRefused - All recipient addresses refused. The errors for each recipient are accessible - through the attribute :attr:`recipients`, which is a dictionary of exactly the - same sort as :meth:`SMTP.sendmail` returns. + All recipient addresses refused. + + .. attribute:: recipients + + A dictionary of exactly the same sort as returned + by :meth:`SMTP.sendmail` containing the errors for + each recipient. .. exception:: SMTPDataError @@ -213,7 +223,6 @@ SMTP Objects An :class:`SMTP` instance has the following methods: - .. method:: SMTP.set_debuglevel(level) Set the debug output level. A value of 1 or ``True`` for *level* results in @@ -417,7 +426,7 @@ An :class:`SMTP` instance has the following methods: .. versionchanged:: 3.4 The method now supports hostname check with - :attr:`SSLContext.check_hostname` and *Server Name Indicator* (see + :attr:`ssl.SSLContext.check_hostname` and *Server Name Indicator* (see :const:`~ssl.HAS_SNI`). .. versionchanged:: 3.5 @@ -435,7 +444,7 @@ An :class:`SMTP` instance has the following methods: ESMTP options (such as ``DSN`` commands) that should be used with all ``RCPT`` commands can be passed as *rcpt_options*. (If you need to use different ESMTP options to different recipients you have to use the low-level methods such as - :meth:`mail`, :meth:`rcpt` and :meth:`data` to send the message.) + :meth:`!mail`, :meth:`!rcpt` and :meth:`!data` to send the message.) .. note:: @@ -467,10 +476,7 @@ An :class:`SMTP` instance has the following methods: This method may raise the following exceptions: :exc:`SMTPRecipientsRefused` - All recipients were refused. Nobody got the mail. The :attr:`recipients` - attribute of the exception object is a dictionary with information about the - refused recipients (like the one returned when at least one recipient was - accepted). + All recipients were refused. Nobody got the mail. :exc:`SMTPHeloError` The server didn't reply properly to the ``HELO`` greeting. @@ -546,6 +552,30 @@ Low-level methods corresponding to the standard SMTP/ESMTP commands ``HELP``, Normally these do not need to be called directly, so they are not documented here. For details, consult the module code. +Additionally, an SMTP instance has the following attributes: + + +.. attribute:: SMTP.helo_resp + + The response to the ``HELO`` command, see :meth:`helo`. + + +.. attribute:: SMTP.ehlo_resp + + The response to the ``EHLO`` command, see :meth:`ehlo`. + + +.. attribute:: SMTP.does_esmtp + + A boolean value indicating whether the server supports ESMTP, see + :meth:`ehlo`. + + +.. attribute:: SMTP.esmtp_features + + A dictionary of the names of SMTP service extensions supported by the server, + see :meth:`ehlo`. + .. _smtp-example: diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 8fd5187e3a4a36..782ea7d8fdaed2 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -362,10 +362,10 @@ Exceptions Constants ^^^^^^^^^ - The AF_* and SOCK_* constants are now :class:`AddressFamily` and - :class:`SocketKind` :class:`.IntEnum` collections. +The AF_* and SOCK_* constants are now :class:`AddressFamily` and +:class:`SocketKind` :class:`.IntEnum` collections. - .. versionadded:: 3.4 +.. versionadded:: 3.4 .. data:: AF_UNIX AF_INET @@ -773,9 +773,9 @@ Constants Constant to optimize CPU locality, to be used in conjunction with :data:`SO_REUSEPORT`. - .. versionadded:: 3.11 + .. versionadded:: 3.11 - .. availability:: Linux >= 3.9 + .. availability:: Linux >= 3.9 .. data:: SO_REUSEPORT_LB @@ -1492,7 +1492,7 @@ The :mod:`socket` module also offers various network-related services: The *fds* parameter is a sequence of file descriptors. Consult :meth:`~socket.sendmsg` for the documentation of these parameters. - .. availability:: Unix, Windows, not WASI. + .. availability:: Unix, not WASI. Unix platforms supporting :meth:`~socket.sendmsg` and :const:`SCM_RIGHTS` mechanism. @@ -1506,9 +1506,9 @@ The :mod:`socket` module also offers various network-related services: Return ``(msg, list(fds), flags, addr)``. Consult :meth:`~socket.recvmsg` for the documentation of these parameters. - .. availability:: Unix, Windows, not WASI. + .. availability:: Unix, not WASI. - Unix platforms supporting :meth:`~socket.sendmsg` + Unix platforms supporting :meth:`~socket.recvmsg` and :const:`SCM_RIGHTS` mechanism. .. versionadded:: 3.9 @@ -2086,11 +2086,8 @@ to sockets. :attr:`socket.type`. -.. method:: socket.setsockopt(level, optname, value: int) -.. method:: socket.setsockopt(level, optname, value: buffer) - :noindex: -.. method:: socket.setsockopt(level, optname, None, optlen: int) - :noindex: +.. method:: socket.setsockopt(level, optname, value: int | Buffer) + socket.setsockopt(level, optname, None, optlen: int) .. index:: pair: module; struct diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 59cfa136a3b7da..7545b5fb0a526a 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -541,10 +541,10 @@ objects that simplify communication by providing the standard file interface):: The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the -the first handler had to use a ``recv()`` loop to accumulate data until a +first handler had to use a ``recv()`` loop to accumulate data until a newline itself. If it had just used a single ``recv()`` without the loop it would just have returned what has been received so far from the client. -TCP is stream based: data arrives in the order it was sent, but there no +TCP is stream based: data arrives in the order it was sent, but there is no correlation between client ``send()`` or ``sendall()`` calls and the number of ``recv()`` calls on the server required to receive it. diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index c615650b622f7f..cbc3f32d9b9157 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -31,7 +31,9 @@ PostgreSQL or Oracle. The :mod:`!sqlite3` module was written by Gerhard Häring. It provides an SQL interface compliant with the DB-API 2.0 specification described by :pep:`249`, and -requires SQLite 3.15.2 or newer. +requires the third-party `SQLite `_ library. + +.. include:: ../includes/optional-module.rst This document includes four main sections: @@ -614,7 +616,7 @@ Connection objects supplied, this must be a :term:`callable` returning an instance of :class:`Cursor` or its subclasses. - .. method:: blobopen(table, column, row, /, *, readonly=False, name="main") + .. method:: blobopen(table, column, rowid, /, *, readonly=False, name="main") Open a :class:`Blob` handle to an existing :abbr:`BLOB (Binary Large OBject)`. @@ -625,8 +627,8 @@ Connection objects :param str column: The name of the column where the blob is located. - :param str row: - The name of the row where the blob is located. + :param int rowid: + The row id where the blob is located. :param bool readonly: Set to ``True`` if the blob should be opened without write @@ -1492,7 +1494,9 @@ Cursor objects :type parameters: :class:`dict` | :term:`sequence` :raises ProgrammingError: - If *sql* contains more than one SQL statement. + When *sql* contains more than one SQL statement. + When :ref:`named placeholders ` are used + and *parameters* is a sequence instead of a :class:`dict`. If :attr:`~Connection.autocommit` is :data:`LEGACY_TRANSACTION_CONTROL`, @@ -1501,13 +1505,11 @@ Cursor objects and there is no open transaction, a transaction is implicitly opened before executing *sql*. - .. deprecated-removed:: 3.12 3.14 + .. versionchanged:: 3.14 - :exc:`DeprecationWarning` is emitted if + :exc:`ProgrammingError` is emitted if :ref:`named placeholders ` are used and *parameters* is a sequence instead of a :class:`dict`. - Starting with Python 3.14, :exc:`ProgrammingError` will - be raised instead. Use :meth:`executescript` to execute multiple SQL statements. @@ -1529,8 +1531,10 @@ Cursor objects :type parameters: :term:`iterable` :raises ProgrammingError: - If *sql* contains more than one SQL statement, - or is not a DML statement. + When *sql* contains more than one SQL statement + or is not a DML statement, + When :ref:`named placeholders ` are used + and the items in *parameters* are sequences instead of :class:`dict`\s. Example: @@ -1554,14 +1558,12 @@ Cursor objects .. _RETURNING clauses: https://www.sqlite.org/lang_returning.html - .. deprecated-removed:: 3.12 3.14 + .. versionchanged:: 3.14 - :exc:`DeprecationWarning` is emitted if + :exc:`ProgrammingError` is emitted if :ref:`named placeholders ` are used and the items in *parameters* are sequences instead of :class:`dict`\s. - Starting with Python 3.14, :exc:`ProgrammingError` will - be raised instead. .. method:: executescript(sql_script, /) @@ -1612,6 +1614,9 @@ Cursor objects If the *size* parameter is used, then it is best for it to retain the same value from one :meth:`fetchmany` call to the next. + .. versionchanged:: 3.14.1 + Negative *size* values are rejected by raising :exc:`ValueError`. + .. method:: fetchall() Return all (remaining) rows of a query result as a :class:`list`. @@ -1639,6 +1644,9 @@ Cursor objects Read/write attribute that controls the number of rows returned by :meth:`fetchmany`. The default value is 1 which means a single row would be fetched per call. + .. versionchanged:: 3.14.1 + Negative values are rejected by raising :exc:`ValueError`. + .. attribute:: connection Read-only attribute that provides the SQLite database :class:`Connection` @@ -2289,7 +2297,7 @@ This section shows recipes for common adapters and converters. def adapt_datetime_iso(val): """Adapt datetime.datetime to timezone-naive ISO 8601 date.""" - return val.isoformat() + return val.replace(tzinfo=None).isoformat() def adapt_datetime_epoch(val): """Adapt datetime.datetime to Unix timestamp.""" diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index c0dcecf737ef76..93cdf6d85f01dd 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -18,8 +18,9 @@ This module provides access to Transport Layer Security (often known as "Secure Sockets Layer") encryption and peer authentication facilities for network sockets, both client-side and server-side. This module uses the OpenSSL -library. It is available on all modern Unix systems, Windows, macOS, and -probably additional platforms, as long as OpenSSL is installed on that platform. +library. + +.. include:: ../includes/optional-module.rst .. note:: @@ -125,7 +126,8 @@ Context creation A convenience function helps create :class:`SSLContext` objects for common purposes. -.. function:: create_default_context(purpose=Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) +.. function:: create_default_context(purpose=Purpose.SERVER_AUTH, *,\ + cafile=None, capath=None, cadata=None) Return a new :class:`SSLContext` object with default settings for the given *purpose*. The settings are chosen by the :mod:`ssl` module, @@ -314,7 +316,7 @@ Exceptions Random generation ^^^^^^^^^^^^^^^^^ -.. function:: RAND_bytes(num) +.. function:: RAND_bytes(num, /) Return *num* cryptographically strong pseudo-random bytes. Raises an :class:`SSLError` if the PRNG has not been seeded with enough data or if the @@ -338,7 +340,7 @@ Random generation :func:`ssl.RAND_egd` and :func:`ssl.RAND_add` to increase the randomness of the pseudo-random number generator. -.. function:: RAND_add(bytes, entropy) +.. function:: RAND_add(bytes, entropy, /) Mix the given *bytes* into the SSL pseudo-random number generator. The parameter *entropy* (a float) is a lower bound on the entropy contained in @@ -406,12 +408,12 @@ Certificate handling .. versionchanged:: 3.10 The *timeout* parameter was added. -.. function:: DER_cert_to_PEM_cert(DER_cert_bytes) +.. function:: DER_cert_to_PEM_cert(der_cert_bytes) Given a certificate as a DER-encoded blob of bytes, returns a PEM-encoded string version of the same certificate. -.. function:: PEM_cert_to_DER_cert(PEM_cert_string) +.. function:: PEM_cert_to_DER_cert(pem_cert_string) Given a certificate as an ASCII PEM string, returns a DER-encoded sequence of bytes for that same certificate. @@ -1128,10 +1130,10 @@ SSL sockets also have the following additional methods and attributes: .. deprecated:: 3.6 Use :meth:`~SSLSocket.recv` instead of :meth:`~SSLSocket.read`. -.. method:: SSLSocket.write(buf) +.. method:: SSLSocket.write(data) - Write *buf* to the SSL socket and return the number of bytes written. The - *buf* argument must be an object supporting the buffer interface. + Write *data* to the SSL socket and return the number of bytes written. The + *data* argument must be an object supporting the buffer interface. Raise :exc:`SSLWantReadError` or :exc:`SSLWantWriteError` if the socket is :ref:`non-blocking ` and the write would block. @@ -1141,7 +1143,7 @@ SSL sockets also have the following additional methods and attributes: .. versionchanged:: 3.5 The socket timeout is no longer reset each time bytes are received or sent. - The socket timeout is now the maximum total duration to write *buf*. + The socket timeout is now the maximum total duration to write *data*. .. deprecated:: 3.6 Use :meth:`~SSLSocket.send` instead of :meth:`~SSLSocket.write`. @@ -1158,12 +1160,15 @@ SSL sockets also have the following additional methods and attributes: :meth:`~socket.socket.recv` and :meth:`~socket.socket.send` instead of these methods. -.. method:: SSLSocket.do_handshake() +.. method:: SSLSocket.do_handshake(block=False) Perform the SSL setup handshake. + If *block* is true and the timeout obtained by :meth:`~socket.socket.gettimeout` + is zero, the socket is set in blocking mode until the handshake is performed. + .. versionchanged:: 3.4 - The handshake method also performs :func:`match_hostname` when the + The handshake method also performs :func:`!match_hostname` when the :attr:`~SSLContext.check_hostname` attribute of the socket's :attr:`~SSLSocket.context` is true. @@ -1173,7 +1178,7 @@ SSL sockets also have the following additional methods and attributes: .. versionchanged:: 3.7 Hostname or IP address is matched by OpenSSL during handshake. The - function :func:`match_hostname` is no longer used. In case OpenSSL + function :func:`!match_hostname` is no longer used. In case OpenSSL refuses a hostname or IP address, the handshake is aborted early and a TLS alert message is sent to the peer. @@ -1643,7 +1648,7 @@ to speed up repeated connections from the same clients. provided as part of the operating system, though, it is likely to be configured properly. -.. method:: SSLContext.set_ciphers(ciphers) +.. method:: SSLContext.set_ciphers(ciphers, /) Set the available ciphers for sockets created with this context. It should be a string in the `OpenSSL cipher list format @@ -1659,7 +1664,7 @@ to speed up repeated connections from the same clients. TLS 1.3 cipher suites cannot be disabled with :meth:`~SSLContext.set_ciphers`. -.. method:: SSLContext.set_alpn_protocols(protocols) +.. method:: SSLContext.set_alpn_protocols(alpn_protocols) Specify which protocols the socket should advertise during the SSL/TLS handshake. It should be a list of ASCII strings, like ``['http/1.1', @@ -1673,7 +1678,7 @@ to speed up repeated connections from the same clients. .. versionadded:: 3.5 -.. method:: SSLContext.set_npn_protocols(protocols) +.. method:: SSLContext.set_npn_protocols(npn_protocols) Specify which protocols the socket should advertise during the SSL/TLS handshake. It should be a list of strings, like ``['http/1.1', 'spdy/2']``, @@ -1740,7 +1745,7 @@ to speed up repeated connections from the same clients. .. versionadded:: 3.7 -.. attribute:: SSLContext.set_servername_callback(server_name_callback) +.. method:: SSLContext.set_servername_callback(server_name_callback) This is a legacy API retained for backwards compatibility. When possible, you should use :attr:`sni_callback` instead. The given *server_name_callback* @@ -1754,7 +1759,7 @@ to speed up repeated connections from the same clients. .. versionadded:: 3.4 -.. method:: SSLContext.load_dh_params(dhfile) +.. method:: SSLContext.load_dh_params(dhfile, /) Load the key generation parameters for Diffie-Hellman (DH) key exchange. Using DH key exchange improves forward secrecy at the expense of @@ -1767,7 +1772,7 @@ to speed up repeated connections from the same clients. .. versionadded:: 3.3 -.. method:: SSLContext.set_ecdh_curve(curve_name) +.. method:: SSLContext.set_ecdh_curve(curve_name, /) Set the curve name for Elliptic Curve-based Diffie-Hellman (ECDH) key exchange. ECDH is significantly faster than regular DH while arguably @@ -1846,8 +1851,9 @@ to speed up repeated connections from the same clients. .. attribute:: SSLContext.sslsocket_class The return type of :meth:`SSLContext.wrap_socket`, defaults to - :class:`SSLSocket`. The attribute can be overridden on instance of class - in order to return a custom subclass of :class:`SSLSocket`. + :class:`SSLSocket`. The attribute can be assigned to on instances of + :class:`SSLContext` in order to return a custom subclass of + :class:`SSLSocket`. .. versionadded:: 3.7 @@ -2640,12 +2646,12 @@ purpose. It wraps an OpenSSL memory BIO (Basic IO) object: A boolean indicating whether the memory BIO is current at the end-of-file position. - .. method:: MemoryBIO.read(n=-1) + .. method:: MemoryBIO.read(n=-1, /) Read up to *n* bytes from the memory buffer. If *n* is not specified or negative, all bytes are returned. - .. method:: MemoryBIO.write(buf) + .. method:: MemoryBIO.write(buf, /) Write the bytes from *buf* to the memory BIO. The *buf* argument must be an object supporting the buffer protocol. @@ -2728,7 +2734,7 @@ This common check is automatically performed when .. versionchanged:: 3.7 Hostname matchings is now performed by OpenSSL. Python no longer uses - :func:`match_hostname`. + :func:`!match_hostname`. In server mode, if you want to authenticate your clients using the SSL layer (rather than using a higher-level authentication mechanism), you'll also have @@ -2819,16 +2825,16 @@ of TLS/SSL. Some new TLS 1.3 features are not yet available. Steve Kent :rfc:`RFC 4086: Randomness Requirements for Security <4086>` - Donald E., Jeffrey I. Schiller + Donald E. Eastlake, Jeffrey I. Schiller, Steve Crocker :rfc:`RFC 5280: Internet X.509 Public Key Infrastructure Certificate and Certificate Revocation List (CRL) Profile <5280>` - D. Cooper + David Cooper et al. :rfc:`RFC 5246: The Transport Layer Security (TLS) Protocol Version 1.2 <5246>` - T. Dierks et. al. + Tim Dierks and Eric Rescorla. :rfc:`RFC 6066: Transport Layer Security (TLS) Extensions <6066>` - D. Eastlake + Donald E. Eastlake `IANA TLS: Transport Layer Security (TLS) Parameters `_ IANA diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 39aaa5da0786f8..9bb903688bc0d3 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1000,8 +1000,6 @@ operations have the same priority as the corresponding numeric operations. [3]_ pair: slice; operation pair: operator; in pair: operator; not in - single: count() (sequence method) - single: index() (sequence method) +--------------------------+--------------------------------+----------+ | Operation | Result | Notes | @@ -1018,7 +1016,7 @@ operations have the same priority as the corresponding numeric operations. [3]_ | ``s * n`` or | equivalent to adding *s* to | (2)(7) | | ``n * s`` | itself *n* times | | +--------------------------+--------------------------------+----------+ -| ``s[i]`` | *i*\ th item of *s*, origin 0 | \(3) | +| ``s[i]`` | *i*\ th item of *s*, origin 0 | (3)(8) | +--------------------------+--------------------------------+----------+ | ``s[i:j]`` | slice of *s* from *i* to *j* | (3)(4) | +--------------------------+--------------------------------+----------+ @@ -1031,13 +1029,6 @@ operations have the same priority as the corresponding numeric operations. [3]_ +--------------------------+--------------------------------+----------+ | ``max(s)`` | largest item of *s* | | +--------------------------+--------------------------------+----------+ -| ``s.index(x[, i[, j]])`` | index of the first occurrence | \(8) | -| | of *x* in *s* (at or after | | -| | index *i* and before index *j*)| | -+--------------------------+--------------------------------+----------+ -| ``s.count(x)`` | total number of occurrences of | | -| | *x* in *s* | | -+--------------------------+--------------------------------+----------+ Sequences of the same type also support comparisons. In particular, tuples and lists are compared lexicographically by comparing corresponding elements. @@ -1143,12 +1134,41 @@ Notes: concatenation or repetition. (8) - ``index`` raises :exc:`ValueError` when *x* is not found in *s*. - Not all implementations support passing the additional arguments *i* and *j*. - These arguments allow efficient searching of subsections of the sequence. Passing - the extra arguments is roughly equivalent to using ``s[i:j].index(x)``, only - without copying any data and with the returned index being relative to - the start of the sequence rather than the start of the slice. + An :exc:`IndexError` is raised if *i* is outside the sequence range. + +.. rubric:: Sequence Methods + +Sequence types also support the following methods: + +.. method:: list.count(value, /) + range.count(value, /) + tuple.count(value, /) + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.count(value, /) + + Return the total number of occurrences of *value* in *sequence*. + +.. method:: list.index(value[, start[, stop]) + range.index(value[, start[, stop]) + tuple.index(value[, start[, stop]) + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.index(value[, start[, stop]) + + Return the index of the first occurrence of *value* in *sequence*. + + Raises :exc:`ValueError` if *value* is not found in *sequence*. + + The *start* or *stop* arguments allow for efficient searching + of subsections of the sequence, beginning at *start* and ending at *stop*. + This is roughly equivalent to ``start + sequence[start:stop].index(value)``, + only without copying any data. + + .. caution:: + Not all sequence types support passing the *start* and *stop* arguments. .. _typesseq-immutable: @@ -1199,14 +1219,6 @@ accepts integers that meet the value restriction ``0 <= x <= 255``). pair: subscript; assignment pair: slice; assignment pair: statement; del - single: append() (sequence method) - single: clear() (sequence method) - single: copy() (sequence method) - single: extend() (sequence method) - single: insert() (sequence method) - single: pop() (sequence method) - single: remove() (sequence method) - single: reverse() (sequence method) +------------------------------+--------------------------------+---------------------+ | Operation | Result | Notes | @@ -1214,11 +1226,15 @@ accepts integers that meet the value restriction ``0 <= x <= 255``). | ``s[i] = x`` | item *i* of *s* is replaced by | | | | *x* | | +------------------------------+--------------------------------+---------------------+ +| ``del s[i]`` | removes item *i* of *s* | | ++------------------------------+--------------------------------+---------------------+ | ``s[i:j] = t`` | slice of *s* from *i* to *j* | | | | is replaced by the contents of | | | | the iterable *t* | | +------------------------------+--------------------------------+---------------------+ -| ``del s[i:j]`` | same as ``s[i:j] = []`` | | +| ``del s[i:j]`` | removes the elements of | | +| | ``s[i:j]`` from the list | | +| | (same as ``s[i:j] = []``) | | +------------------------------+--------------------------------+---------------------+ | ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` | \(1) | | | are replaced by those of *t* | | @@ -1226,39 +1242,14 @@ accepts integers that meet the value restriction ``0 <= x <= 255``). | ``del s[i:j:k]`` | removes the elements of | | | | ``s[i:j:k]`` from the list | | +------------------------------+--------------------------------+---------------------+ -| ``s.append(x)`` | appends *x* to the end of the | | -| | sequence (same as | | -| | ``s[len(s):len(s)] = [x]``) | | -+------------------------------+--------------------------------+---------------------+ -| ``s.clear()`` | removes all items from *s* | \(5) | -| | (same as ``del s[:]``) | | -+------------------------------+--------------------------------+---------------------+ -| ``s.copy()`` | creates a shallow copy of *s* | \(5) | -| | (same as ``s[:]``) | | -+------------------------------+--------------------------------+---------------------+ -| ``s.extend(t)`` or | extends *s* with the | | -| ``s += t`` | contents of *t* (for the | | +| ``s += t`` | extends *s* with the | | +| | contents of *t* (for the | | | | most part the same as | | | | ``s[len(s):len(s)] = t``) | | +------------------------------+--------------------------------+---------------------+ -| ``s *= n`` | updates *s* with its contents | \(6) | +| ``s *= n`` | updates *s* with its contents | \(2) | | | repeated *n* times | | +------------------------------+--------------------------------+---------------------+ -| ``s.insert(i, x)`` | inserts *x* into *s* at the | | -| | index given by *i* | | -| | (same as ``s[i:i] = [x]``) | | -+------------------------------+--------------------------------+---------------------+ -| ``s.pop()`` or ``s.pop(i)`` | retrieves the item at *i* and | \(2) | -| | also removes it from *s* | | -+------------------------------+--------------------------------+---------------------+ -| ``s.remove(x)`` | removes the first item from | \(3) | -| | *s* where ``s[i]`` is equal to | | -| | *x* | | -+------------------------------+--------------------------------+---------------------+ -| ``s.reverse()`` | reverses the items of *s* in | \(4) | -| | place | | -+------------------------------+--------------------------------+---------------------+ - Notes: @@ -1266,32 +1257,105 @@ Notes: If *k* is not equal to ``1``, *t* must have the same length as the slice it is replacing. (2) - The optional argument *i* defaults to ``-1``, so that by default the last - item is removed and returned. + The value *n* is an integer, or an object implementing + :meth:`~object.__index__`. Zero and negative values of *n* clear + the sequence. Items in the sequence are not copied; they are referenced + multiple times, as explained for ``s * n`` under :ref:`typesseq-common`. -(3) - :meth:`remove` raises :exc:`ValueError` when *x* is not found in *s*. +.. rubric:: Mutable Sequence Methods -(4) - The :meth:`reverse` method modifies the sequence in place for economy of - space when reversing a large sequence. To remind users that it operates by - side effect, it does not return the reversed sequence. +Mutable sequence types also support the following methods: -(5) - :meth:`clear` and :meth:`!copy` are included for consistency with the - interfaces of mutable containers that don't support slicing operations - (such as :class:`dict` and :class:`set`). :meth:`!copy` is not part of the - :class:`collections.abc.MutableSequence` ABC, but most concrete - mutable sequence classes provide it. +.. method:: bytearray.append(value, /) + list.append(value, /) + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.append(value, /) + + Append *value* to the end of the sequence + This is equivalent to writing ``seq[len(seq):len(seq)] = [value]``. + +.. method:: bytearray.clear() + list.clear() + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.clear() .. versionadded:: 3.3 - :meth:`clear` and :meth:`!copy` methods. -(6) - The value *n* is an integer, or an object implementing - :meth:`~object.__index__`. Zero and negative values of *n* clear - the sequence. Items in the sequence are not copied; they are referenced - multiple times, as explained for ``s * n`` under :ref:`typesseq-common`. + Remove all items from *sequence*. + This is equivalent to writing ``del sequence[:]``. + +.. method:: bytearray.copy() + list.copy() + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.copy() + + .. versionadded:: 3.3 + + Create a shallow copy of *sequence*. + This is equivalent to writing ``sequence[:]``. + + .. hint:: The :meth:`!copy` method is not part of the + :class:`~collections.abc.MutableSequence` :class:`~abc.ABC`, + but most concrete mutable sequence types provide it. + +.. method:: bytearray.extend(iterable, /) + list.extend(iterable, /) + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.extend(iterable, /) + + Extend *sequence* with the contents of *iterable*. + For the most part, this is the same as writing + ``seq[len(seq):len(seq)] = iterable``. + +.. method:: bytearray.insert(index, value, /) + list.insert(index, value, /) + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.insert(index, value, /) + + Insert *value* into *sequence* at the given *index*. + This is equivalent to writing ``sequence[index:index] = [value]``. + +.. method:: bytearray.pop(index=-1, /) + list.pop(index=-1, /) + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.pop(index=-1, /) + + Retrieve the item at *index* and also removes it from *sequence*. + By default, the last item in *sequence* is removed and returned. + +.. method:: bytearray.remove(value, /) + list.remove(value, /) + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.remove(value, /) + + Remove the first item from *sequence* where ``sequence[i] == value``. + + Raises :exc:`ValueError` if *value* is not found in *sequence*. + +.. method:: bytearray.reverse() + list.reverse() + :no-contents-entry: + :no-index-entry: + :no-typesetting: +.. method:: sequence.reverse() + + Reverse the items of *sequence* in place. + This method maintains economy of space when reversing a large sequence. + To remind users that it operates by side-effect, it returns ``None``. .. _typesseq-list: @@ -1305,7 +1369,7 @@ Lists are mutable sequences, typically used to store collections of homogeneous items (where the precise degree of similarity will vary by application). -.. class:: list([iterable]) +.. class:: list(iterable=(), /) Lists may be constructed in several ways: @@ -1386,7 +1450,7 @@ built-in). Tuples are also used for cases where an immutable sequence of homogeneous data is needed (such as allowing storage in a :class:`set` or :class:`dict` instance). -.. class:: tuple([iterable]) +.. class:: tuple(iterable=(), /) Tuples may be constructed in a number of ways: @@ -1428,8 +1492,8 @@ The :class:`range` type represents an immutable sequence of numbers and is commonly used for looping a specific number of times in :keyword:`for` loops. -.. class:: range(stop) - range(start, stop[, step]) +.. class:: range(stop, /) + range(start, stop, step=1, /) The arguments to the range constructor must be integers (either built-in :class:`int` or any object that implements the :meth:`~object.__index__` special @@ -1689,8 +1753,10 @@ multiple fragments. .. index:: single: string; str (built-in class) -.. class:: str(object='') - str(object=b'', encoding='utf-8', errors='strict') +.. class:: str(*, encoding='utf-8', errors='strict') + str(object) + str(object, encoding, errors='strict') + str(object, *, errors) Return a :ref:`string ` version of *object*. If *object* is not provided, returns the empty string. Otherwise, the behavior of ``str()`` @@ -1784,12 +1850,18 @@ expression support in the :mod:`re` module). .. versionadded:: 3.3 -.. method:: str.center(width[, fillchar]) +.. method:: str.center(width, fillchar=' ', /) Return centered in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The original string is - returned if *width* is less than or equal to ``len(s)``. + returned if *width* is less than or equal to ``len(s)``. For example:: + >>> 'Python'.center(10) + ' Python ' + >>> 'Python'.center(10, '-') + '--Python--' + >>> 'Python'.center(4) + 'Python' .. method:: str.count(sub[, start[, end]]) @@ -1799,8 +1871,18 @@ expression support in the :mod:`re` module). interpreted as in slice notation. If *sub* is empty, returns the number of empty strings between characters - which is the length of the string plus one. - + which is the length of the string plus one. For example:: + + >>> 'spam, spam, spam'.count('spam') + 3 + >>> 'spam, spam, spam'.count('spam', 5) + 2 + >>> 'spam, spam, spam'.count('spam', 5, 10) + 1 + >>> 'spam, spam, spam'.count('eggs') + 0 + >>> 'spam, spam, spam'.count('') + 17 .. method:: str.encode(encoding="utf-8", errors="strict") @@ -1820,6 +1902,14 @@ expression support in the :mod:`re` module). unless an encoding error actually occurs, :ref:`devmode` is enabled or a :ref:`debug build ` is used. + For example:: + + >>> encoded_str_to_bytes = 'Python'.encode() + >>> type(encoded_str_to_bytes) + + >>> encoded_str_to_bytes + b'Python' + .. versionchanged:: 3.1 Added support for keyword arguments. @@ -1834,7 +1924,19 @@ expression support in the :mod:`re` module). Return ``True`` if the string ends with the specified *suffix*, otherwise return ``False``. *suffix* can also be a tuple of suffixes to look for. With optional *start*, test beginning at that position. With optional *end*, stop comparing - at that position. + at that position. Using *start* and *end* is equivalent to + ``str[start:end].endswith(suffix)``. For example:: + + >>> 'Python'.endswith('on') + True + >>> 'a tuple of suffixes'.endswith(('at', 'in')) + False + >>> 'a tuple of suffixes'.endswith(('at', 'es')) + True + >>> 'Python is amazing'.endswith('is', 0, 9) + True + + See also :meth:`startswith` and :meth:`removesuffix`. .. method:: str.expandtabs(tabsize=8) @@ -1850,12 +1952,15 @@ expression support in the :mod:`re` module). (``\n``) or return (``\r``), it is copied and the current column is reset to zero. Any other character is copied unchanged and the current column is incremented by one regardless of how the character is represented when - printed. + printed. For example:: >>> '01\t012\t0123\t01234'.expandtabs() '01 012 0123 01234' >>> '01\t012\t0123\t01234'.expandtabs(4) '01 012 0123 01234' + >>> print('01\t012\n0123\t01234'.expandtabs(4)) + 01 012 + 0123 01234 .. method:: str.find(sub[, start[, end]]) @@ -1863,6 +1968,14 @@ expression support in the :mod:`re` module). Return the lowest index in the string where substring *sub* is found within the slice ``s[start:end]``. Optional arguments *start* and *end* are interpreted as in slice notation. Return ``-1`` if *sub* is not found. + For example:: + + >>> 'spam, spam, spam'.find('sp') + 0 + >>> 'spam, spam, spam'.find('sp', 5) + 6 + + See also :meth:`rfind` and :meth:`index`. .. note:: @@ -1881,10 +1994,16 @@ expression support in the :mod:`re` module). ``{}``. Each replacement field contains either the numeric index of a positional argument, or the name of a keyword argument. Returns a copy of the string where each replacement field is replaced with the string value of - the corresponding argument. + the corresponding argument. For example: + + .. doctest:: >>> "The sum of 1 + 2 is {0}".format(1+2) 'The sum of 1 + 2 is 3' + >>> "The sum of {a} + {b} is {answer}".format(answer=1+2, a=1, b=2) + 'The sum of 1 + 2 is 3' + >>> "{1} expects the {0} Inquisition!".format("Spanish", "Nobody") + 'Nobody expects the Spanish Inquisition!' See :ref:`formatstrings` for a description of the various formatting options that can be specified in format strings. @@ -1944,13 +2063,32 @@ expression support in the :mod:`re` module). from the `Alphabetic property defined in the section 4.10 'Letters, Alphabetic, and Ideographic' of the Unicode Standard `_. + For example: + + .. doctest:: + + >>> 'Letters and spaces'.isalpha() + False + >>> 'LettersOnly'.isalpha() + True + >>> 'µ'.isalpha() # non-ASCII characters can be considered alphabetical too + True + + See :ref:`unicode-properties`. .. method:: str.isascii() Return ``True`` if the string is empty or all characters in the string are ASCII, ``False`` otherwise. - ASCII characters have code points in the range U+0000-U+007F. + ASCII characters have code points in the range U+0000-U+007F. For example: + + .. doctest:: + + >>> 'ASCII characters'.isascii() + True + >>> 'µ'.isascii() + False .. versionadded:: 3.7 @@ -1960,9 +2098,18 @@ expression support in the :mod:`re` module). Return ``True`` if all characters in the string are decimal characters and there is at least one character, ``False`` otherwise. Decimal characters are those that can be used to form - numbers in base 10, e.g. U+0660, ARABIC-INDIC DIGIT + numbers in base 10, such as U+0660, ARABIC-INDIC DIGIT ZERO. Formally a decimal character is a character in the Unicode - General Category "Nd". + General Category "Nd". For example: + + .. doctest:: + + >>> '0123456789'.isdecimal() + True + >>> '٠١٢٣٤٥٦٧٨٩'.isdecimal() # Arabic-Indic digits zero to nine + True + >>> 'alphabetic'.isdecimal() + False .. method:: str.isdigit() @@ -2012,7 +2159,7 @@ expression support in the :mod:`re` module). .. method:: str.isprintable() - Return true if all characters in the string are printable, false if it + Return ``True`` if all characters in the string are printable, ``False`` if it contains at least one non-printable character. Here "printable" means the character is suitable for :func:`repr` to use in @@ -2044,6 +2191,19 @@ expression support in the :mod:`re` module). character, for example uppercase characters may only follow uncased characters and lowercase characters only cased ones. Return ``False`` otherwise. + For example: + + .. doctest:: + + >>> 'Spam, Spam, Spam'.istitle() + True + >>> 'spam, spam, spam'.istitle() + False + >>> 'SPAM, SPAM, SPAM'.istitle() + False + + See also :meth:`title`. + .. method:: str.isupper() @@ -2063,15 +2223,24 @@ expression support in the :mod:`re` module). .. _meth-str-join: -.. method:: str.join(iterable) +.. method:: str.join(iterable, /) Return a string which is the concatenation of the strings in *iterable*. A :exc:`TypeError` will be raised if there are any non-string values in *iterable*, including :class:`bytes` objects. The separator between - elements is the string providing this method. + elements is the string providing this method. For example: + + .. doctest:: + + >>> ', '.join(['spam', 'spam', 'spam']) + 'spam, spam, spam' + >>> '-'.join('Python') + 'P-y-t-h-o-n' + See also :meth:`split`. -.. method:: str.ljust(width[, fillchar]) + +.. method:: str.ljust(width, fillchar=' ', /) Return the string left justified in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The @@ -2088,7 +2257,7 @@ expression support in the :mod:`re` module). `__. -.. method:: str.lstrip([chars]) +.. method:: str.lstrip(chars=None, /) Return a copy of the string with leading characters removed. The *chars* argument is a string specifying the set of characters to be removed. If omitted @@ -2109,7 +2278,8 @@ expression support in the :mod:`re` module). 'three!' -.. staticmethod:: str.maketrans(x[, y[, z]]) +.. staticmethod:: str.maketrans(dict, /) + str.maketrans(from, to, remove='', /) This static method returns a translation table usable for :meth:`str.translate`. @@ -2119,12 +2289,12 @@ expression support in the :mod:`re` module). converted to ordinals. If there are two arguments, they must be strings of equal length, and in the - resulting dictionary, each character in x will be mapped to the character at - the same position in y. If there is a third argument, it must be a string, + resulting dictionary, each character in *from* will be mapped to the character at + the same position in *to*. If there is a third argument, it must be a string, whose characters will be mapped to ``None`` in the result. -.. method:: str.partition(sep) +.. method:: str.partition(sep, /) Split the string at the first occurrence of *sep*, and return a 3-tuple containing the part before the separator, the separator itself, and the part @@ -2160,7 +2330,7 @@ expression support in the :mod:`re` module). .. versionadded:: 3.9 -.. method:: str.replace(old, new, count=-1) +.. method:: str.replace(old, new, /, count=-1) Return a copy of the string with all occurrences of substring *old* replaced by *new*. If *count* is given, only the first *count* occurrences are replaced. @@ -2183,14 +2353,14 @@ expression support in the :mod:`re` module). found. -.. method:: str.rjust(width[, fillchar]) +.. method:: str.rjust(width, fillchar=' ', /) Return the string right justified in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The original string is returned if *width* is less than or equal to ``len(s)``. -.. method:: str.rpartition(sep) +.. method:: str.rpartition(sep, /) Split the string at the last occurrence of *sep*, and return a 3-tuple containing the part before the separator, the separator itself, and the part @@ -2207,7 +2377,7 @@ expression support in the :mod:`re` module). :meth:`split` which is described in detail below. -.. method:: str.rstrip([chars]) +.. method:: str.rstrip(chars=None, /) Return a copy of the string with trailing characters removed. The *chars* argument is a string specifying the set of characters to be removed. If omitted @@ -2269,6 +2439,20 @@ expression support in the :mod:`re` module). >>> ' 1 2 3 '.split() ['1', '2', '3'] + If *sep* is not specified or is ``None`` and *maxsplit* is ``0``, only + leading runs of consecutive whitespace are considered. + + For example:: + + >>> "".split(None, 0) + [] + >>> " ".split(None, 0) + [] + >>> " foo ".split(maxsplit=0) + ['foo '] + + See also :meth:`join`. + .. index:: single: universal newlines; str.splitlines method @@ -2344,7 +2528,7 @@ expression support in the :mod:`re` module). string at that position. -.. method:: str.strip([chars]) +.. method:: str.strip(chars=None, /) Return a copy of the string with the leading and trailing characters removed. The *chars* argument is a string specifying the set of characters to be removed. @@ -2408,8 +2592,10 @@ expression support in the :mod:`re` module). >>> titlecase("they're bill's friends.") "They're Bill's Friends." + See also :meth:`istitle`. + -.. method:: str.translate(table) +.. method:: str.translate(table, /) Return a copy of the string in which each character has been mapped through the given translation table. The table must be an object that implements @@ -2440,7 +2626,7 @@ expression support in the :mod:`re` module). `__. -.. method:: str.zfill(width) +.. method:: str.zfill(width, /) Return a copy of the string left filled with ASCII ``'0'`` digits to make a string of length *width*. A leading sign prefix (``'+'``/``'-'``) @@ -2470,6 +2656,8 @@ expression support in the :mod:`re` module). single: : (colon); in formatted string literal single: = (equals); for help in debugging using string literals +.. _stdtypes-fstrings: + Formatted String Literals (f-strings) ------------------------------------- @@ -2478,123 +2666,147 @@ Formatted String Literals (f-strings) The :keyword:`await` and :keyword:`async for` can be used in expressions within f-strings. .. versionchanged:: 3.8 - Added the debugging operator (``=``) + Added the debug specifier (``=``) .. versionchanged:: 3.12 Many restrictions on expressions within f-strings have been removed. Notably, nested strings, comments, and backslashes are now permitted. An :dfn:`f-string` (formally a :dfn:`formatted string literal`) is a string literal that is prefixed with ``f`` or ``F``. -This type of string literal allows embedding arbitrary Python expressions -within *replacement fields*, which are delimited by curly brackets (``{}``). -These expressions are evaluated at runtime, similarly to :meth:`str.format`, -and are converted into regular :class:`str` objects. -For example: - -.. doctest:: - - >>> who = 'nobody' - >>> nationality = 'Spanish' - >>> f'{who.title()} expects the {nationality} Inquisition!' - 'Nobody expects the Spanish Inquisition!' - -It is also possible to use a multi line f-string: - -.. doctest:: - - >>> f'''This is a string - ... on two lines''' - 'This is a string\non two lines' +This type of string literal allows embedding the results of arbitrary Python +expressions within *replacement fields*, which are delimited by curly +brackets (``{}``). +Each replacement field must contain an expression, optionally followed by: -A single opening curly bracket, ``'{'``, marks a *replacement field* that -can contain any Python expression: +* a *debug specifier* -- an equal sign (``=``); +* a *conversion specifier* -- ``!s``, ``!r`` or ``!a``; and/or +* a *format specifier* prefixed with a colon (``:``). -.. doctest:: +See the :ref:`Lexical Analysis section on f-strings ` for details +on the syntax of these fields. - >>> nationality = 'Spanish' - >>> f'The {nationality} Inquisition!' - 'The Spanish Inquisition!' +Debug specifier +^^^^^^^^^^^^^^^ -To include a literal ``{`` or ``}``, use a double bracket: +.. versionadded:: 3.8 -.. doctest:: +If a debug specifier -- an equal sign (``=``) -- appears after the replacement +field expression, the resulting f-string will contain the expression's source, +the equal sign, and the value of the expression. +This is often useful for debugging:: - >>> x = 42 - >>> f'{{x}} is {x}' - '{x} is 42' + >>> number = 14.3 + >>> f'{number=}' + 'number=14.3' -Functions can also be used, and :ref:`format specifiers `: +Whitespace before, inside and after the expression, as well as whitespace +after the equal sign, is significant --- it is retained in the result:: -.. doctest:: + >>> f'{ number - 4 = }' + ' number - 4 = 10.3' - >>> from math import sqrt - >>> f'√2 \N{ALMOST EQUAL TO} {sqrt(2):.5f}' - '√2 ≈ 1.41421' -Any non-string expression is converted using :func:`str`, by default: +Conversion specifier +^^^^^^^^^^^^^^^^^^^^ -.. doctest:: +By default, the value of a replacement field expression is converted to +a string using :func:`str`:: >>> from fractions import Fraction - >>> f'{Fraction(1, 3)}' + >>> one_third = Fraction(1, 3) + >>> f'{one_third}' '1/3' -To use an explicit conversion, use the ``!`` (exclamation mark) operator, -followed by any of the valid formats, which are: +When a debug specifier but no format specifier is used, the default conversion +instead uses :func:`repr`:: -========== ============== -Conversion Meaning -========== ============== -``!a`` :func:`ascii` -``!r`` :func:`repr` -``!s`` :func:`str` -========== ============== + >>> f'{one_third = }' + 'one_third = Fraction(1, 3)' -For example: +The conversion can be specified explicitly using one of these specifiers: -.. doctest:: +* ``!s`` for :func:`str` +* ``!r`` for :func:`repr` +* ``!a`` for :func:`ascii` - >>> from fractions import Fraction - >>> f'{Fraction(1, 3)!s}' +For example:: + + >>> str(one_third) '1/3' - >>> f'{Fraction(1, 3)!r}' + >>> repr(one_third) 'Fraction(1, 3)' - >>> question = '¿Dónde está el Presidente?' - >>> print(f'{question!a}') - '\xbfD\xf3nde est\xe1 el Presidente?' - -While debugging it may be helpful to see both the expression and its value, -by using the equals sign (``=``) after the expression. -This preserves spaces within the brackets, and can be used with a converter. -By default, the debugging operator uses the :func:`repr` (``!r``) conversion. -For example: -.. doctest:: + >>> f'{one_third!s} is {one_third!r}' + '1/3 is Fraction(1, 3)' - >>> from fractions import Fraction - >>> calculation = Fraction(1, 3) - >>> f'{calculation=}' - 'calculation=Fraction(1, 3)' - >>> f'{calculation = }' - 'calculation = Fraction(1, 3)' - >>> f'{calculation = !s}' - 'calculation = 1/3' - -Once the output has been evaluated, it can be formatted using a -:ref:`format specifier ` following a colon (``':'``). -After the expression has been evaluated, and possibly converted to a string, -the :meth:`!__format__` method of the result is called with the format specifier, -or the empty string if no format specifier is given. -The formatted result is then used as the final value for the replacement field. -For example: + >>> string = "¡kočka 😸!" + >>> ascii(string) + "'\\xa1ko\\u010dka \\U0001f638!'" -.. doctest:: + >>> f'{string = !a}' + "string = '\\xa1ko\\u010dka \\U0001f638!'" + + +Format specifier +^^^^^^^^^^^^^^^^ + +After the expression has been evaluated, and possibly converted using an +explicit conversion specifier, it is formatted using the :func:`format` function. +If the replacement field includes a *format specifier* introduced by a colon +(``:``), the specifier is passed to :func:`!format` as the second argument. +The result of :func:`!format` is then used as the final value for the +replacement field. For example:: >>> from fractions import Fraction - >>> f'{Fraction(1, 7):.6f}' - '0.142857' - >>> f'{Fraction(1, 7):_^+10}' - '___+1/7___' + >>> one_third = Fraction(1, 3) + >>> f'{one_third:.6f}' + '0.333333' + >>> f'{one_third:_^+10}' + '___+1/3___' + >>> >>> f'{one_third!r:_^20}' + '___Fraction(1, 3)___' + >>> f'{one_third = :~>10}~' + 'one_third = ~~~~~~~1/3~' + +.. _stdtypes-tstrings: + +Template String Literals (t-strings) +------------------------------------ + +An :dfn:`t-string` (formally a :dfn:`template string literal`) is +a string literal that is prefixed with ``t`` or ``T``. + +These strings follow the same syntax and evaluation rules as +:ref:`formatted string literals `, +with for the following differences: + +* Rather than evaluating to a ``str`` object, template string literals evaluate + to a :class:`string.templatelib.Template` object. + +* The :func:`format` protocol is not used. + Instead, the format specifier and conversions (if any) are passed to + a new :class:`~string.templatelib.Interpolation` object that is created + for each evaluated expression. + It is up to code that processes the resulting :class:`~string.templatelib.Template` + object to decide how to handle format specifiers and conversions. + +* Format specifiers containing nested replacement fields are evaluated eagerly, + prior to being passed to the :class:`~string.templatelib.Interpolation` object. + For instance, an interpolation of the form ``{amount:.{precision}f}`` will + evaluate the inner expression ``{precision}`` to determine the value of the + ``format_spec`` attribute. + If ``precision`` were to be ``2``, the resulting format specifier + would be ``'.2f'``. + +* When the equals sign ``'='`` is provided in an interpolation expression, + the text of the expression is appended to the literal string that precedes + the relevant interpolation. + This includes the equals sign and any surrounding whitespace. + The :class:`!Interpolation` instance for the expression will be created as + normal, except that :attr:`~string.templatelib.Interpolation.conversion` will + be set to '``r``' (:func:`repr`) by default. + If an explicit conversion or format specifier are provided, + this will override the default behaviour. .. _old-string-formatting: @@ -2615,11 +2827,12 @@ For example: The formatting operations described here exhibit a variety of quirks that lead to a number of common errors (such as failing to display tuples and - dictionaries correctly). Using the newer :ref:`formatted string literals - `, the :meth:`str.format` interface, or :ref:`template strings - ` may help avoid these errors. Each of these - alternatives provides their own trade-offs and benefits of simplicity, - flexibility, and/or extensibility. + dictionaries correctly). + + Using :ref:`formatted string literals `, the :meth:`str.format` + interface, or :class:`string.Template` may help avoid these errors. + Each of these alternatives provides their own trade-offs and benefits of + simplicity, flexibility, and/or extensibility. String objects have one unique built-in operation: the ``%`` operator (modulo). This is also known as the string *formatting* or *interpolation* operator. @@ -2831,7 +3044,8 @@ binary protocols are based on the ASCII text encoding, bytes objects offer several methods that are only valid when working with ASCII compatible data and are closely related to string objects in a variety of other ways. -.. class:: bytes([source[, encoding[, errors]]]) +.. class:: bytes(source=b'') + bytes(source, encoding, errors='strict') Firstly, the syntax for bytes literals is largely the same as that for string literals, except that a ``b`` prefix is added: @@ -2871,7 +3085,7 @@ data and are closely related to string objects in a variety of other ways. numbers are a commonly used format for describing binary data. Accordingly, the bytes type has an additional class method to read data in that format: - .. classmethod:: fromhex(string) + .. classmethod:: fromhex(string, /) This :class:`bytes` class method returns a bytes object, decoding the given string object. The string must contain two hexadecimal digits per @@ -2891,7 +3105,8 @@ data and are closely related to string objects in a variety of other ways. A reverse conversion function exists to transform a bytes object into its hexadecimal representation. - .. method:: hex([sep[, bytes_per_sep]]) + .. method:: hex(*, bytes_per_sep=1) + hex(sep, bytes_per_sep=1) Return a string object containing two hexadecimal digits for each byte in the instance. @@ -2940,7 +3155,8 @@ Bytearray Objects :class:`bytearray` objects are a mutable counterpart to :class:`bytes` objects. -.. class:: bytearray([source[, encoding[, errors]]]) +.. class:: bytearray(source=b'') + bytearray(source, encoding, errors='strict') There is no dedicated literal syntax for bytearray objects, instead they are always created by calling the constructor: @@ -2960,7 +3176,7 @@ objects. numbers are a commonly used format for describing binary data. Accordingly, the bytearray type has an additional class method to read data in that format: - .. classmethod:: fromhex(string) + .. classmethod:: fromhex(string, /) This :class:`bytearray` class method returns bytearray object, decoding the given string object. The string must contain two hexadecimal digits @@ -2980,7 +3196,8 @@ objects. A reverse conversion function exists to transform a bytearray object into its hexadecimal representation. - .. method:: hex([sep[, bytes_per_sep]]) + .. method:: hex(*, bytes_per_sep=1) + hex(sep, bytes_per_sep=1) Return a string object containing two hexadecimal digits for each byte in the instance. @@ -2995,7 +3212,7 @@ objects. optional *sep* and *bytes_per_sep* parameters to insert separators between bytes in the hex output. - .. method:: resize(size) + .. method:: resize(size, /) Resize the :class:`bytearray` to contain *size* bytes. *size* must be greater than or equal to 0. @@ -3218,8 +3435,8 @@ arbitrary binary data. Also accept an integer in the range 0 to 255 as the subsequence. -.. method:: bytes.join(iterable) - bytearray.join(iterable) +.. method:: bytes.join(iterable, /) + bytearray.join(iterable, /) Return a bytes or bytearray object which is the concatenation of the binary data sequences in *iterable*. A :exc:`TypeError` will be raised @@ -3229,8 +3446,8 @@ arbitrary binary data. bytearray object providing this method. -.. staticmethod:: bytes.maketrans(from, to) - bytearray.maketrans(from, to) +.. staticmethod:: bytes.maketrans(from, to, /) + bytearray.maketrans(from, to, /) This static method returns a translation table usable for :meth:`bytes.translate` that will map each character in *from* into the @@ -3240,8 +3457,8 @@ arbitrary binary data. .. versionadded:: 3.1 -.. method:: bytes.partition(sep) - bytearray.partition(sep) +.. method:: bytes.partition(sep, /) + bytearray.partition(sep, /) Split the sequence at the first occurrence of *sep*, and return a 3-tuple containing the part before the separator, the separator itself or its @@ -3253,8 +3470,8 @@ arbitrary binary data. The separator to search for may be any :term:`bytes-like object`. -.. method:: bytes.replace(old, new[, count]) - bytearray.replace(old, new[, count]) +.. method:: bytes.replace(old, new, count=-1, /) + bytearray.replace(old, new, count=-1, /) Return a copy of the sequence with all occurrences of subsequence *old* replaced by *new*. If the optional argument *count* is given, only the @@ -3297,8 +3514,8 @@ arbitrary binary data. Also accept an integer in the range 0 to 255 as the subsequence. -.. method:: bytes.rpartition(sep) - bytearray.rpartition(sep) +.. method:: bytes.rpartition(sep, /) + bytearray.rpartition(sep, /) Split the sequence at the last occurrence of *sep*, and return a 3-tuple containing the part before the separator, the separator itself or its @@ -3348,8 +3565,8 @@ with arbitrary binary data by passing appropriate arguments. Note that all of the bytearray methods in this section do *not* operate in place, and instead produce new objects. -.. method:: bytes.center(width[, fillbyte]) - bytearray.center(width[, fillbyte]) +.. method:: bytes.center(width, fillbyte=b' ', /) + bytearray.center(width, fillbyte=b' ', /) Return a copy of the object centered in a sequence of length *width*. Padding is done using the specified *fillbyte* (default is an ASCII @@ -3362,8 +3579,8 @@ produce new objects. it always produces a new object, even if no changes were made. -.. method:: bytes.ljust(width[, fillbyte]) - bytearray.ljust(width[, fillbyte]) +.. method:: bytes.ljust(width, fillbyte=b' ', /) + bytearray.ljust(width, fillbyte=b' ', /) Return a copy of the object left justified in a sequence of length *width*. Padding is done using the specified *fillbyte* (default is an ASCII @@ -3376,14 +3593,13 @@ produce new objects. it always produces a new object, even if no changes were made. -.. method:: bytes.lstrip([chars]) - bytearray.lstrip([chars]) +.. method:: bytes.lstrip(bytes=None, /) + bytearray.lstrip(bytes=None, /) Return a copy of the sequence with specified leading bytes removed. The - *chars* argument is a binary sequence specifying the set of byte values to - be removed - the name refers to the fact this method is usually used with - ASCII characters. If omitted or ``None``, the *chars* argument defaults - to removing ASCII whitespace. The *chars* argument is not a prefix; + *bytes* argument is a binary sequence specifying the set of byte values to + be removed. If omitted or ``None``, the *bytes* argument defaults + to removing ASCII whitespace. The *bytes* argument is not a prefix; rather, all combinations of its values are stripped:: >>> b' spacious '.lstrip() @@ -3407,8 +3623,8 @@ produce new objects. it always produces a new object, even if no changes were made. -.. method:: bytes.rjust(width[, fillbyte]) - bytearray.rjust(width[, fillbyte]) +.. method:: bytes.rjust(width, fillbyte=b' ', /) + bytearray.rjust(width, fillbyte=b' ', /) Return a copy of the object right justified in a sequence of length *width*. Padding is done using the specified *fillbyte* (default is an ASCII @@ -3432,14 +3648,13 @@ produce new objects. :meth:`split` which is described in detail below. -.. method:: bytes.rstrip([chars]) - bytearray.rstrip([chars]) +.. method:: bytes.rstrip(bytes=None, /) + bytearray.rstrip(bytes=None, /) Return a copy of the sequence with specified trailing bytes removed. The - *chars* argument is a binary sequence specifying the set of byte values to - be removed - the name refers to the fact this method is usually used with - ASCII characters. If omitted or ``None``, the *chars* argument defaults to - removing ASCII whitespace. The *chars* argument is not a suffix; rather, + *bytes* argument is a binary sequence specifying the set of byte values to + be removed. If omitted or ``None``, the *bytes* argument defaults to + removing ASCII whitespace. The *bytes* argument is not a suffix; rather, all combinations of its values are stripped:: >>> b' spacious '.rstrip() @@ -3509,14 +3724,13 @@ produce new objects. [b'1', b'2', b'3'] -.. method:: bytes.strip([chars]) - bytearray.strip([chars]) +.. method:: bytes.strip(bytes=None, /) + bytearray.strip(bytes=None, /) Return a copy of the sequence with specified leading and trailing bytes - removed. The *chars* argument is a binary sequence specifying the set of - byte values to be removed - the name refers to the fact this method is - usually used with ASCII characters. If omitted or ``None``, the *chars* - argument defaults to removing ASCII whitespace. The *chars* argument is + removed. The *bytes* argument is a binary sequence specifying the set of + byte values to be removed. If omitted or ``None``, the *bytes* + argument defaults to removing ASCII whitespace. The *bytes* argument is not a prefix or suffix; rather, all combinations of its values are stripped:: @@ -3838,8 +4052,8 @@ place, and instead produce new objects. always produces a new object, even if no changes were made. -.. method:: bytes.zfill(width) - bytearray.zfill(width) +.. method:: bytes.zfill(width, /) + bytearray.zfill(width, /) Return a copy of the sequence left filled with ASCII ``b'0'`` digits to make a sequence of length *width*. A leading sign prefix (``b'+'``/ @@ -4251,7 +4465,8 @@ copying. in-memory Fortran order is preserved. For non-contiguous views, the data is converted to C first. *order=None* is the same as *order='C'*. - .. method:: hex([sep[, bytes_per_sep]]) + .. method:: hex(*, bytes_per_sep=1) + hex(sep, bytes_per_sep=1) Return a string object containing two hexadecimal digits for each byte in the buffer. :: @@ -4336,7 +4551,8 @@ copying. .. versionadded:: 3.2 - .. method:: cast(format[, shape]) + .. method:: cast(format, /) + cast(format, shape, /) Cast a memoryview to a new format or shape. *shape* defaults to ``[byte_length//new_itemsize]``, which means that the result view @@ -4586,11 +4802,12 @@ other sequence-like behavior. There are currently two built-in set types, :class:`set` and :class:`frozenset`. The :class:`set` type is mutable --- the contents can be changed using methods -like :meth:`~set.add` and :meth:`~set.remove`. Since it is mutable, it has no -hash value and cannot be used as either a dictionary key or as an element of -another set. The :class:`frozenset` type is immutable and :term:`hashable` --- -its contents cannot be altered after it is created; it can therefore be used as -a dictionary key or as an element of another set. +like :meth:`~set.add` and :meth:`~set.remove`. +Since it is mutable, it has no hash value and cannot be used as +either a dictionary key or as an element of another set. +The :class:`frozenset` type is immutable and :term:`hashable` --- +its contents cannot be altered after it is created; +it can therefore be used as a dictionary key or as an element of another set. Non-empty sets (not frozensets) can be created by placing a comma-separated list of elements within braces, for example: ``{'jack', 'sjoerd'}``, in addition to the @@ -4598,8 +4815,8 @@ of elements within braces, for example: ``{'jack', 'sjoerd'}``, in addition to t The constructors for both classes work the same: -.. class:: set([iterable]) - frozenset([iterable]) +.. class:: set(iterable=(), /) + frozenset(iterable=(), /) Return a new set or frozenset object whose elements are taken from *iterable*. The elements of a set must be :term:`hashable`. To @@ -4607,164 +4824,172 @@ The constructors for both classes work the same: objects. If *iterable* is not specified, a new empty set is returned. - Sets can be created by several means: +Sets can be created by several means: - * Use a comma-separated list of elements within braces: ``{'jack', 'sjoerd'}`` - * Use a set comprehension: ``{c for c in 'abracadabra' if c not in 'abc'}`` - * Use the type constructor: ``set()``, ``set('foobar')``, ``set(['a', 'b', 'foo'])`` +* Use a comma-separated list of elements within braces: ``{'jack', 'sjoerd'}`` +* Use a set comprehension: ``{c for c in 'abracadabra' if c not in 'abc'}`` +* Use the type constructor: ``set()``, ``set('foobar')``, ``set(['a', 'b', 'foo'])`` - Instances of :class:`set` and :class:`frozenset` provide the following - operations: +Instances of :class:`set` and :class:`frozenset` provide the following +operations: - .. describe:: len(s) +.. describe:: len(s) - Return the number of elements in set *s* (cardinality of *s*). + Return the number of elements in set *s* (cardinality of *s*). - .. describe:: x in s +.. describe:: x in s - Test *x* for membership in *s*. + Test *x* for membership in *s*. - .. describe:: x not in s +.. describe:: x not in s - Test *x* for non-membership in *s*. + Test *x* for non-membership in *s*. - .. method:: isdisjoint(other) +.. method:: frozenset.isdisjoint(other, /) + set.isdisjoint(other, /) - Return ``True`` if the set has no elements in common with *other*. Sets are - disjoint if and only if their intersection is the empty set. + Return ``True`` if the set has no elements in common with *other*. Sets are + disjoint if and only if their intersection is the empty set. - .. method:: issubset(other) - set <= other +.. method:: frozenset.issubset(other, /) + set.issubset(other, /) +.. describe:: set <= other - Test whether every element in the set is in *other*. + Test whether every element in the set is in *other*. - .. method:: set < other +.. describe:: set < other - Test whether the set is a proper subset of *other*, that is, - ``set <= other and set != other``. + Test whether the set is a proper subset of *other*, that is, + ``set <= other and set != other``. - .. method:: issuperset(other) - set >= other +.. method:: frozenset.issuperset(other, /) + set.issuperset(other, /) +.. describe:: set >= other - Test whether every element in *other* is in the set. + Test whether every element in *other* is in the set. - .. method:: set > other +.. describe:: set > other - Test whether the set is a proper superset of *other*, that is, ``set >= - other and set != other``. + Test whether the set is a proper superset of *other*, that is, ``set >= + other and set != other``. - .. method:: union(*others) - set | other | ... +.. method:: frozenset.union(*others) + set.union(*others) +.. describe:: set | other | ... - Return a new set with elements from the set and all others. + Return a new set with elements from the set and all others. - .. method:: intersection(*others) - set & other & ... +.. method:: frozenset.intersection(*others) + set.intersection(*others) +.. describe:: set & other & ... - Return a new set with elements common to the set and all others. + Return a new set with elements common to the set and all others. - .. method:: difference(*others) - set - other - ... +.. method:: frozenset.difference(*others) + set.difference(*others) +.. describe:: set - other - ... - Return a new set with elements in the set that are not in the others. + Return a new set with elements in the set that are not in the others. - .. method:: symmetric_difference(other) - set ^ other +.. method:: frozenset.symmetric_difference(other, /) + set.symmetric_difference(other, /) +.. describe:: set ^ other - Return a new set with elements in either the set or *other* but not both. + Return a new set with elements in either the set or *other* but not both. - .. method:: copy() +.. method:: frozenset.copy() + set.copy() - Return a shallow copy of the set. + Return a shallow copy of the set. - Note, the non-operator versions of :meth:`union`, :meth:`intersection`, - :meth:`difference`, :meth:`symmetric_difference`, :meth:`issubset`, and - :meth:`issuperset` methods will accept any iterable as an argument. In - contrast, their operator based counterparts require their arguments to be - sets. This precludes error-prone constructions like ``set('abc') & 'cbs'`` - in favor of the more readable ``set('abc').intersection('cbs')``. +Note, the non-operator versions of :meth:`~frozenset.union`, +:meth:`~frozenset.intersection`, :meth:`~frozenset.difference`, :meth:`~frozenset.symmetric_difference`, :meth:`~frozenset.issubset`, and +:meth:`~frozenset.issuperset` methods will accept any iterable as an argument. In +contrast, their operator based counterparts require their arguments to be +sets. This precludes error-prone constructions like ``set('abc') & 'cbs'`` +in favor of the more readable ``set('abc').intersection('cbs')``. - Both :class:`set` and :class:`frozenset` support set to set comparisons. Two - sets are equal if and only if every element of each set is contained in the - other (each is a subset of the other). A set is less than another set if and - only if the first set is a proper subset of the second set (is a subset, but - is not equal). A set is greater than another set if and only if the first set - is a proper superset of the second set (is a superset, but is not equal). +Both :class:`set` and :class:`frozenset` support set to set comparisons. Two +sets are equal if and only if every element of each set is contained in the +other (each is a subset of the other). A set is less than another set if and +only if the first set is a proper subset of the second set (is a subset, but +is not equal). A set is greater than another set if and only if the first set +is a proper superset of the second set (is a superset, but is not equal). - Instances of :class:`set` are compared to instances of :class:`frozenset` - based on their members. For example, ``set('abc') == frozenset('abc')`` - returns ``True`` and so does ``set('abc') in set([frozenset('abc')])``. +Instances of :class:`set` are compared to instances of :class:`frozenset` +based on their members. For example, ``set('abc') == frozenset('abc')`` +returns ``True`` and so does ``set('abc') in set([frozenset('abc')])``. - The subset and equality comparisons do not generalize to a total ordering - function. For example, any two nonempty disjoint sets are not equal and are not - subsets of each other, so *all* of the following return ``False``: ``ab``. +The subset and equality comparisons do not generalize to a total ordering +function. For example, any two nonempty disjoint sets are not equal and are not +subsets of each other, so *all* of the following return ``False``: ``ab``. - Since sets only define partial ordering (subset relationships), the output of - the :meth:`list.sort` method is undefined for lists of sets. +Since sets only define partial ordering (subset relationships), the output of +the :meth:`list.sort` method is undefined for lists of sets. - Set elements, like dictionary keys, must be :term:`hashable`. +Set elements, like dictionary keys, must be :term:`hashable`. - Binary operations that mix :class:`set` instances with :class:`frozenset` - return the type of the first operand. For example: ``frozenset('ab') | - set('bc')`` returns an instance of :class:`frozenset`. +Binary operations that mix :class:`set` instances with :class:`frozenset` +return the type of the first operand. For example: ``frozenset('ab') | +set('bc')`` returns an instance of :class:`frozenset`. - The following table lists operations available for :class:`set` that do not - apply to immutable instances of :class:`frozenset`: +The following table lists operations available for :class:`set` that do not +apply to immutable instances of :class:`frozenset`: - .. method:: update(*others) - set |= other | ... +.. method:: set.update(*others) +.. describe:: set |= other | ... - Update the set, adding elements from all others. + Update the set, adding elements from all others. - .. method:: intersection_update(*others) - set &= other & ... +.. method:: set.intersection_update(*others) +.. describe:: set &= other & ... - Update the set, keeping only elements found in it and all others. + Update the set, keeping only elements found in it and all others. - .. method:: difference_update(*others) - set -= other | ... +.. method:: set.difference_update(*others) +.. describe:: set -= other | ... - Update the set, removing elements found in others. + Update the set, removing elements found in others. - .. method:: symmetric_difference_update(other) - set ^= other +.. method:: set.symmetric_difference_update(other, /) +.. describe:: set ^= other - Update the set, keeping only elements found in either set, but not in both. + Update the set, keeping only elements found in either set, but not in both. - .. method:: add(elem) +.. method:: set.add(elem, /) - Add element *elem* to the set. + Add element *elem* to the set. - .. method:: remove(elem) +.. method:: set.remove(elem, /) - Remove element *elem* from the set. Raises :exc:`KeyError` if *elem* is - not contained in the set. + Remove element *elem* from the set. Raises :exc:`KeyError` if *elem* is + not contained in the set. - .. method:: discard(elem) +.. method:: set.discard(elem, /) - Remove element *elem* from the set if it is present. + Remove element *elem* from the set if it is present. - .. method:: pop() +.. method:: set.pop() - Remove and return an arbitrary element from the set. Raises - :exc:`KeyError` if the set is empty. + Remove and return an arbitrary element from the set. Raises + :exc:`KeyError` if the set is empty. - .. method:: clear() +.. method:: set.clear() - Remove all elements from the set. + Remove all elements from the set. - Note, the non-operator versions of the :meth:`update`, - :meth:`intersection_update`, :meth:`difference_update`, and - :meth:`symmetric_difference_update` methods will accept any iterable as an - argument. +Note, the non-operator versions of the :meth:`~set.update`, +:meth:`~set.intersection_update`, :meth:`~set.difference_update`, and +:meth:`~set.symmetric_difference_update` methods will accept any iterable as an +argument. - Note, the *elem* argument to the :meth:`~object.__contains__`, - :meth:`remove`, and - :meth:`discard` methods may be a set. To support searching for an equivalent - frozenset, a temporary one is created from *elem*. +Note, the *elem* argument to the :meth:`~object.__contains__`, +:meth:`~set.remove`, and +:meth:`~set.discard` methods may be a set. To support searching for an equivalent +frozenset, a temporary one is created from *elem*. .. _typesmapping: @@ -4794,8 +5019,8 @@ Values that compare equal (such as ``1``, ``1.0``, and ``True``) can be used interchangeably to index the same dictionary entry. .. class:: dict(**kwargs) - dict(mapping, **kwargs) - dict(iterable, **kwargs) + dict(mapping, /, **kwargs) + dict(iterable, /, **kwargs) Return a new dictionary initialized from an optional positional argument and a possibly empty set of keyword arguments. @@ -4823,7 +5048,13 @@ can be used interchangeably to index the same dictionary entry. being added is already present, the value from the keyword argument replaces the value from the positional argument. - To illustrate, the following examples all return a dictionary equal to + Providing keyword arguments as in the first example only works for keys that + are valid Python identifiers. Otherwise, any valid keys can be used. + + Dictionaries compare equal if and only if they have the same ``(key, + value)`` pairs (regardless of ordering). Order comparisons ('<', '<=', '>=', '>') raise + :exc:`TypeError`. To illustrate dictionary creation and equality, + the following examples all return a dictionary equal to ``{"one": 1, "two": 2, "three": 3}``:: >>> a = dict(one=1, two=2, three=3) @@ -4838,6 +5069,27 @@ can be used interchangeably to index the same dictionary entry. Providing keyword arguments as in the first example only works for keys that are valid Python identifiers. Otherwise, any valid keys can be used. + Dictionaries preserve insertion order. Note that updating a key does not + affect the order. Keys added after deletion are inserted at the end. :: + + >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} + >>> d + {'one': 1, 'two': 2, 'three': 3, 'four': 4} + >>> list(d) + ['one', 'two', 'three', 'four'] + >>> list(d.values()) + [1, 2, 3, 4] + >>> d["one"] = 42 + >>> d + {'one': 42, 'two': 2, 'three': 3, 'four': 4} + >>> del d["two"] + >>> d["two"] = None + >>> d + {'one': 42, 'three': 3, 'four': 4, 'two': None} + + .. versionchanged:: 3.7 + Dictionary order is guaranteed to be insertion order. This behavior was + an implementation detail of CPython from 3.6. These are the operations that dictionaries support (and therefore, custom mapping types should support too): @@ -4857,13 +5109,13 @@ can be used interchangeably to index the same dictionary entry. .. index:: __missing__() - If a subclass of dict defines a method :meth:`__missing__` and *key* + If a subclass of dict defines a method :meth:`~object.__missing__` and *key* is not present, the ``d[key]`` operation calls that method with the key *key* as argument. The ``d[key]`` operation then returns or raises whatever is returned or raised by the ``__missing__(key)`` call. - No other operations or methods invoke :meth:`__missing__`. If - :meth:`__missing__` is not defined, :exc:`KeyError` is raised. - :meth:`__missing__` must be a method; it cannot be an instance variable:: + No other operations or methods invoke :meth:`~object.__missing__`. If + :meth:`~object.__missing__` is not defined, :exc:`KeyError` is raised. + :meth:`~object.__missing__` must be a method; it cannot be an instance variable:: >>> class Counter(dict): ... def __missing__(self, key): @@ -4877,7 +5129,8 @@ can be used interchangeably to index the same dictionary entry. 1 The example above shows part of the implementation of - :class:`collections.Counter`. A different ``__missing__`` method is used + :class:`collections.Counter`. + A different :meth:`!__missing__` method is used by :class:`collections.defaultdict`. .. describe:: d[key] = value @@ -4936,7 +5189,8 @@ can be used interchangeably to index the same dictionary entry. Return a new view of the dictionary's keys. See the :ref:`documentation of view objects `. - .. method:: pop(key[, default]) + .. method:: pop(key, /) + pop(key, default, /) If *key* is in the dictionary, remove it and return its value, else return *default*. If *default* is not given and *key* is not in the dictionary, @@ -4968,9 +5222,11 @@ can be used interchangeably to index the same dictionary entry. with a value of *default* and return *default*. *default* defaults to ``None``. - .. method:: update([other]) + .. method:: update(**kwargs) + update(mapping, /, **kwargs) + update(iterable, /, **kwargs) - Update the dictionary with the key/value pairs from *other*, overwriting + Update the dictionary with the key/value pairs from *mapping* or *iterable* and *kwargs*, overwriting existing keys. Return ``None``. :meth:`update` accepts either another object with a ``keys()`` method (in @@ -5008,32 +5264,6 @@ can be used interchangeably to index the same dictionary entry. .. versionadded:: 3.9 - Dictionaries compare equal if and only if they have the same ``(key, - value)`` pairs (regardless of ordering). Order comparisons ('<', '<=', '>=', '>') raise - :exc:`TypeError`. - - Dictionaries preserve insertion order. Note that updating a key does not - affect the order. Keys added after deletion are inserted at the end. :: - - >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} - >>> d - {'one': 1, 'two': 2, 'three': 3, 'four': 4} - >>> list(d) - ['one', 'two', 'three', 'four'] - >>> list(d.values()) - [1, 2, 3, 4] - >>> d["one"] = 42 - >>> d - {'one': 42, 'two': 2, 'three': 3, 'four': 4} - >>> del d["two"] - >>> d["two"] = None - >>> d - {'one': 42, 'three': 3, 'four': 4, 'two': None} - - .. versionchanged:: 3.7 - Dictionary order is guaranteed to be insertion order. This behavior was - an implementation detail of CPython from 3.6. - Dictionaries and dictionary views are reversible. :: >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} @@ -5409,6 +5639,7 @@ list is non-exhaustive. * :class:`collections.abc.MutableMapping` * :class:`collections.abc.Sequence` * :class:`collections.abc.MutableSequence` +* :class:`collections.abc.ByteString` * :class:`collections.abc.MappingView` * :class:`collections.abc.KeysView` * :class:`collections.abc.ItemsView` @@ -5684,9 +5915,10 @@ Methods .. index:: pair: object; method -Methods are functions that are called using the attribute notation. There are -two flavors: :ref:`built-in methods ` (such as :meth:`append` -on lists) and :ref:`class instance method `. +Methods are functions that are called using the attribute notation. +There are two flavors: :ref:`built-in methods ` +(such as :meth:`~list.append` on lists) +and :ref:`class instance method `. Built-in methods are described with the types that support them. If you access a method (a function defined in a class namespace) through an @@ -5792,13 +6024,34 @@ It is written as ``None``. The Ellipsis Object ------------------- -This object is commonly used by slicing (see :ref:`slicings`). It supports no -special operations. There is exactly one ellipsis object, named +This object is commonly used to indicate that something is omitted. +It supports no special operations. There is exactly one ellipsis object, named :const:`Ellipsis` (a built-in name). ``type(Ellipsis)()`` produces the :const:`Ellipsis` singleton. It is written as ``Ellipsis`` or ``...``. +In typical use, ``...`` as the ``Ellipsis`` object appears in a few different +places, for instance: + +- In type annotations, such as :ref:`callable arguments ` + or :ref:`tuple elements `. + +- As the body of a function instead of a :ref:`pass statement `. + +- In third-party libraries, such as `Numpy's slicing and striding + `_. + +Python also uses three dots in ways that are not ``Ellipsis`` objects, for instance: + +- Doctest's :const:`ELLIPSIS `, as a pattern for missing content. + +- The default Python prompt of the :term:`interactive` shell when partial input is incomplete. + +Lastly, the Python documentation often uses three dots in conventional English +usage to mean omitted content, even in code examples that also use them as the +``Ellipsis``. + .. _bltin-notimplemented-object: diff --git a/Doc/library/string.rst b/Doc/library/string.rst index b44d98819b6998..58c836c7382330 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -4,7 +4,7 @@ .. module:: string :synopsis: Common string operations. -**Source code:** :source:`Lib/string.py` +**Source code:** :source:`Lib/string/__init__.py` -------------- @@ -198,8 +198,9 @@ Format String Syntax The :meth:`str.format` method and the :class:`Formatter` class share the same syntax for format strings (although in the case of :class:`Formatter`, subclasses can define their own format string syntax). The syntax is -related to that of :ref:`formatted string literals `, but it is -less sophisticated and, in particular, does not support arbitrary expressions. +related to that of :ref:`formatted string literals ` and +:ref:`template string literals `, but it is less sophisticated +and, in particular, does not support arbitrary expressions in interpolations. .. index:: single: {} (curly brackets); in string formatting @@ -264,6 +265,8 @@ Some simple format string examples:: "Weight in tons {0.weight}" # 'weight' attribute of first positional arg "Units destroyed: {players[0]}" # First element of keyword argument 'players'. +.. _formatstrings-conversion: + The *conversion* field causes a type coercion before formatting. Normally, the job of formatting a value is done by the :meth:`~object.__format__` method of the value itself. However, in some cases it is desirable to force a type to be formatted @@ -306,7 +309,7 @@ Format Specification Mini-Language "Format specifications" are used within replacement fields contained within a format string to define how individual values are presented (see -:ref:`formatstrings` and :ref:`f-strings`). +:ref:`formatstrings`, :ref:`f-strings`, and :ref:`t-strings`). They can also be passed directly to the built-in :func:`format` function. Each formattable type may define how the format specification is to be interpreted. @@ -328,7 +331,7 @@ The general form of a *standard format specifier* is: sign: "+" | "-" | " " width_and_precision: [`width_with_grouping`][`precision_with_grouping`] width_with_grouping: [`width`][`grouping`] - precision_with_grouping: "." [`precision`][`grouping`] + precision_with_grouping: "." [`precision`][`grouping`] | "." `grouping` width: `~python-grammar:digit`+ precision: `~python-grammar:digit`+ grouping: "," | "_" @@ -789,10 +792,22 @@ Nesting arguments and more complex examples:: -.. _template-strings: +.. _template-strings-pep292: -Template strings ----------------- +Template strings ($-strings) +---------------------------- + +.. note:: + + The feature described here was introduced in Python 2.4; + a simple templating method based upon regular expressions. + It predates :meth:`str.format`, :ref:`formatted string literals `, + and :ref:`template string literals `. + + It is unrelated to template string literals (t-strings), + which were introduced in Python 3.14. + These evaluate to :class:`string.templatelib.Template` objects, + found in the :mod:`string.templatelib` module. Template strings provide simpler string substitutions as described in :pep:`292`. A primary use case for template strings is for @@ -858,7 +873,7 @@ these rules. The methods of :class:`Template` are: .. method:: is_valid() - Returns false if the template has invalid placeholders that will cause + Returns ``False`` if the template has invalid placeholders that will cause :meth:`substitute` to raise :exc:`ValueError`. .. versionadded:: 3.11 diff --git a/Doc/library/string.templatelib.rst b/Doc/library/string.templatelib.rst new file mode 100644 index 00000000000000..a5b2d796aaf4b8 --- /dev/null +++ b/Doc/library/string.templatelib.rst @@ -0,0 +1,349 @@ +:mod:`!string.templatelib` --- Support for template string literals +=================================================================== + +.. module:: string.templatelib + :synopsis: Support for template string literals. + +**Source code:** :source:`Lib/string/templatelib.py` + +-------------- + +.. seealso:: + + * :ref:`Format strings ` + * :ref:`Template string literal (t-string) syntax ` + * :pep:`750` + +.. _template-strings: + +Template strings +---------------- + +.. versionadded:: 3.14 + +Template strings are a mechanism for custom string processing. +They have the full flexibility of Python's :ref:`f-strings`, +but return a :class:`Template` instance that gives access +to the static and interpolated (in curly brackets) parts of a string +*before* they are combined. + +To write a t-string, use a ``'t'`` prefix instead of an ``'f'``, like so: + +.. code-block:: pycon + + >>> pi = 3.14 + >>> t't-strings are new in Python {pi!s}!' + Template( + strings=('t-strings are new in Python ', '!'), + interpolations=(Interpolation(3.14, 'pi', 's', ''),) + ) + +Types +----- + +.. class:: Template + + The :class:`!Template` class describes the contents of a template string. + It is immutable, meaning that attributes of a template cannot be reassigned. + + The most common way to create a :class:`!Template` instance is to use the + :ref:`template string literal syntax `. + This syntax is identical to that of :ref:`f-strings `, + except that it uses a ``t`` prefix in place of an ``f``: + + >>> cheese = 'Red Leicester' + >>> template = t"We're fresh out of {cheese}, sir." + >>> type(template) + + + Templates are stored as sequences of literal :attr:`~Template.strings` + and dynamic :attr:`~Template.interpolations`. + A :attr:`~Template.values` attribute holds the values of the interpolations: + + >>> cheese = 'Camembert' + >>> template = t'Ah! We do have {cheese}.' + >>> template.strings + ('Ah! We do have ', '.') + >>> template.interpolations + (Interpolation('Camembert', ...),) + >>> template.values + ('Camembert',) + + The :attr:`!strings` tuple has one more element than :attr:`!interpolations` + and :attr:`!values`; the interpolations “belong” between the strings. + This may be easier to understand when tuples are aligned + + .. code-block:: python + + template.strings: ('Ah! We do have ', '.') + template.values: ( 'Camembert', ) + + .. rubric:: Attributes + + .. attribute:: strings + :type: tuple[str, ...] + + A :class:`tuple` of the static strings in the template. + + >>> cheese = 'Camembert' + >>> template = t'Ah! We do have {cheese}.' + >>> template.strings + ('Ah! We do have ', '.') + + Empty strings *are* included in the tuple: + + >>> response = 'We do have ' + >>> cheese = 'Camembert' + >>> template = t'Ah! {response}{cheese}.' + >>> template.strings + ('Ah! ', '', '.') + + The ``strings`` tuple is never empty, and always contains one more + string than the ``interpolations`` and ``values`` tuples: + + >>> t''.strings + ('',) + >>> t''.values + () + >>> t'{'cheese'}'.strings + ('', '') + >>> t'{'cheese'}'.values + ('cheese',) + + .. attribute:: interpolations + :type: tuple[Interpolation, ...] + + A :class:`tuple` of the interpolations in the template. + + >>> cheese = 'Camembert' + >>> template = t'Ah! We do have {cheese}.' + >>> template.interpolations + (Interpolation('Camembert', 'cheese', None, ''),) + + The ``interpolations`` tuple may be empty and always contains one fewer + values than the ``strings`` tuple: + + >>> t'Red Leicester'.interpolations + () + + .. attribute:: values + :type: tuple[object, ...] + + A tuple of all interpolated values in the template. + + >>> cheese = 'Camembert' + >>> template = t'Ah! We do have {cheese}.' + >>> template.values + ('Camembert',) + + The ``values`` tuple always has the same length as the + ``interpolations`` tuple. It is always equivalent to + ``tuple(i.value for i in template.interpolations)``. + + .. rubric:: Methods + + .. method:: __new__(*args: str | Interpolation) + + While literal syntax is the most common way to create a :class:`!Template`, + it is also possible to create them directly using the constructor: + + >>> from string.templatelib import Interpolation, Template + >>> cheese = 'Camembert' + >>> template = Template( + ... 'Ah! We do have ', Interpolation(cheese, 'cheese'), '.' + ... ) + >>> list(template) + ['Ah! We do have ', Interpolation('Camembert', 'cheese', None, ''), '.'] + + If multiple strings are passed consecutively, they will be concatenated + into a single value in the :attr:`~Template.strings` attribute. For example, + the following code creates a :class:`Template` with a single final string: + + >>> from string.templatelib import Template + >>> template = Template('Ah! We do have ', 'Camembert', '.') + >>> template.strings + ('Ah! We do have Camembert.',) + + If multiple interpolations are passed consecutively, they will be treated + as separate interpolations and an empty string will be inserted between them. + For example, the following code creates a template with empty placeholders + in the :attr:`~Template.strings` attribute: + + >>> from string.templatelib import Interpolation, Template + >>> template = Template( + ... Interpolation('Camembert', 'cheese'), + ... Interpolation('.', 'punctuation'), + ... ) + >>> template.strings + ('', '', '') + + .. describe:: iter(template) + + Iterate over the template, yielding each non-empty string and + :class:`Interpolation` in the correct order: + + >>> cheese = 'Camembert' + >>> list(t'Ah! We do have {cheese}.') + ['Ah! We do have ', Interpolation('Camembert', 'cheese', None, ''), '.'] + + .. caution:: + + Empty strings are **not** included in the iteration: + + >>> response = 'We do have ' + >>> cheese = 'Camembert' + >>> list(t'Ah! {response}{cheese}.') # doctest: +NORMALIZE_WHITESPACE + ['Ah! ', + Interpolation('We do have ', 'response', None, ''), + Interpolation('Camembert', 'cheese', None, ''), + '.'] + + .. describe:: template + other + template += other + + Concatenate this template with another, returning a new + :class:`!Template` instance: + + >>> cheese = 'Camembert' + >>> list(t'Ah! ' + t'We do have {cheese}.') + ['Ah! We do have ', Interpolation('Camembert', 'cheese', None, ''), '.'] + + Concatenating a :class:`!Template` and a ``str`` is **not** supported. + This is because it is unclear whether the string should be treated as + a static string or an interpolation. + If you want to concatenate a :class:`!Template` with a string, + you should either wrap the string directly in a :class:`!Template` + (to treat it as a static string) + or use an :class:`!Interpolation` (to treat it as dynamic): + + >>> from string.templatelib import Interpolation, Template + >>> template = t'Ah! ' + >>> # Treat 'We do have ' as a static string + >>> template += Template('We do have ') + >>> # Treat cheese as an interpolation + >>> cheese = 'Camembert' + >>> template += Template(Interpolation(cheese, 'cheese')) + >>> list(template) + ['Ah! We do have ', Interpolation('Camembert', 'cheese', None, '')] + + +.. class:: Interpolation + + The :class:`!Interpolation` type represents an expression inside a template string. + It is immutable, meaning that attributes of an interpolation cannot be reassigned. + + Interpolations support pattern matching, allowing you to match against + their attributes with the :ref:`match statement `: + + >>> from string.templatelib import Interpolation + >>> interpolation = t'{1. + 2.:.2f}'.interpolations[0] + >>> interpolation + Interpolation(3.0, '1. + 2.', None, '.2f') + >>> match interpolation: + ... case Interpolation(value, expression, conversion, format_spec): + ... print(value, expression, conversion, format_spec, sep=' | ') + ... + 3.0 | 1. + 2. | None | .2f + + .. rubric:: Attributes + + .. attribute:: value + :type: object + + The evaluated value of the interpolation. + + >>> t'{1 + 2}'.interpolations[0].value + 3 + + .. attribute:: expression + :type: str + + For interpolations created by t-string literals, :attr:`!expression` + is the expression text found inside the curly brackets (``{`` & ``}``), + including any whitespace, excluding the curly brackets themselves, + and ending before the first ``!``, ``:``, or ``=`` if any is present. + For manually created interpolations, :attr:`!expression` is the arbitrary + string provided when constructing the interpolation instance. + + We recommend using valid Python expressions or the empty string for the + ``expression`` field of manually created :class:`!Interpolation` + instances, although this is not enforced at runtime. + + >>> t'{1 + 2}'.interpolations[0].expression + '1 + 2' + + .. attribute:: conversion + :type: typing.Literal['a', 'r', 's'] | None + + The conversion to apply to the value, or ``None``. + + The :attr:`!conversion` is the optional conversion to apply + to the value: + + >>> t'{1 + 2!a}'.interpolations[0].conversion + 'a' + + .. note:: + + Unlike f-strings, where conversions are applied automatically, + the expected behavior with t-strings is that code that *processes* the + :class:`!Template` will decide how to interpret and whether to apply + the :attr:`!conversion`. + For convenience, the :func:`convert` function can be used to mimic + f-string conversion semantics. + + .. attribute:: format_spec + :type: str + + The format specification to apply to the value. + + The :attr:`!format_spec` is an optional, arbitrary string + used as the format specification to present the value: + + >>> t'{1 + 2:.2f}'.interpolations[0].format_spec + '.2f' + + .. note:: + + Unlike f-strings, where format specifications are applied automatically + via the :func:`format` protocol, the expected behavior with + t-strings is that code that *processes* the interpolation will + decide how to interpret and whether to apply the format specification. + As a result, :attr:`!format_spec` values in interpolations + can be arbitrary strings, + including those that do not conform to the :func:`format` protocol. + + .. rubric:: Methods + + .. method:: __new__(value: object, \ + expression: str, \ + conversion: typing.Literal['a', 'r', 's'] | None = None, \ + format_spec: str = '') + + Create a new :class:`!Interpolation` object from component parts. + + :param value: The evaluated, in-scope result of the interpolation. + :param expression: The text of a valid Python expression, + or an empty string. + :param conversion: The :ref:`conversion ` to be used, + one of ``None``, ``'a'``, ``'r'``, or ``'s'``. + :param format_spec: An optional, arbitrary string used as the + :ref:`format specification ` to present the value. + + +Helper functions +---------------- + +.. function:: convert(obj, /, conversion) + + Applies formatted string literal :ref:`conversion ` + semantics to the given object *obj*. + This is frequently useful for custom template string processing logic. + + Three conversion flags are currently supported: + + * ``'s'`` which calls :func:`str` on the value (like ``!s``), + * ``'r'`` which calls :func:`repr` (like ``!r``), and + * ``'a'`` which calls :func:`ascii` (like ``!a``). + + If the conversion flag is ``None``, *obj* is returned unchanged. diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index 028a7861f36798..b8dfcc310771fe 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -649,7 +649,7 @@ functions. If specified, *env* must provide any variables required for the program to execute. On Windows, in order to run a `side-by-side assembly`_ the - specified *env* **must** include a valid :envvar:`SystemRoot`. + specified *env* **must** include a valid ``%SystemRoot%``. .. _side-by-side assembly: https://en.wikipedia.org/wiki/Side-by-Side_Assembly @@ -831,7 +831,9 @@ Instances of the :class:`Popen` class have the following methods: If the process does not terminate after *timeout* seconds, a :exc:`TimeoutExpired` exception will be raised. Catching this exception and - retrying communication will not lose any output. + retrying communication will not lose any output. Supplying *input* to a + subsequent post-timeout :meth:`communicate` call is in undefined behavior + and may become an error in the future. The child process is not killed if the timeout expires, so in order to cleanup properly a well-behaved application should kill the child process and @@ -844,6 +846,11 @@ Instances of the :class:`Popen` class have the following methods: proc.kill() outs, errs = proc.communicate() + After a call to :meth:`~Popen.communicate` raises :exc:`TimeoutExpired`, do + not call :meth:`~Popen.wait`. Use an additional :meth:`~Popen.communicate` + call to finish handling pipes and populate the :attr:`~Popen.returncode` + attribute. + .. note:: The data read is buffered in memory, so do not use this method if the data @@ -1473,7 +1480,7 @@ handling consistency are valid for these functions. Return ``(exitcode, output)`` of executing *cmd* in a shell. - Execute the string *cmd* in a shell with :meth:`Popen.check_output` and + Execute the string *cmd* in a shell with :func:`check_output` and return a 2-tuple ``(exitcode, output)``. *encoding* and *errors* are used to decode output; see the notes on :ref:`frequently-used-arguments` for more details. diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 0674074b8c0df6..303655fb128b37 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -137,7 +137,8 @@ The following events are supported: .. monitoring-event:: PY_UNWIND - Exit from a Python function during exception unwinding. + Exit from a Python function during exception unwinding. This includes exceptions raised directly within the + function and that are allowed to continue to propagate. .. monitoring-event:: PY_YIELD @@ -171,7 +172,7 @@ events, use the expression ``PY_RETURN | PY_START``. if get_events(DEBUGGER_ID) == NO_EVENTS: ... -Events are divided into three groups: + Setting this event deactivates all events. .. _monitoring-event-local: @@ -215,14 +216,17 @@ by another event: The :monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events are controlled by the :monitoring-event:`CALL` event. -:monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events will only be seen if the -corresponding :monitoring-event:`CALL` event is being monitored. +:monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events will only be +seen if the corresponding :monitoring-event:`CALL` event is being monitored. + + +.. _monitoring-event-global: Other events '''''''''''' Other events are not necessarily tied to a specific location in the -program and cannot be individually disabled. +program and cannot be individually disabled via :data:`DISABLE`. The other events that can be monitored are: @@ -243,20 +247,23 @@ raise an exception unless it would be visible to other code. To allow tools to monitor for real exceptions without slowing down generators and coroutines, the :monitoring-event:`STOP_ITERATION` event is provided. -:monitoring-event:`STOP_ITERATION` can be locally disabled, unlike :monitoring-event:`RAISE`. +:monitoring-event:`STOP_ITERATION` can be locally disabled, unlike +:monitoring-event:`RAISE`. -Note that the :monitoring-event:`STOP_ITERATION` event and the :monitoring-event:`RAISE` -event for a :exc:`StopIteration` exception are equivalent, and are treated as interchangeable -when generating events. Implementations will favor :monitoring-event:`STOP_ITERATION` for -performance reasons, but may generate a :monitoring-event:`RAISE` event with a :exc:`StopIteration`. +Note that the :monitoring-event:`STOP_ITERATION` event and the +:monitoring-event:`RAISE` event for a :exc:`StopIteration` exception are +equivalent, and are treated as interchangeable when generating events. +Implementations will favor :monitoring-event:`STOP_ITERATION` for performance +reasons, but may generate a :monitoring-event:`RAISE` event with a +:exc:`StopIteration`. Turning events on and off ------------------------- In order to monitor an event, it must be turned on and a corresponding callback -must be registered. -Events can be turned on or off by setting the events either globally or -for a particular code object. +must be registered. Events can be turned on or off by setting the events either +globally and/or for a particular code object. An event will trigger only once, +even if it is turned on both globally and locally. Setting events globally @@ -285,16 +292,13 @@ in Python (see :ref:`c-api-monitoring`). .. function:: get_local_events(tool_id: int, code: CodeType, /) -> int - Returns all the local events for *code* + Returns all the :ref:`local events ` for *code* .. function:: set_local_events(tool_id: int, code: CodeType, event_set: int, /) -> None - Activates all the local events for *code* which are set in *event_set*. - Raises a :exc:`ValueError` if *tool_id* is not in use. - -Local events add to global events, but do not mask them. -In other words, all global events will trigger for a code object, -regardless of the local events. + Activates all the :ref:`local events ` for *code* + which are set in *event_set*. Raises a :exc:`ValueError` if *tool_id* is not + in use. Disabling events @@ -305,15 +309,21 @@ Disabling events A special value that can be returned from a callback function to disable events for the current code location. -Local events can be disabled for a specific code location by returning -:data:`sys.monitoring.DISABLE` from a callback function. This does not change -which events are set, or any other code locations for the same event. +:ref:`Local events ` can be disabled for a specific code +location by returning :data:`sys.monitoring.DISABLE` from a callback function. +This does not change which events are set, or any other code locations for the +same event. Disabling events for specific locations is very important for high performance monitoring. For example, a program can be run under a debugger with no overhead if the debugger disables all monitoring except for a few breakpoints. +If :data:`DISABLE` is returned by a callback for a +:ref:`global event `, :exc:`ValueError` will be raised +by the interpreter in a non-specific location (that is, no traceback will be +provided). + .. function:: restart_events() -> None Enable all the events that were disabled by :data:`sys.monitoring.DISABLE` @@ -325,8 +335,6 @@ except for a few breakpoints. Registering callback functions ------------------------------ -To register a callable for events call - .. function:: register_callback(tool_id: int, event: int, func: Callable | None, /) -> Callable | None Registers the callable *func* for the *event* with the given *tool_id* @@ -335,13 +343,17 @@ To register a callable for events call it is unregistered and returned. Otherwise :func:`register_callback` returns ``None``. + .. audit-event:: sys.monitoring.register_callback func sys.monitoring.register_callback Functions can be unregistered by calling ``sys.monitoring.register_callback(tool_id, event, None)``. Callback functions can be registered and unregistered at any time. -Registering or unregistering a callback function will generate a :func:`sys.audit` event. +Callbacks are called only once regardless if the event is turned on both +globally and locally. As such, if an event could be turned on for both global +and local events by your code then the callback needs to be written to handle +either trigger. Callback function arguments @@ -353,37 +365,46 @@ Callback function arguments that there are no arguments to the call. When an active event occurs, the registered callback function is called. +Callback functions returning an object other than :data:`DISABLE` will have no effect. Different events will provide the callback function with different arguments, as follows: * :monitoring-event:`PY_START` and :monitoring-event:`PY_RESUME`:: - func(code: CodeType, instruction_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int) -> object * :monitoring-event:`PY_RETURN` and :monitoring-event:`PY_YIELD`:: - func(code: CodeType, instruction_offset: int, retval: object) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, retval: object) -> object -* :monitoring-event:`CALL`, :monitoring-event:`C_RAISE` and :monitoring-event:`C_RETURN`:: +* :monitoring-event:`CALL`, :monitoring-event:`C_RAISE` and :monitoring-event:`C_RETURN` + (*arg0* can be :data:`MISSING` specifically):: - func(code: CodeType, instruction_offset: int, callable: object, arg0: object | MISSING) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, callable: object, arg0: object) -> object + *code* represents the code object where the call is being made, while + *callable* is the object that is about to be called (and thus + triggered the event). If there are no arguments, *arg0* is set to :data:`sys.monitoring.MISSING`. + For instance methods, *callable* will be the function object as found on the + class with *arg0* set to the instance (i.e. the ``self`` argument to the + method). + * :monitoring-event:`RAISE`, :monitoring-event:`RERAISE`, :monitoring-event:`EXCEPTION_HANDLED`, :monitoring-event:`PY_UNWIND`, :monitoring-event:`PY_THROW` and :monitoring-event:`STOP_ITERATION`:: - func(code: CodeType, instruction_offset: int, exception: BaseException) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, exception: BaseException) -> object * :monitoring-event:`LINE`:: - func(code: CodeType, line_number: int) -> DISABLE | Any + func(code: CodeType, line_number: int) -> object * :monitoring-event:`BRANCH_LEFT`, :monitoring-event:`BRANCH_RIGHT` and :monitoring-event:`JUMP`:: - func(code: CodeType, instruction_offset: int, destination_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, destination_offset: int) -> object Note that the *destination_offset* is where the code will next execute. * :monitoring-event:`INSTRUCTION`:: - func(code: CodeType, instruction_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int) -> object diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 55e442b20ff877..1a25a9365a0319 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -953,6 +953,8 @@ always available. Unless explicitly noted otherwise, all variables are read-only This function should be used for internal and specialized purposes only. It is not guaranteed to exist in all implementations of Python. + .. versionadded:: 3.12 + .. function:: getobjects(limit[, type]) @@ -1128,10 +1130,14 @@ always available. Unless explicitly noted otherwise, all variables are read-only The size of the seed key of the hash algorithm + .. attribute:: hash_info.cutoff + + Cutoff for small string DJBX33A optimization in range ``[1, cutoff)``. + .. versionadded:: 3.2 .. versionchanged:: 3.4 - Added *algorithm*, *hash_bits* and *seed_bits* + Added *algorithm*, *hash_bits*, *seed_bits*, and *cutoff*. .. data:: hexversion @@ -1185,6 +1191,15 @@ always available. Unless explicitly noted otherwise, all variables are read-only ``cache_tag`` is set to ``None``, it indicates that module caching should be disabled. + *supports_isolated_interpreters* is a boolean value, whether + this implementation supports multiple isolated interpreters. + It is ``True`` for CPython on most platforms. Platforms with + this support implement the low-level :mod:`!_interpreters` module. + + .. seealso:: + + :pep:`684`, :pep:`734`, and :mod:`concurrent.interpreters`. + :data:`sys.implementation` may contain additional attributes specific to the Python implementation. These non-standard attributes must start with an underscore, and are not described here. Regardless of its contents, @@ -1194,6 +1209,9 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. versionadded:: 3.3 + .. versionchanged:: 3.14 + Added ``supports_isolated_interpreters`` field. + .. note:: The addition of new required attributes must go through the normal PEP @@ -1750,7 +1768,7 @@ always available. Unless explicitly noted otherwise, all variables are read-only :func:`settrace` for each thread being debugged or use :func:`threading.settrace`. Trace functions should have three arguments: *frame*, *event*, and - *arg*. *frame* is the current stack frame. *event* is a string: ``'call'``, + *arg*. *frame* is the :ref:`current stack frame `. *event* is a string: ``'call'``, ``'line'``, ``'return'``, ``'exception'`` or ``'opcode'``. *arg* depends on the event type. @@ -1933,6 +1951,22 @@ always available. Unless explicitly noted otherwise, all variables are read-only interpreter is pre-release (alpha, beta, or release candidate) then the local and remote interpreters must be the same exact version. + .. audit-event:: sys.remote_exec pid script_path + + When the code is executed in the remote process, an + :ref:`auditing event ` ``sys.remote_exec`` is raised with + the *pid* and the path to the script file. + This event is raised in the process that called :func:`sys.remote_exec`. + + .. audit-event:: cpython.remote_debugger_script script_path + + When the script is executed in the remote process, an + :ref:`auditing event ` + ``cpython.remote_debugger_script`` is raised + with the path in the remote process. + This event is raised in the remote process, not the one + that called :func:`sys.remote_exec`. + .. availability:: Unix, Windows. .. versionadded:: 3.14 @@ -2161,8 +2195,11 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. data:: api_version - The C API version for this interpreter. Programmers may find this useful when - debugging version conflicts between Python and extension modules. + The C API version, equivalent to the C macro :c:macro:`PYTHON_API_VERSION`. + Defined for backwards compatibility. + + Currently, this constant is not updated in new Python versions, and is not + useful for versioning. This may change in the future. .. data:: version_info diff --git a/Doc/library/sysconfig.rst b/Doc/library/sysconfig.rst index 684d14a74c48ab..532facb45f83a0 100644 --- a/Doc/library/sysconfig.rst +++ b/Doc/library/sysconfig.rst @@ -382,22 +382,19 @@ Other functions Examples of returned values: - - linux-i586 - - linux-alpha (?) - - solaris-2.6-sun4u - Windows will return one of: + Windows: - win-amd64 (64-bit Windows on AMD64, aka x86_64, Intel64, and EM64T) - win-arm64 (64-bit Windows on ARM64, aka AArch64) - win32 (all others - specifically, sys.platform is returned) - macOS can return: + POSIX based OS: - - macosx-10.6-ppc - - macosx-10.4-ppc64 - - macosx-10.3-i386 - - macosx-10.4-fat + - linux-x86_64 + - macosx-15.5-arm64 + - macosx-26.0-universal2 (macOS on Apple Silicon or Intel) + - android-24-arm64_v8a For other non-POSIX platforms, currently just returns :data:`sys.platform`. diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index f9cb5495e60cd2..e57a0f121c5ce8 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -18,8 +18,16 @@ higher-level functions in :ref:`shutil `. Some facts and figures: -* reads and writes :mod:`gzip`, :mod:`bz2` and :mod:`lzma` compressed archives - if the respective modules are available. +* reads and writes :mod:`gzip`, :mod:`bz2`, :mod:`compression.zstd`, and + :mod:`lzma` compressed archives if the respective modules are available. + + .. + The following paragraph should be similar to ../includes/optional-module.rst + + If any of these :term:`optional modules ` are missing from + your copy of CPython, look for documentation from your distributor (that is, + whoever provided Python to you). + If you are the distributor, see :ref:`optional-module-requirements`. * read/write support for the POSIX.1-1988 (ustar) format. @@ -47,6 +55,10 @@ Some facts and figures: or paths outside of the destination. Previously, the filter strategy was equivalent to :func:`fully_trusted `. +.. versionchanged:: 3.14 + + Added support for Zstandard compression using :mod:`compression.zstd`. + .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) Return a :class:`TarFile` object for the pathname *name*. For detailed @@ -59,8 +71,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | mode | action | +==================+=============================================+ - | ``'r' or 'r:*'`` | Open for reading with transparent | - | | compression (recommended). | + | ``'r'`` or | Open for reading with transparent | + | ``'r:*'`` | compression (recommended). | +------------------+---------------------------------------------+ | ``'r:'`` | Open for reading exclusively without | | | compression. | @@ -71,6 +83,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | ``'r:xz'`` | Open for reading with lzma compression. | +------------------+---------------------------------------------+ + | ``'r:zst'`` | Open for reading with Zstandard compression.| + +------------------+---------------------------------------------+ | ``'x'`` or | Create a tarfile exclusively without | | ``'x:'`` | compression. | | | Raise a :exc:`FileExistsError` exception | @@ -88,10 +102,15 @@ Some facts and figures: | | Raise a :exc:`FileExistsError` exception | | | if it already exists. | +------------------+---------------------------------------------+ - | ``'a' or 'a:'`` | Open for appending with no compression. The | - | | file is created if it does not exist. | + | ``'x:zst'`` | Create a tarfile with Zstandard compression.| + | | Raise a :exc:`FileExistsError` exception | + | | if it already exists. | + +------------------+---------------------------------------------+ + | ``'a'`` or | Open for appending with no compression. The | + | ``'a:'`` | file is created if it does not exist. | +------------------+---------------------------------------------+ - | ``'w' or 'w:'`` | Open for uncompressed writing. | + | ``'w'`` or | Open for uncompressed writing. | + | ``'w:'`` | | +------------------+---------------------------------------------+ | ``'w:gz'`` | Open for gzip compressed writing. | +------------------+---------------------------------------------+ @@ -99,6 +118,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | ``'w:xz'`` | Open for lzma compressed writing. | +------------------+---------------------------------------------+ + | ``'w:zst'`` | Open for Zstandard compressed writing. | + +------------------+---------------------------------------------+ Note that ``'a:gz'``, ``'a:bz2'`` or ``'a:xz'`` is not possible. If *mode* is not suitable to open a certain (compressed) file for reading, @@ -115,6 +136,15 @@ Some facts and figures: For modes ``'w:xz'``, ``'x:xz'`` and ``'w|xz'``, :func:`tarfile.open` accepts the keyword argument *preset* to specify the compression level of the file. + For modes ``'w:zst'``, ``'x:zst'`` and ``'w|zst'``, :func:`tarfile.open` + accepts the keyword argument *level* to specify the compression level of + the file. The keyword argument *options* may also be passed, providing + advanced Zstandard compression parameters described by + :class:`~compression.zstd.CompressionParameter`. The keyword argument + *zstd_dict* can be passed to provide a :class:`~compression.zstd.ZstdDict`, + a Zstandard dictionary used to improve compression of smaller amounts of + data. + For special purposes, there is a second format for *mode*: ``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile` object that processes its data as a stream of blocks. No random seeking will @@ -146,6 +176,9 @@ Some facts and figures: | ``'r|xz'`` | Open an lzma compressed *stream* for | | | reading. | +-------------+--------------------------------------------+ + | ``'r|zst'`` | Open a Zstandard compressed *stream* for | + | | reading. | + +-------------+--------------------------------------------+ | ``'w|'`` | Open an uncompressed *stream* for writing. | +-------------+--------------------------------------------+ | ``'w|gz'`` | Open a gzip compressed *stream* for | @@ -157,6 +190,9 @@ Some facts and figures: | ``'w|xz'`` | Open an lzma compressed *stream* for | | | writing. | +-------------+--------------------------------------------+ + | ``'w|zst'`` | Open a Zstandard compressed *stream* for | + | | writing. | + +-------------+--------------------------------------------+ .. versionchanged:: 3.5 The ``'x'`` (exclusive creation) mode was added. @@ -255,6 +291,15 @@ The :mod:`tarfile` module defines the following exceptions: Raised to refuse extracting a symbolic link pointing outside the destination directory. +.. exception:: LinkFallbackError + + Raised to refuse emulating a link (hard or symbolic) by extracting another + archive member, when that member would be rejected by the filter location. + The exception that was raised to reject the replacement member is available + as :attr:`!BaseException.__context__`. + + .. versionadded:: 3.14 + The following constants are available at the module level: @@ -1068,6 +1113,12 @@ reused in custom filters: Implements the ``'data'`` filter. In addition to what ``tar_filter`` does: + - Normalize link targets (:attr:`TarInfo.linkname`) using + :func:`os.path.normpath`. + Note that this removes internal ``..`` components, which may change the + meaning of the link if the path in :attr:`!TarInfo.linkname` traverses + symbolic links. + - :ref:`Refuse ` to extract links (hard or soft) that link to absolute paths, or ones that link outside the destination. @@ -1099,6 +1150,10 @@ reused in custom filters: Note that this filter does not block *all* dangerous archive features. See :ref:`tarfile-further-verification` for details. + .. versionchanged:: 3.14 + + Link targets are now normalized. + .. _tarfile-extraction-refuse: @@ -1127,6 +1182,7 @@ Here is an incomplete list of things to consider: * Extract to a :func:`new temporary directory ` to prevent e.g. exploiting pre-existing links, and to make it easier to clean up after a failed extraction. +* Disallow symbolic links if you do not need the functionality. * When working with untrusted data, use external (e.g. OS-level) limits on disk, memory and CPU usage. * Check filenames against an allow-list of characters @@ -1305,6 +1361,9 @@ Command-line options Examples -------- +Reading examples +~~~~~~~~~~~~~~~~~~~ + How to extract an entire tar archive to the current working directory:: import tarfile @@ -1327,6 +1386,23 @@ a generator function instead of a list:: tar.extractall(members=py_files(tar)) tar.close() +How to read a gzip compressed tar archive and display some member information:: + + import tarfile + tar = tarfile.open("sample.tar.gz", "r:gz") + for tarinfo in tar: + print(tarinfo.name, "is", tarinfo.size, "bytes in size and is ", end="") + if tarinfo.isreg(): + print("a regular file.") + elif tarinfo.isdir(): + print("a directory.") + else: + print("something else.") + tar.close() + +Writing examples +~~~~~~~~~~~~~~~~ + How to create an uncompressed tar archive from a list of filenames:: import tarfile @@ -1342,19 +1418,15 @@ The same example using the :keyword:`with` statement:: for name in ["foo", "bar", "quux"]: tar.add(name) -How to read a gzip compressed tar archive and display some member information:: +How to create and write an archive to stdout using +:data:`sys.stdout.buffer ` in the *fileobj* parameter +in :meth:`TarFile.add`:: - import tarfile - tar = tarfile.open("sample.tar.gz", "r:gz") - for tarinfo in tar: - print(tarinfo.name, "is", tarinfo.size, "bytes in size and is ", end="") - if tarinfo.isreg(): - print("a regular file.") - elif tarinfo.isdir(): - print("a directory.") - else: - print("something else.") - tar.close() + import sys + import tarfile + with tarfile.open("sample.tar.gz", "w|gz", fileobj=sys.stdout.buffer) as tar: + for name in ["foo", "bar", "quux"]: + tar.add(name) How to create an archive and reset the user information using the *filter* parameter in :meth:`TarFile.add`:: diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 0aae14c15a6104..395cde21ccf449 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -851,7 +851,7 @@ The :mod:`test.support` module defines the following functions: Decorator for tests that fill the address space. -.. function:: linked_with_musl() +.. function:: linked_to_musl() Return ``False`` if there is no evidence the interpreter was compiled with ``musl``, otherwise return a version triple, either ``(0, 0, 0)`` if the @@ -1384,6 +1384,13 @@ The :mod:`test.support.threading_helper` module provides support for threading t .. versionadded:: 3.8 +.. function:: run_concurrently(worker_func, nthreads, args=(), kwargs={}) + + Run the worker function concurrently in multiple threads. + Re-raises an exception if any thread raises one, after all threads have + finished. + + :mod:`test.support.os_helper` --- Utilities for os tests ======================================================================== diff --git a/Doc/library/text.rst b/Doc/library/text.rst index 47b678434fc899..92e7dd9a53b80d 100644 --- a/Doc/library/text.rst +++ b/Doc/library/text.rst @@ -16,6 +16,7 @@ Python's built-in string type in :ref:`textseq`. .. toctree:: string.rst + string.templatelib.rst re.rst difflib.rst textwrap.rst diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 249c0a5cb035c3..cabb41442f8419 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -11,6 +11,52 @@ This module constructs higher-level threading interfaces on top of the lower level :mod:`_thread` module. +.. include:: ../includes/wasm-notavail.rst + +Introduction +------------ + +The :mod:`!threading` module provides a way to run multiple `threads +`_ (smaller +units of a process) concurrently within a single process. It allows for the +creation and management of threads, making it possible to execute tasks in +parallel, sharing memory space. Threads are particularly useful when tasks are +I/O bound, such as file operations or making network requests, +where much of the time is spent waiting for external resources. + +A typical use case for :mod:`!threading` includes managing a pool of worker +threads that can process multiple tasks concurrently. Here's a basic example of +creating and starting threads using :class:`~threading.Thread`:: + + import threading + import time + + def crawl(link, delay=3): + print(f"crawl started for {link}") + time.sleep(delay) # Blocking I/O (simulating a network request) + print(f"crawl ended for {link}") + + links = [ + "https://python.org", + "https://docs.python.org", + "https://peps.python.org", + ] + + # Start threads for each link + threads = [] + for link in links: + # Using `args` to pass positional arguments and `kwargs` for keyword arguments + t = threading.Thread(target=crawl, args=(link,), kwargs={"delay": 2}) + threads.append(t) + + # Start each thread + for t in threads: + t.start() + + # Wait for all threads to finish + for t in threads: + t.join() + .. versionchanged:: 3.7 This module used to be optional, it is now always available. @@ -45,7 +91,25 @@ level :mod:`_thread` module. However, threading is still an appropriate model if you want to run multiple I/O-bound tasks simultaneously. -.. include:: ../includes/wasm-notavail.rst +GIL and performance considerations +---------------------------------- + +Unlike the :mod:`multiprocessing` module, which uses separate processes to +bypass the :term:`global interpreter lock` (GIL), the threading module operates +within a single process, meaning that all threads share the same memory space. +However, the GIL limits the performance gains of threading when it comes to +CPU-bound tasks, as only one thread can execute Python bytecode at a time. +Despite this, threads remain a useful tool for achieving concurrency in many +scenarios. + +As of Python 3.13, :term:`free-threaded ` builds +can disable the GIL, enabling true parallel execution of threads, but this +feature is not available by default (see :pep:`703`). + +.. TODO: At some point this feature will become available by default. + +Reference +--------- This module defines the following functions: @@ -62,7 +126,7 @@ This module defines the following functions: Return the current :class:`Thread` object, corresponding to the caller's thread of control. If the caller's thread of control was not created through the - :mod:`threading` module, a dummy thread object with limited functionality is + :mod:`!threading` module, a dummy thread object with limited functionality is returned. The function ``currentThread`` is a deprecated alias for this function. @@ -157,13 +221,13 @@ This module defines the following functions: .. index:: single: trace function - Set a trace function for all threads started from the :mod:`threading` module. + Set a trace function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.settrace` for each thread, before its :meth:`~Thread.run` method is called. .. function:: settrace_all_threads(func) - Set a trace function for all threads started from the :mod:`threading` module + Set a trace function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. The *func* will be passed to :func:`sys.settrace` for each thread, before its @@ -186,13 +250,13 @@ This module defines the following functions: .. index:: single: profile function - Set a profile function for all threads started from the :mod:`threading` module. + Set a profile function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.setprofile` for each thread, before its :meth:`~Thread.run` method is called. .. function:: setprofile_all_threads(func) - Set a profile function for all threads started from the :mod:`threading` module + Set a profile function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. The *func* will be passed to :func:`sys.setprofile` for each thread, before its @@ -257,8 +321,8 @@ when implemented, are mapped to module-level functions. All of the methods described below are executed atomically. -Thread-Local Data ------------------ +Thread-local data +^^^^^^^^^^^^^^^^^ Thread-local data is data whose values are thread specific. If you have data that you want to be local to a thread, create a @@ -389,8 +453,8 @@ affects what we see:: .. _thread-objects: -Thread Objects --------------- +Thread objects +^^^^^^^^^^^^^^ The :class:`Thread` class represents an activity that is run in a separate thread of control. There are two ways to specify the activity: by passing a @@ -557,7 +621,7 @@ since it is impossible to detect the termination of alien threads. an error to :meth:`~Thread.join` a thread before it has been started and attempts to do so raise the same exception. - If an attempt is made to join a running daemonic thread in in late stages + If an attempt is made to join a running daemonic thread in late stages of :term:`Python finalization ` :meth:`!join` raises a :exc:`PythonFinalizationError`. @@ -645,8 +709,8 @@ since it is impossible to detect the termination of alien threads. .. _lock-objects: -Lock Objects ------------- +Lock objects +^^^^^^^^^^^^ A primitive lock is a synchronization primitive that is not owned by a particular thread when locked. In Python, it is currently the lowest level @@ -738,8 +802,8 @@ All methods are executed atomically. .. _rlock-objects: -RLock Objects -------------- +RLock objects +^^^^^^^^^^^^^ A reentrant lock is a synchronization primitive that may be acquired multiple times by the same thread. Internally, it uses the concepts of "owning thread" @@ -848,8 +912,8 @@ call release as many times the lock has been acquired can lead to deadlock. .. _condition-objects: -Condition Objects ------------------ +Condition objects +^^^^^^^^^^^^^^^^^ A condition variable is always associated with some kind of lock; this can be passed in or one will be created by default. Passing one in is useful when @@ -1026,8 +1090,8 @@ item to the buffer only needs to wake up one consumer thread. .. _semaphore-objects: -Semaphore Objects ------------------ +Semaphore objects +^^^^^^^^^^^^^^^^^ This is one of the oldest synchronization primitives in the history of computer science, invented by the early Dutch computer scientist Edsger W. Dijkstra (he @@ -1107,7 +1171,7 @@ Semaphores also support the :ref:`context management protocol `. .. _semaphore-examples: -:class:`Semaphore` Example +:class:`Semaphore` example ^^^^^^^^^^^^^^^^^^^^^^^^^^ Semaphores are often used to guard resources with limited capacity, for example, @@ -1135,8 +1199,8 @@ causes the semaphore to be released more than it's acquired will go undetected. .. _event-objects: -Event Objects -------------- +Event objects +^^^^^^^^^^^^^ This is one of the simplest mechanisms for communication between threads: one thread signals an event and other threads wait for it. @@ -1192,8 +1256,8 @@ method. The :meth:`~Event.wait` method blocks until the flag is true. .. _timer-objects: -Timer Objects -------------- +Timer objects +^^^^^^^^^^^^^ This class represents an action that should be run only after a certain amount of time has passed --- a timer. :class:`Timer` is a subclass of :class:`Thread` @@ -1230,8 +1294,8 @@ For example:: only work if the timer is still in its waiting stage. -Barrier Objects ---------------- +Barrier objects +^^^^^^^^^^^^^^^ .. versionadded:: 3.2 diff --git a/Doc/library/time.rst b/Doc/library/time.rst index 542493a82af94d..a8b721b59df348 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -238,8 +238,8 @@ Functions The result has the following attributes: - - *adjustable*: ``True`` if the clock can be changed automatically (e.g. by - a NTP daemon) or manually by the system administrator, ``False`` otherwise + - *adjustable*: ``True`` if the clock can be set to jump forward or backward + in time, ``False`` otherwise. Does not refer to gradual NTP rate adjustments. - *implementation*: The name of the underlying C function used to get the clock value. Refer to :ref:`time-clock-id-constants` for possible values. - *monotonic*: ``True`` if the clock cannot go backward, @@ -306,10 +306,11 @@ Functions .. versionadded:: 3.3 .. versionchanged:: 3.5 - The function is now always available and always system-wide. + The function is now always available and the clock is now the same for + all processes. .. versionchanged:: 3.10 - On macOS, the function is now system-wide. + On macOS, the clock is now the same for all processes. .. function:: monotonic_ns() -> int @@ -325,7 +326,8 @@ Functions Return the value (in fractional seconds) of a performance counter, i.e. a clock with the highest available resolution to measure a short duration. It - does include time elapsed during sleep and is system-wide. The reference + does include time elapsed during sleep. The clock is the same for all + processes. The reference point of the returned value is undefined, so that only the difference between the results of two calls is valid. @@ -340,7 +342,7 @@ Functions .. versionadded:: 3.3 .. versionchanged:: 3.10 - On Windows, the function is now system-wide. + On Windows, the clock is now the same for all processes. .. versionchanged:: 3.13 Use the same clock as :func:`time.monotonic`. @@ -394,9 +396,9 @@ Functions On Windows, if *secs* is zero, the thread relinquishes the remainder of its time slice to any other thread that is ready to run. If there are no other threads ready to run, the function returns immediately, and the thread - continues execution. On Windows 8.1 and newer the implementation uses + continues execution. On Windows 10 and newer the implementation uses a `high-resolution timer - `_ + `_ which provides resolution of 100 nanoseconds. If *secs* is zero, ``Sleep(0)`` is used. .. rubric:: Unix implementation @@ -568,7 +570,7 @@ Functions calculations when the day of the week and the year are specified. Here is an example, a format for dates compatible with that specified in the - :rfc:`2822` Internet email standard. [1]_ :: + :rfc:`5322` Internet email standard. [1]_ :: >>> from time import gmtime, strftime >>> strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) @@ -712,13 +714,18 @@ Functions Clock: - * On Windows, call ``GetSystemTimeAsFileTime()``. + * On Windows, call ``GetSystemTimePreciseAsFileTime()``. * Call ``clock_gettime(CLOCK_REALTIME)`` if available. * Otherwise, call ``gettimeofday()``. Use :func:`time_ns` to avoid the precision loss caused by the :class:`float` type. +.. versionchanged:: 3.13 + + On Windows, calls ``GetSystemTimePreciseAsFileTime()`` instead of + ``GetSystemTimeAsFileTime()``. + .. function:: time_ns() -> int @@ -928,7 +935,7 @@ These constants are used as parameters for :func:`clock_getres` and .. data:: CLOCK_TAI - `International Atomic Time `_ + `International Atomic Time `_ The system must have a current leap second table in order for this to give the correct answer. PTP or NTP software can maintain a leap second table. @@ -982,8 +989,8 @@ The following constant is the only parameter that can be sent to .. data:: CLOCK_REALTIME - System-wide real-time clock. Setting this clock requires appropriate - privileges. + Real-time clock. Setting this clock requires appropriate privileges. + The clock is the same for all processes. .. availability:: Unix. @@ -1045,4 +1052,5 @@ Timezone Constants strict reading of the original 1982 :rfc:`822` standard calls for a two-digit year (``%y`` rather than ``%Y``), but practice moved to 4-digit years long before the year 2000. After that, :rfc:`822` became obsolete and the 4-digit year has - been first recommended by :rfc:`1123` and then mandated by :rfc:`2822`. + been first recommended by :rfc:`1123` and then mandated by :rfc:`2822`, + with :rfc:`5322` continuing this requirement. diff --git a/Doc/library/tk.rst b/Doc/library/tk.rst index 0593f8b73ea545..fa3c7e910ce21f 100644 --- a/Doc/library/tk.rst +++ b/Doc/library/tk.rst @@ -1,7 +1,7 @@ .. _tkinter: ********************************* -Graphical User Interfaces with Tk +Graphical user interfaces with Tk ********************************* .. index:: @@ -39,6 +39,7 @@ alternative `GUI frameworks and tools `_ @@ -392,7 +394,7 @@ by spaces. Without getting into too many details, notice the following: * Operations which are implemented as separate *commands* in Tcl (like ``grid`` or ``destroy``) are represented as *methods* on Tkinter widget objects. As you'll see shortly, at other times Tcl uses what appear to be - method calls on widget objects, which more closely mirror what would is + method calls on widget objects, which more closely mirror what is used in Tkinter. @@ -839,8 +841,7 @@ geometry For example: ``fred["geometry"] = "200x100"``. justify - Legal values are the strings: ``"left"``, ``"center"``, ``"right"``, and - ``"fill"``. + Legal values are the strings: ``"left"``, ``"center"``, and ``"right"``. region This is a string with four space-delimited elements, each of which is a legal diff --git a/Doc/library/token.rst b/Doc/library/token.rst index 1f92b5df4302bf..c228006d4c1e1d 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -51,7 +51,7 @@ The token constants are: .. data:: NAME Token value that indicates an :ref:`identifier `. - Note that keywords are also initially tokenized an ``NAME`` tokens. + Note that keywords are also initially tokenized as ``NAME`` tokens. .. data:: NUMBER diff --git a/Doc/library/tulip_coro.dia b/Doc/library/tulip_coro.dia deleted file mode 100644 index 70a33e3c00cf6e..00000000000000 Binary files a/Doc/library/tulip_coro.dia and /dev/null differ diff --git a/Doc/library/tulip_coro.png b/Doc/library/tulip_coro.png deleted file mode 100644 index aad41c93015d09..00000000000000 Binary files a/Doc/library/tulip_coro.png and /dev/null differ diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index fea6b57edf0f1f..95a57c57e71d56 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -29,6 +29,8 @@ introduced in Logo `_, developed by Wally Feurzeig, Seymour Papert and Cynthia Solomon in 1967. +.. include:: ../includes/optional-module.rst + Get started =========== @@ -777,13 +779,17 @@ Turtle motion 180.0 -.. function:: dot(size=None, *color) +.. function:: dot() + dot(size) + dot(color, /) + dot(size, color, /) + dot(size, r, g, b, /) :param size: an integer >= 1 (if given) :param color: a colorstring or a numeric color tuple Draw a circular dot with diameter *size*, using *color*. If *size* is - not given, the maximum of pensize+4 and 2*pensize is used. + not given, the maximum of ``pensize+4`` and ``2*pensize`` is used. .. doctest:: @@ -1152,7 +1158,9 @@ Drawing state Color control ~~~~~~~~~~~~~ -.. function:: pencolor(*args) +.. function:: pencolor() + pencolor(color, /) + pencolor(r, g, b, /) Return or set the pencolor. @@ -1161,7 +1169,7 @@ Color control ``pencolor()`` Return the current pencolor as color specification string or as a tuple (see example). May be used as input to another - color/pencolor/fillcolor call. + color/pencolor/fillcolor/bgcolor call. ``pencolor(colorstring)`` Set pencolor to *colorstring*, which is a Tk color specification string, @@ -1201,7 +1209,9 @@ Color control (50.0, 193.0, 143.0) -.. function:: fillcolor(*args) +.. function:: fillcolor() + fillcolor(color, /) + fillcolor(r, g, b, /) Return or set the fillcolor. @@ -1210,7 +1220,7 @@ Color control ``fillcolor()`` Return the current fillcolor as color specification string, possibly in tuple format (see example). May be used as input to another - color/pencolor/fillcolor call. + color/pencolor/fillcolor/bgcolor call. ``fillcolor(colorstring)`` Set fillcolor to *colorstring*, which is a Tk color specification string, @@ -1244,7 +1254,10 @@ Color control (255.0, 255.0, 255.0) -.. function:: color(*args) +.. function:: color() + color(color, /) + color(r, g, b, /) + color(pencolor, fillcolor, /) Return or set pencolor and fillcolor. @@ -1870,13 +1883,32 @@ Most of the examples in this section refer to a TurtleScreen instance called Window control -------------- -.. function:: bgcolor(*args) +.. function:: bgcolor() + bgcolor(color, /) + bgcolor(r, g, b, /) + + Return or set the background color of the TurtleScreen. + + Four input formats are allowed: + + ``bgcolor()`` + Return the current background color as color specification string or + as a tuple (see example). May be used as input to another + color/pencolor/fillcolor/bgcolor call. - :param args: a color string or three numbers in the range 0..colormode or a - 3-tuple of such numbers + ``bgcolor(colorstring)`` + Set the background color to *colorstring*, which is a Tk color + specification string, such as ``"red"``, ``"yellow"``, or ``"#33cc8c"``. + ``bgcolor((r, g, b))`` + Set the background color to the RGB color represented by the tuple of + *r*, *g*, and *b*. + Each of *r*, *g*, and *b* must be in the range 0..colormode, where + colormode is either 1.0 or 255 (see :func:`colormode`). - Set or return background color of the TurtleScreen. + ``bgcolor(r, g, b)`` + Set the background color to the RGB color represented by *r*, *g*, and *b*. Each of + *r*, *g*, and *b* must be in the range 0..colormode. .. doctest:: :skipif: _tkinter is None @@ -2769,68 +2801,68 @@ The demo scripts are: .. tabularcolumns:: |l|L|L| -+----------------+------------------------------+-----------------------+ -| Name | Description | Features | -+================+==============================+=======================+ -| bytedesign | complex classical | :func:`tracer`, delay,| -| | turtle graphics pattern | :func:`update` | -+----------------+------------------------------+-----------------------+ -| chaos | graphs Verhulst dynamics, | world coordinates | -| | shows that computer's | | -| | computations can generate | | -| | results sometimes against the| | -| | common sense expectations | | -+----------------+------------------------------+-----------------------+ -| clock | analog clock showing time | turtles as clock's | -| | of your computer | hands, ontimer | -+----------------+------------------------------+-----------------------+ -| colormixer | experiment with r, g, b | :func:`ondrag` | -+----------------+------------------------------+-----------------------+ -| forest | 3 breadth-first trees | randomization | -+----------------+------------------------------+-----------------------+ -| fractalcurves | Hilbert & Koch curves | recursion | -+----------------+------------------------------+-----------------------+ -| lindenmayer | ethnomathematics | L-System | -| | (indian kolams) | | -+----------------+------------------------------+-----------------------+ -| minimal_hanoi | Towers of Hanoi | Rectangular Turtles | -| | | as Hanoi discs | -| | | (shape, shapesize) | -+----------------+------------------------------+-----------------------+ -| nim | play the classical nim game | turtles as nimsticks, | -| | with three heaps of sticks | event driven (mouse, | -| | against the computer. | keyboard) | -+----------------+------------------------------+-----------------------+ -| paint | super minimalistic | :func:`onclick` | -| | drawing program | | -+----------------+------------------------------+-----------------------+ -| peace | elementary | turtle: appearance | -| | | and animation | -+----------------+------------------------------+-----------------------+ -| penrose | aperiodic tiling with | :func:`stamp` | -| | kites and darts | | -+----------------+------------------------------+-----------------------+ -| planet_and_moon| simulation of | compound shapes, | -| | gravitational system | :class:`Vec2D` | -+----------------+------------------------------+-----------------------+ -| rosette | a pattern from the wikipedia | :func:`clone`, | -| | article on turtle graphics | :func:`undo` | -+----------------+------------------------------+-----------------------+ -| round_dance | dancing turtles rotating | compound shapes, clone| -| | pairwise in opposite | shapesize, tilt, | -| | direction | get_shapepoly, update | -+----------------+------------------------------+-----------------------+ -| sorting_animate| visual demonstration of | simple alignment, | -| | different sorting methods | randomization | -+----------------+------------------------------+-----------------------+ -| tree | a (graphical) breadth | :func:`clone` | -| | first tree (using generators)| | -+----------------+------------------------------+-----------------------+ -| two_canvases | simple design | turtles on two | -| | | canvases | -+----------------+------------------------------+-----------------------+ -| yinyang | another elementary example | :func:`circle` | -+----------------+------------------------------+-----------------------+ ++------------------------+------------------------------+--------------------------------------+ +| Name | Description | Features | ++========================+==============================+======================================+ +| ``bytedesign`` | complex classical | :func:`tracer`, :func:`delay`, | +| | turtle graphics pattern | :func:`update` | ++------------------------+------------------------------+--------------------------------------+ +| ``chaos`` | graphs Verhulst dynamics, | world coordinates | +| | shows that computer's | | +| | computations can generate | | +| | results sometimes against the| | +| | common sense expectations | | ++------------------------+------------------------------+--------------------------------------+ +| ``clock`` | analog clock showing time | turtles as clock's | +| | of your computer | hands, :func:`ontimer` | ++------------------------+------------------------------+--------------------------------------+ +| ``colormixer`` | experiment with r, g, b | :func:`ondrag` | ++------------------------+------------------------------+--------------------------------------+ +| ``forest`` | 3 breadth-first trees | randomization | ++------------------------+------------------------------+--------------------------------------+ +| ``fractalcurves`` | Hilbert & Koch curves | recursion | ++------------------------+------------------------------+--------------------------------------+ +| ``lindenmayer`` | ethnomathematics | L-System | +| | (indian kolams) | | ++------------------------+------------------------------+--------------------------------------+ +| ``minimal_hanoi`` | Towers of Hanoi | Rectangular Turtles | +| | | as Hanoi discs | +| | | (:func:`shape`, :func:`shapesize`) | ++------------------------+------------------------------+--------------------------------------+ +| ``nim`` | play the classical nim game | turtles as nimsticks, | +| | with three heaps of sticks | event driven (mouse, | +| | against the computer. | keyboard) | ++------------------------+------------------------------+--------------------------------------+ +| ``paint`` | super minimalistic | :func:`onclick` | +| | drawing program | | ++------------------------+------------------------------+--------------------------------------+ +| ``peace`` | elementary | turtle: appearance | +| | | and animation | ++------------------------+------------------------------+--------------------------------------+ +| ``penrose`` | aperiodic tiling with | :func:`stamp` | +| | kites and darts | | ++------------------------+------------------------------+--------------------------------------+ +| ``planet_and_moon`` | simulation of | compound shapes, | +| | gravitational system | :class:`Vec2D` | ++------------------------+------------------------------+--------------------------------------+ +| ``rosette`` | a pattern from the wikipedia | :func:`clone`, | +| | article on turtle graphics | :func:`undo` | ++------------------------+------------------------------+--------------------------------------+ +| ``round_dance`` | dancing turtles rotating | compound shapes, :func:`clone` | +| | pairwise in opposite | :func:`shapesize`, :func:`tilt`, | +| | direction | :func:`get_shapepoly`, :func:`update`| ++------------------------+------------------------------+--------------------------------------+ +| ``sorting_animate`` | visual demonstration of | simple alignment, | +| | different sorting methods | randomization | ++------------------------+------------------------------+--------------------------------------+ +| ``tree`` | a (graphical) breadth | :func:`clone` | +| | first tree (using generators)| | ++------------------------+------------------------------+--------------------------------------+ +| ``two_canvases`` | simple design | turtles on two | +| | | canvases | ++------------------------+------------------------------+--------------------------------------+ +| ``yinyang`` | another elementary example | :func:`circle` | ++------------------------+------------------------------+--------------------------------------+ Have fun! diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index 54cc3ea3311adf..862f765b148c73 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -45,15 +45,15 @@ provides backports of these new features to older versions of Python. .. seealso:: - `"Typing cheat sheet" `_ + `Typing cheat sheet `_ A quick overview of type hints (hosted at the mypy docs) - "Type System Reference" section of `the mypy docs `_ + Type System Reference section of `the mypy docs `_ The Python typing system is standardised via PEPs, so this reference should broadly apply to most Python type checkers. (Some parts may still be specific to mypy.) - `"Static Typing with Python" `_ + `Static Typing with Python `_ Type-checker-agnostic documentation written by the community detailing type system features, useful typing related tools and typing best practices. @@ -64,7 +64,7 @@ Specification for the Python Type System ======================================== The canonical, up-to-date specification of the Python type system can be -found at `"Specification for the Python type system" `_. +found at `Specification for the Python type system `_. .. _type-aliases: @@ -230,9 +230,11 @@ For example: callback: Callable[[str], Awaitable[None]] = on_update +.. index:: single: ...; ellipsis literal + The subscription syntax must always be used with exactly two values: the argument list and the return type. The argument list must be a list of types, -a :class:`ParamSpec`, :data:`Concatenate`, or an ellipsis. The return type must +a :class:`ParamSpec`, :data:`Concatenate`, or an ellipsis (``...``). The return type must be a single type. If a literal ellipsis ``...`` is given as the argument list, it indicates that @@ -375,8 +377,11 @@ accepts *any number* of type arguments:: # but ``z`` has been assigned to a tuple of length 3 z: tuple[int] = (1, 2, 3) +.. index:: single: ...; ellipsis literal + To denote a tuple which could be of *any* length, and in which all elements are -of the same type ``T``, use ``tuple[T, ...]``. To denote an empty tuple, use +of the same type ``T``, use the literal ellipsis ``...``: ``tuple[T, ...]``. +To denote an empty tuple, use ``tuple[()]``. Using plain ``tuple`` as an annotation is equivalent to using ``tuple[Any, ...]``:: @@ -1162,6 +1167,8 @@ These can be used as types in annotations. They all support subscription using Special form for annotating higher-order functions. + .. index:: single: ...; ellipsis literal + ``Concatenate`` can be used in conjunction with :ref:`Callable ` and :class:`ParamSpec` to annotate a higher-order callable which adds, removes, or transforms parameters of another @@ -1383,7 +1390,7 @@ These can be used as types in annotations. They all support subscription using Using ``Annotated[T, x]`` as an annotation still allows for static typechecking of ``T``, as type checkers will simply ignore the metadata ``x``. In this way, ``Annotated`` differs from the - :func:`@no_type_check ` decorator, which can also be used for + :deco:`no_type_check` decorator, which can also be used for adding annotations outside the scope of the typing system, but completely disables typechecking for a function or class. @@ -2262,7 +2269,7 @@ without the dedicated syntax, as documented below. .. attribute:: __module__ - The module in which the type alias was defined:: + The name of the module in which the type alias was defined:: >>> type Alias = int >>> Alias.__module__ @@ -2455,7 +2462,7 @@ types. .. attribute:: __module__ - The module in which the new type is defined. + The name of the module in which the new type is defined. .. attribute:: __name__ @@ -2573,7 +2580,7 @@ types. at runtime as soon as the class has been created. Monkey-patching attributes onto a runtime-checkable protocol will still work, but will have no impact on :func:`isinstance` checks comparing objects to the - protocol. See :ref:`"What's new in Python 3.12" ` + protocol. See :ref:`What's new in Python 3.12 ` for more details. @@ -2828,7 +2835,7 @@ Protocols --------- The following protocols are provided by the :mod:`!typing` module. All are decorated -with :func:`@runtime_checkable `. +with :deco:`runtime_checkable`. .. class:: SupportsAbs @@ -3009,7 +3016,7 @@ Functions and decorators The presence of ``@dataclass_transform()`` tells a static type checker that the decorated object performs runtime "magic" that transforms a class in a similar way to - :func:`@dataclasses.dataclass `. + :deco:`dataclasses.dataclass`. Example usage with a decorator function: @@ -3047,14 +3054,14 @@ Functions and decorators The ``CustomerModel`` classes defined above will be treated by type checkers similarly to classes created with - :func:`@dataclasses.dataclass `. + :deco:`dataclasses.dataclass`. For example, type checkers will assume these classes have ``__init__`` methods that accept ``id`` and ``name``. The decorated class, metaclass, or function may accept the following bool arguments which type checkers will assume have the same effect as they would have on the - :func:`@dataclasses.dataclass` decorator: ``init``, + :deco:`dataclasses.dataclass` decorator: ``init``, ``eq``, ``order``, ``unsafe_hash``, ``frozen``, ``match_args``, ``kw_only``, and ``slots``. It must be possible for the value of these arguments (``True`` or ``False``) to be statically evaluated. @@ -3182,12 +3189,12 @@ Functions and decorators .. function:: get_overloads(func) - Return a sequence of :func:`@overload `-decorated definitions for + Return a sequence of :deco:`overload`-decorated definitions for *func*. *func* is the function object for the implementation of the overloaded function. For example, given the definition of ``process`` in - the documentation for :func:`@overload `, + the documentation for :deco:`overload`, ``get_overloads(process)`` will return a sequence of three function objects for the three defined overloads. If called on a function with no overloads, ``get_overloads()`` returns an empty sequence. @@ -3344,7 +3351,7 @@ Introspection helpers If *globalns* or *localns* is not given, appropriate namespace dictionaries are inferred from *obj*. * ``None`` is replaced with :class:`types.NoneType`. - * If :func:`@no_type_check ` has been applied to *obj*, an + * If :deco:`no_type_check` has been applied to *obj*, an empty dictionary is returned. * If *obj* is a class ``C``, the function returns a dictionary that merges annotations from ``C``'s base classes with those on ``C`` directly. This @@ -3353,13 +3360,19 @@ Introspection helpers ``__annotations__`` dictionaries. Annotations on classes appearing earlier in the :term:`method resolution order` always take precedence over annotations on classes appearing later in the method resolution order. - * The function recursively replaces all occurrences of ``Annotated[T, ...]`` + * The function recursively replaces all occurrences of + ``Annotated[T, ...]``, ``Required[T]``, ``NotRequired[T]``, and ``ReadOnly[T]`` with ``T``, unless *include_extras* is set to ``True`` (see :class:`Annotated` for more information). - See also :func:`inspect.get_annotations`, a lower-level function that + See also :func:`annotationlib.get_annotations`, a lower-level function that returns annotations more directly. + .. caution:: + + This function may execute arbitrary code contained in annotations. + See :ref:`annotationlib-security` for more information. + .. note:: If any forward references in the annotations of *obj* are not resolvable @@ -3500,20 +3513,16 @@ Introspection helpers Evaluate an :class:`annotationlib.ForwardRef` as a :term:`type hint`. This is similar to calling :meth:`annotationlib.ForwardRef.evaluate`, - but unlike that method, :func:`!evaluate_forward_ref` also: - - * Recursively evaluates forward references nested within the type hint. - * Raises :exc:`TypeError` when it encounters certain objects that are - not valid type hints. - * Replaces type hints that evaluate to :const:`!None` with - :class:`types.NoneType`. - * Supports the :attr:`~annotationlib.Format.FORWARDREF` and - :attr:`~annotationlib.Format.STRING` formats. + but unlike that method, :func:`!evaluate_forward_ref` also + recursively evaluates forward references nested within the type hint. See the documentation for :meth:`annotationlib.ForwardRef.evaluate` for - the meaning of the *owner*, *globals*, *locals*, and *type_params* parameters. - *format* specifies the format of the annotation and is a member of - the :class:`annotationlib.Format` enum. + the meaning of the *owner*, *globals*, *locals*, *type_params*, and *format* parameters. + + .. caution:: + + This function may execute arbitrary code contained in annotations. + See :ref:`annotationlib-security` for more information. .. versionadded:: 3.14 @@ -3539,28 +3548,32 @@ Constant .. data:: TYPE_CHECKING A special constant that is assumed to be ``True`` by 3rd party static - type checkers. It is ``False`` at runtime. + type checkers. It's ``False`` at runtime. + + A module which is expensive to import, and which only contain types + used for typing annotations, can be safely imported inside an + ``if TYPE_CHECKING:`` block. This prevents the module from actually + being imported at runtime; annotations aren't eagerly evaluated + (see :pep:`649`) so using undefined symbols in annotations is + harmless--as long as you don't later examine them. + Your static type analysis tool will set ``TYPE_CHECKING`` to + ``True`` during static type analysis, which means the module will + be imported and the types will be checked properly during such analysis. Usage:: if TYPE_CHECKING: import expensive_mod - def fun(arg: 'expensive_mod.SomeType') -> None: + def fun(arg: expensive_mod.SomeType) -> None: local_var: expensive_mod.AnotherType = other_fun() - The first type annotation must be enclosed in quotes, making it a - "forward reference", to hide the ``expensive_mod`` reference from the - interpreter runtime. Type annotations for local variables are not - evaluated, so the second annotation does not need to be enclosed in quotes. - - .. note:: - - If ``from __future__ import annotations`` is used, - annotations are not evaluated at function definition time. - Instead, they are stored as strings in ``__annotations__``. - This makes it unnecessary to use quotes around the annotation - (see :pep:`563`). + If you occasionally need to examine type annotations at runtime + which may contain undefined symbols, use + :meth:`annotationlib.get_annotations` with a ``format`` parameter + of :attr:`annotationlib.Format.STRING` or + :attr:`annotationlib.Format.FORWARDREF` to safely retrieve the + annotations without raising :exc:`NameError`. .. versionadded:: 3.5.2 @@ -3776,6 +3789,28 @@ Aliases to container ABCs in :mod:`collections.abc` :class:`collections.abc.Set` now supports subscripting (``[]``). See :pep:`585` and :ref:`types-genericalias`. +.. class:: ByteString(Sequence[int]) + + Deprecated alias to :class:`collections.abc.ByteString`. + + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the :ref:`buffer protocol ` at runtime. For use in + type annotations, either use :class:`~collections.abc.Buffer` or a union + that explicitly specifies the types your code supports (e.g., + ``bytes | bytearray | memoryview``). + + :class:`!ByteString` was originally intended to be an abstract class that + would serve as a supertype of both :class:`bytes` and :class:`bytearray`. + However, since the ABC never had any methods, knowing that an object was an + instance of :class:`!ByteString` never actually told you anything useful + about the object. Other common buffer types such as :class:`memoryview` were + also never understood as subtypes of :class:`!ByteString` (either at runtime + or by static type checkers). + + See :pep:`PEP 688 <688#current-options>` for more details. + + .. deprecated-removed:: 3.9 3.17 + .. class:: Collection(Sized, Iterable[T_co], Container[T_co]) Deprecated alias to :class:`collections.abc.Collection`. @@ -4069,6 +4104,10 @@ convenience. This is subject to change, and not all deprecations are listed. - 3.9 - Undecided (see :ref:`deprecated-aliases` for more information) - :pep:`585` + * - :class:`typing.ByteString` + - 3.9 + - 3.17 + - :gh:`91896` * - :data:`typing.Text` - 3.11 - Undecided diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst index 0aef597d064e0e..b1c2e543718160 100644 --- a/Doc/library/unicodedata.rst +++ b/Doc/library/unicodedata.rst @@ -25,80 +25,133 @@ Standard Annex #44, `"Unicode Character Database" `_. It defines the following functions: +.. seealso:: + + The :ref:`unicode-howto` for more information about Unicode and how to use + this module. + .. function:: lookup(name) Look up character by name. If a character with the given name is found, return the corresponding character. If not found, :exc:`KeyError` is raised. + For example:: + + >>> unicodedata.lookup('LEFT CURLY BRACKET') + '{' + + The characters returned by this function are the same as those produced by + ``\N`` escape sequence in string literals. For example:: + + >>> unicodedata.lookup('MIDDLE DOT') == '\N{MIDDLE DOT}' + True .. versionchanged:: 3.3 Support for name aliases [#]_ and named sequences [#]_ has been added. -.. function:: name(chr[, default]) +.. function:: name(chr, default=None, /) Returns the name assigned to the character *chr* as a string. If no name is defined, *default* is returned, or, if not given, :exc:`ValueError` is - raised. + raised. For example:: + + >>> unicodedata.name('½') + 'VULGAR FRACTION ONE HALF' + >>> unicodedata.name('\uFFFF', 'fallback') + 'fallback' -.. function:: decimal(chr[, default]) +.. function:: decimal(chr, default=None, /) Returns the decimal value assigned to the character *chr* as integer. If no such value is defined, *default* is returned, or, if not given, - :exc:`ValueError` is raised. + :exc:`ValueError` is raised. For example:: + >>> unicodedata.decimal('\N{ARABIC-INDIC DIGIT NINE}') + 9 + >>> unicodedata.decimal('\N{SUPERSCRIPT NINE}', -1) + -1 -.. function:: digit(chr[, default]) + +.. function:: digit(chr, default=None, /) Returns the digit value assigned to the character *chr* as integer. If no such value is defined, *default* is returned, or, if not given, - :exc:`ValueError` is raised. + :exc:`ValueError` is raised:: + + >>> unicodedata.digit('\N{SUPERSCRIPT NINE}') + 9 -.. function:: numeric(chr[, default]) +.. function:: numeric(chr, default=None, /) Returns the numeric value assigned to the character *chr* as float. If no such value is defined, *default* is returned, or, if not given, - :exc:`ValueError` is raised. + :exc:`ValueError` is raised:: + + >>> unicodedata.numeric('½') + 0.5 .. function:: category(chr) Returns the general category assigned to the character *chr* as - string. + string. General category names consist of two letters. + See the `General Category Values section of the Unicode Character + Database documentation `_ + for a list of category codes. For example:: + + >>> unicodedata.category('A') # 'L'etter, 'u'ppercase + 'Lu' .. function:: bidirectional(chr) Returns the bidirectional class assigned to the character *chr* as string. If no such value is defined, an empty string is returned. + See the `Bidirectional Class Values section of the Unicode Character + Database `_ + documentation for a list of bidirectional codes. For example:: + + >>> unicodedata.bidirectional('\N{ARABIC-INDIC DIGIT SEVEN}') # 'A'rabic, 'N'umber + 'AN' .. function:: combining(chr) Returns the canonical combining class assigned to the character *chr* as integer. Returns ``0`` if no combining class is defined. + See the `Canonical Combining Class Values section of the Unicode Character + Database `_ + for more information. .. function:: east_asian_width(chr) Returns the east asian width assigned to the character *chr* as - string. + string. For a list of widths and or more information, see the + `Unicode Standard Annex #11 `_. .. function:: mirrored(chr) Returns the mirrored property assigned to the character *chr* as integer. Returns ``1`` if the character has been identified as a "mirrored" - character in bidirectional text, ``0`` otherwise. + character in bidirectional text, ``0`` otherwise. For example:: + + >>> unicodedata.mirrored('>') + 1 .. function:: decomposition(chr) Returns the character decomposition mapping assigned to the character *chr* as string. An empty string is returned in case no such mapping is - defined. + defined. For example:: + + >>> unicodedata.decomposition('Ã') + '0041 0303' .. function:: normalize(form, unistr) @@ -122,9 +175,9 @@ following functions: normally would be unified with other characters. For example, U+2160 (ROMAN NUMERAL ONE) is really the same thing as U+0049 (LATIN CAPITAL LETTER I). However, it is supported in Unicode for compatibility with existing character - sets (e.g. gb2312). + sets (for example, gb2312). - The normal form KD (NFKD) will apply the compatibility decomposition, i.e. + The normal form KD (NFKD) will apply the compatibility decomposition, that is, replace all compatibility characters with their equivalents. The normal form KC (NFKC) first applies the compatibility decomposition, followed by the canonical composition. @@ -133,6 +186,7 @@ following functions: a human reader, if one has combining characters and the other doesn't, they may not compare equal. + .. function:: is_normalized(form, unistr) Return whether the Unicode string *unistr* is in the normal form *form*. Valid @@ -154,24 +208,6 @@ In addition, the module exposes the following constant: Unicode database version 3.2 instead, for applications that require this specific version of the Unicode database (such as IDNA). -Examples: - - >>> import unicodedata - >>> unicodedata.lookup('LEFT CURLY BRACKET') - '{' - >>> unicodedata.name('/') - 'SOLIDUS' - >>> unicodedata.decimal('9') - 9 - >>> unicodedata.decimal('a') - Traceback (most recent call last): - File "", line 1, in - ValueError: not a decimal - >>> unicodedata.category('A') # 'L'etter, 'u'ppercase - 'Lu' - >>> unicodedata.bidirectional('\u0660') # 'A'rabic, 'N'umber - 'AN' - .. rubric:: Footnotes diff --git a/Doc/library/unittest.mock-examples.rst b/Doc/library/unittest.mock-examples.rst index 00cc9bfc0a5f2b..6af4298d44f532 100644 --- a/Doc/library/unittest.mock-examples.rst +++ b/Doc/library/unittest.mock-examples.rst @@ -600,13 +600,13 @@ this list of calls for us:: Partial mocking ~~~~~~~~~~~~~~~ -In some tests I wanted to mock out a call to :meth:`datetime.date.today` -to return a known date, but I didn't want to prevent the code under test from -creating new date objects. Unfortunately :class:`datetime.date` is written in C, and -so I couldn't just monkey-patch out the static :meth:`datetime.date.today` method. +For some tests, you may want to mock out a call to :meth:`datetime.date.today` +to return a known date, but don't want to prevent the code under test from +creating new date objects. Unfortunately :class:`datetime.date` is written in C, +so you cannot just monkey-patch out the static :meth:`datetime.date.today` method. -I found a simple way of doing this that involved effectively wrapping the date -class with a mock, but passing through calls to the constructor to the real +Instead, you can effectively wrap the date +class with a mock, while passing through calls to the constructor to the real class (and returning real instances). The :func:`patch decorator ` is used here to @@ -743,16 +743,15 @@ exception is raised in the setUp then tearDown is not called. Mocking Unbound Methods ~~~~~~~~~~~~~~~~~~~~~~~ -Whilst writing tests today I needed to patch an *unbound method* (patching the -method on the class rather than on the instance). I needed self to be passed -in as the first argument because I want to make asserts about which objects -were calling this particular method. The issue is that you can't patch with a -mock for this, because if you replace an unbound method with a mock it doesn't -become a bound method when fetched from the instance, and so it doesn't get -self passed in. The workaround is to patch the unbound method with a real -function instead. The :func:`patch` decorator makes it so simple to -patch out methods with a mock that having to create a real function becomes a -nuisance. +Sometimes a test needs to patch an *unbound method*, which means patching the +method on the class rather than on the instance. In order to make assertions +about which objects were calling this particular method, you need to pass +``self`` as the first argument. The issue is that you can't patch with a mock for +this, because if you replace an unbound method with a mock it doesn't become +a bound method when fetched from the instance, and so it doesn't get ``self`` +passed in. The workaround is to patch the unbound method with a real function +instead. The :func:`patch` decorator makes it so simple to patch out methods +with a mock that having to create a real function becomes a nuisance. If you pass ``autospec=True`` to patch then it does the patching with a *real* function object. This function object has the same signature as the one @@ -760,8 +759,8 @@ it is replacing, but delegates to a mock under the hood. You still get your mock auto-created in exactly the same way as before. What it means though, is that if you use it to patch out an unbound method on a class the mocked function will be turned into a bound method if it is fetched from an instance. -It will have ``self`` passed in as the first argument, which is exactly what I -wanted: +It will have ``self`` passed in as the first argument, which is exactly what +was needed: >>> class Foo: ... def foo(self): diff --git a/Doc/library/unittest.mock.rst b/Doc/library/unittest.mock.rst index 27c169dde72780..91f90a0726aa93 100644 --- a/Doc/library/unittest.mock.rst +++ b/Doc/library/unittest.mock.rst @@ -639,7 +639,7 @@ the *new_callable* argument to :func:`patch`. This is either ``None`` (if the mock hasn't been called), or the arguments that the mock was last called with. This will be in the form of a tuple: the first member, which can also be accessed through - the ``args`` property, is any ordered arguments the mock was + the ``args`` property, is any positional arguments the mock was called with (or an empty tuple) and the second member, which can also be accessed through the ``kwargs`` property, is any keyword arguments (or an empty dictionary). @@ -2654,9 +2654,9 @@ with any methods on the mock: .. code-block:: pycon - >>> mock.has_data() + >>> mock.header_items() - >>> mock.has_data.assret_called_with() # Intentional typo! + >>> mock.header_items.assret_called_with() # Intentional typo! Auto-speccing solves this problem. You can either pass ``autospec=True`` to :func:`patch` / :func:`patch.object` or use the :func:`create_autospec` function to create a diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 61022fe052ca80..d2ac5eedbbc047 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -109,7 +109,7 @@ Here is a short script to test three string methods:: unittest.main() -A testcase is created by subclassing :class:`unittest.TestCase`. The three +A test case is created by subclassing :class:`unittest.TestCase`. The three individual tests are defined with methods whose names start with the letters ``test``. This naming convention informs the test runner about which methods represent tests. @@ -438,7 +438,7 @@ run whether the test method succeeded or not. Such a working environment for the testing code is called a :dfn:`test fixture`. A new TestCase instance is created as a unique test fixture used to execute each individual test method. Thus -:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`~TestCase.__init__` +:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`!TestCase.__init__` will be called once per test. It is recommended that you use TestCase implementations to group tests together @@ -518,7 +518,7 @@ set-up and tear-down methods:: subclasses will make future test refactorings infinitely easier. In some cases, the existing tests may have been written using the :mod:`doctest` -module. If so, :mod:`doctest` provides a :class:`DocTestSuite` class that can +module. If so, :mod:`doctest` provides a :class:`~doctest.DocTestSuite` class that can automatically build :class:`unittest.TestSuite` instances from the existing :mod:`doctest`\ -based tests. @@ -1023,7 +1023,7 @@ Test cases additional keyword argument *msg*. The context manager will store the caught exception object in its - :attr:`exception` attribute. This can be useful if the intention + :attr:`!exception` attribute. This can be useful if the intention is to perform additional checks on the exception raised:: with self.assertRaises(SomeException) as cm: @@ -1036,7 +1036,7 @@ Test cases Added the ability to use :meth:`assertRaises` as a context manager. .. versionchanged:: 3.2 - Added the :attr:`exception` attribute. + Added the :attr:`!exception` attribute. .. versionchanged:: 3.3 Added the *msg* keyword argument when used as a context manager. @@ -1089,8 +1089,8 @@ Test cases additional keyword argument *msg*. The context manager will store the caught warning object in its - :attr:`warning` attribute, and the source line which triggered the - warnings in the :attr:`filename` and :attr:`lineno` attributes. + :attr:`!warning` attribute, and the source line which triggered the + warnings in the :attr:`!filename` and :attr:`!lineno` attributes. This can be useful if the intention is to perform additional checks on the warning caught:: @@ -1430,7 +1430,7 @@ Test cases that lists the differences between the sets. This method is used by default when comparing sets or frozensets with :meth:`assertEqual`. - Fails if either of *first* or *second* does not have a :meth:`set.difference` + Fails if either of *first* or *second* does not have a :meth:`~frozenset.difference` method. .. versionadded:: 3.1 @@ -1638,7 +1638,7 @@ Test cases .. method:: asyncSetUp() :async: - Method called to prepare the test fixture. This is called after :meth:`setUp`. + Method called to prepare the test fixture. This is called after :meth:`TestCase.setUp`. This is called immediately before calling the test method; other than :exc:`AssertionError` or :exc:`SkipTest`, any exception raised by this method will be considered an error rather than a test failure. The default implementation @@ -1648,7 +1648,7 @@ Test cases :async: Method called immediately after the test method has been called and the - result recorded. This is called before :meth:`tearDown`. This is called even if + result recorded. This is called before :meth:`~TestCase.tearDown`. This is called even if the test method raised an exception, so the implementation in subclasses may need to be particularly careful about checking internal state. Any exception, other than :exc:`AssertionError` or :exc:`SkipTest`, raised by this method will be @@ -1677,7 +1677,7 @@ Test cases Sets up a new event loop to run the test, collecting the result into the :class:`TestResult` object passed as *result*. If *result* is omitted or ``None``, a temporary result object is created (by calling - the :meth:`defaultTestResult` method) and used. The result object is + the :meth:`~TestCase.defaultTestResult` method) and used. The result object is returned to :meth:`run`'s caller. At the end of the test all the tasks in the event loop are cancelled. @@ -1798,7 +1798,7 @@ Grouping tests returned by repeated iterations before :meth:`TestSuite.run` must be the same for each call iteration. After :meth:`TestSuite.run`, callers should not rely on the tests returned by this method unless the caller uses a - subclass that overrides :meth:`TestSuite._removeTestAtIndex` to preserve + subclass that overrides :meth:`!TestSuite._removeTestAtIndex` to preserve test references. .. versionchanged:: 3.2 @@ -1809,10 +1809,10 @@ Grouping tests .. versionchanged:: 3.4 In earlier versions the :class:`TestSuite` held references to each :class:`TestCase` after :meth:`TestSuite.run`. Subclasses can restore - that behavior by overriding :meth:`TestSuite._removeTestAtIndex`. + that behavior by overriding :meth:`!TestSuite._removeTestAtIndex`. In the typical usage of a :class:`TestSuite` object, the :meth:`run` method - is invoked by a :class:`TestRunner` rather than by the end-user test harness. + is invoked by a :class:`!TestRunner` rather than by the end-user test harness. Loading and running tests @@ -1846,12 +1846,12 @@ Loading and running tests .. method:: loadTestsFromTestCase(testCaseClass) Return a suite of all test cases contained in the :class:`TestCase`\ -derived - :class:`testCaseClass`. + :class:`!testCaseClass`. A test case instance is created for each method named by :meth:`getTestCaseNames`. By default these are the method names beginning with ``test``. If :meth:`getTestCaseNames` returns no - methods, but the :meth:`runTest` method is implemented, a single test + methods, but the :meth:`!runTest` method is implemented, a single test case is created for that method instead. @@ -1898,13 +1898,13 @@ Loading and running tests case class will be picked up as "a test method within a test case class", rather than "a callable object". - For example, if you have a module :mod:`SampleTests` containing a - :class:`TestCase`\ -derived class :class:`SampleTestCase` with three test - methods (:meth:`test_one`, :meth:`test_two`, and :meth:`test_three`), the + For example, if you have a module :mod:`!SampleTests` containing a + :class:`TestCase`\ -derived class :class:`!SampleTestCase` with three test + methods (:meth:`!test_one`, :meth:`!test_two`, and :meth:`!test_three`), the specifier ``'SampleTests.SampleTestCase'`` would cause this method to return a suite which will run all three test methods. Using the specifier ``'SampleTests.SampleTestCase.test_two'`` would cause it to return a test - suite which will run only the :meth:`test_two` test method. The specifier + suite which will run only the :meth:`!test_two` test method. The specifier can refer to modules and packages which have not been imported; they will be imported as a side-effect. @@ -2051,7 +2051,7 @@ Loading and running tests Testing frameworks built on top of :mod:`unittest` may want access to the :class:`TestResult` object generated by running a set of tests for reporting purposes; a :class:`TestResult` instance is returned by the - :meth:`TestRunner.run` method for this purpose. + :meth:`!TestRunner.run` method for this purpose. :class:`TestResult` instances have the following attributes that will be of interest when inspecting the results of running a set of tests: @@ -2137,12 +2137,12 @@ Loading and running tests This method can be called to signal that the set of tests being run should be aborted by setting the :attr:`shouldStop` attribute to ``True``. - :class:`TestRunner` objects should respect this flag and return without + :class:`!TestRunner` objects should respect this flag and return without running any additional tests. For example, this feature is used by the :class:`TextTestRunner` class to stop the test framework when the user signals an interrupt from the - keyboard. Interactive tools which provide :class:`TestRunner` + keyboard. Interactive tools which provide :class:`!TestRunner` implementations can use this in a similar manner. The following methods of the :class:`TestResult` class are used to maintain @@ -2462,9 +2462,9 @@ Class and Module Fixtures ------------------------- Class and module level fixtures are implemented in :class:`TestSuite`. When -the test suite encounters a test from a new class then :meth:`tearDownClass` -from the previous class (if there is one) is called, followed by -:meth:`setUpClass` from the new class. +the test suite encounters a test from a new class then +:meth:`~TestCase.tearDownClass` from the previous class (if there is one) +is called, followed by :meth:`~TestCase.setUpClass` from the new class. Similarly if a test is from a different module from the previous test then ``tearDownModule`` from the previous module is run, followed by @@ -2525,6 +2525,10 @@ instead of as an error. setUpModule and tearDownModule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. function:: setUpModule + tearDownModule + :no-typesetting: + These should be implemented as functions:: def setUpModule(): @@ -2556,7 +2560,7 @@ To add cleanup code that must be run even in the case of an exception, use .. versionadded:: 3.8 -.. classmethod:: enterModuleContext(cm) +.. function:: enterModuleContext(cm) Enter the supplied :term:`context manager`. If successful, also add its :meth:`~object.__exit__` method as a cleanup function by diff --git a/Doc/library/unix.rst b/Doc/library/unix.rst index 4553a104d15a24..bae32b362803f1 100644 --- a/Doc/library/unix.rst +++ b/Doc/library/unix.rst @@ -1,7 +1,7 @@ .. _unix: ********************** -Unix Specific Services +Unix-specific services ********************** The modules described in this chapter provide interfaces to features that are @@ -11,6 +11,7 @@ of it. Here's an overview: .. toctree:: + shlex.rst posix.rst pwd.rst grp.rst diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index b0f26724d0c78e..5f796578eaa64e 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -210,6 +210,9 @@ The :mod:`urllib.request` module defines the following functions: Windows a UNC path is returned (as before), and on other platforms a :exc:`~urllib.error.URLError` is raised. + .. versionchanged:: 3.14 + The URL query and fragment components are discarded if present. + .. versionchanged:: 3.14 The *require_scheme* and *resolve_host* parameters were added. @@ -829,10 +832,13 @@ The following attribute and methods should only be used by classes derived from errors. It will be called automatically by the :class:`OpenerDirector` getting the error, and should not normally be called in other circumstances. - *req* will be a :class:`Request` object, *fp* will be a file-like object with - the HTTP error body, *code* will be the three-digit code of the error, *msg* - will be the user-visible explanation of the code and *hdrs* will be a mapping - object with the headers of the error. + :class:`OpenerDirector` will call this method with five positional arguments: + + 1. a :class:`Request` object, + #. a file-like object with the HTTP error body, + #. the three-digit code of the error, as a string, + #. the user-visible explanation of the code, as a string, and + #. the headers of the error, as a mapping object. Return values and exceptions raised should be the same as those of :func:`urlopen`. @@ -1121,7 +1127,7 @@ HTTPHandler Objects .. method:: HTTPHandler.http_open(req) Send an HTTP request, which can be either GET or POST, depending on - ``req.has_data()``. + ``req.data``. .. _https-handler-objects: @@ -1133,7 +1139,7 @@ HTTPSHandler Objects .. method:: HTTPSHandler.https_open(req) Send an HTTPS request, which can be either GET or POST, depending on - ``req.has_data()``. + ``req.data``. .. _file-handler-objects: diff --git a/Doc/library/urllib.robotparser.rst b/Doc/library/urllib.robotparser.rst index 016fcdc75da67a..674f646c633bbd 100644 --- a/Doc/library/urllib.robotparser.rst +++ b/Doc/library/urllib.robotparser.rst @@ -19,7 +19,7 @@ This module provides a single class, :class:`RobotFileParser`, which answers questions about whether or not a particular user agent can fetch a URL on the -web site that published the :file:`robots.txt` file. For more details on the +website that published the :file:`robots.txt` file. For more details on the structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html. diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 8cce6b98cbcdb3..aa4f1bf940bc5c 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -193,43 +193,52 @@ The :mod:`uuid` module defines the following functions: .. function:: uuid1(node=None, clock_seq=None) - Generate a UUID from a host ID, sequence number, and the current time. If *node* - is not given, :func:`getnode` is used to obtain the hardware address. If - *clock_seq* is given, it is used as the sequence number; otherwise a random - 14-bit sequence number is chosen. + Generate a UUID from a host ID, sequence number, and the current time + according to :rfc:`RFC 9562, §5.1 <9562#section-5.1>`. + + When *node* is not specified, :func:`getnode` is used to obtain the hardware + address as a 48-bit positive integer. When a sequence number *clock_seq* is + not specified, a pseudo-random 14-bit positive integer is generated. + + If *node* or *clock_seq* exceed their expected bit count, + only their least significant bits are kept. .. function:: uuid3(namespace, name) Generate a UUID based on the MD5 hash of a namespace identifier (which is a UUID) and a name (which is a :class:`bytes` object or a string - that will be encoded using UTF-8). + that will be encoded using UTF-8) + according to :rfc:`RFC 9562, §5.3 <9562#section-5.3>`. .. function:: uuid4() - Generate a random UUID. + Generate a random UUID in a cryptographically-secure method + according to :rfc:`RFC 9562, §5.4 <9562#section-5.4>`. .. function:: uuid5(namespace, name) Generate a UUID based on the SHA-1 hash of a namespace identifier (which is a UUID) and a name (which is a :class:`bytes` object or a string - that will be encoded using UTF-8). + that will be encoded using UTF-8) + according to :rfc:`RFC 9562, §5.5 <9562#section-5.5>`. .. function:: uuid6(node=None, clock_seq=None) Generate a UUID from a sequence number and the current time according to - :rfc:`9562`. + :rfc:`RFC 9562, §5.6 <9562#section-5.6>`. + This is an alternative to :func:`uuid1` to improve database locality. When *node* is not specified, :func:`getnode` is used to obtain the hardware address as a 48-bit positive integer. When a sequence number *clock_seq* is not specified, a pseudo-random 14-bit positive integer is generated. - If *node* or *clock_seq* exceed their expected bit count, only their least - significant bits are kept. + If *node* or *clock_seq* exceed their expected bit count, + only their least significant bits are kept. .. versionadded:: 3.14 @@ -257,6 +266,10 @@ The :mod:`uuid` module defines the following functions: non-specified arguments are substituted for a pseudo-random integer of appropriate size. + By default, *a*, *b* and *c* are not generated by a cryptographically + secure pseudo-random number generator (CSPRNG). Use :func:`uuid4` when + a UUID needs to be used in a security-sensitive context. + .. versionadded:: 3.14 @@ -398,7 +411,7 @@ Here are some examples of typical usage of the :mod:`uuid` module:: >>> import uuid >>> # make a UUID based on the host ID and current time - >>> uuid.uuid1() + >>> uuid.uuid1() # doctest: +SKIP UUID('a8098c1a-f86e-11da-bd1a-00112444be1e') >>> # make a UUID using an MD5 hash of a namespace UUID and a name @@ -436,15 +449,24 @@ Here are some examples of typical usage of the :mod:`uuid` module:: >>> uuid.MAX UUID('ffffffff-ffff-ffff-ffff-ffffffffffff') + >>> # same as UUIDv1 but with fields reordered to improve DB locality + >>> uuid.uuid6() # doctest: +SKIP + UUID('1f0799c0-98b9-62db-92c6-a0d365b91053') + >>> # get UUIDv7 creation (local) time as a timestamp in milliseconds >>> u = uuid.uuid7() >>> u.time # doctest: +SKIP 1743936859822 + >>> # get UUIDv7 creation (local) time as a datetime object >>> import datetime as dt >>> dt.datetime.fromtimestamp(u.time / 1000) # doctest: +SKIP datetime.datetime(...) + >>> # make a UUID with custom blocks + >>> uuid.uuid8(0x12345678, 0x9abcdef0, 0x11223344) + UUID('00001234-5678-8ef0-8000-000011223344') + .. _uuid-cli-example: diff --git a/Doc/library/venv.rst b/Doc/library/venv.rst index bed799aedfdfb1..b0eb8ee18fa25f 100644 --- a/Doc/library/venv.rst +++ b/Doc/library/venv.rst @@ -22,10 +22,10 @@ The :mod:`!venv` module supports creating lightweight "virtual environments", each with their own independent set of Python packages installed in their :mod:`site` directories. A virtual environment is created on top of an existing -Python installation, known as the virtual environment's "base" Python, and may -optionally be isolated from the packages in the base environment, -so only those explicitly installed in the virtual environment are available. -See :ref:`sys-path-init-virtual-environments` and :mod:`site`'s +Python installation, known as the virtual environment's "base" Python, and by +default is isolated from the packages in the base environment, +so that only those explicitly installed in the virtual environment are +available. See :ref:`sys-path-init-virtual-environments` and :mod:`site`'s :ref:`virtual environments documentation ` for more information. @@ -78,7 +78,7 @@ It also creates a :file:`bin` (or :file:`Scripts` on Windows) subdirectory containing a copy or symlink of the Python executable (as appropriate for the platform or arguments used at environment creation time). It also creates a :file:`lib/pythonX.Y/site-packages` subdirectory -(on Windows, this is :file:`Lib\site-packages`). +(on Windows, this is :file:`Lib\\site-packages`). If an existing directory is specified, it will be re-used. .. versionchanged:: 3.5 @@ -105,36 +105,52 @@ The command, if run with ``-h``, will show the available options:: Creates virtual Python environments in one or more target directories. - positional arguments: - ENV_DIR A directory to create the environment in. - - options: - -h, --help show this help message and exit - --system-site-packages - Give the virtual environment access to the system - site-packages dir. - --symlinks Try to use symlinks rather than copies, when - symlinks are not the default for the platform. - --copies Try to use copies rather than symlinks, even when - symlinks are the default for the platform. - --clear Delete the contents of the environment directory - if it already exists, before environment creation. - --upgrade Upgrade the environment directory to use this - version of Python, assuming Python has been - upgraded in-place. - --without-pip Skips installing or upgrading pip in the virtual - environment (pip is bootstrapped by default) - --prompt PROMPT Provides an alternative prompt prefix for this - environment. - --upgrade-deps Upgrade core dependencies (pip) to the latest - version in PyPI - --without-scm-ignore-files - Skips adding SCM ignore files to the environment - directory (Git is supported by default). - Once an environment has been created, you may wish to activate it, e.g. by sourcing an activate script in its bin directory. +.. _venv-cli: +.. program:: venv + +.. option:: ENV_DIR + + A required argument specifying the directory to create the environment in. + +.. option:: --system-site-packages + + Give the virtual environment access to the system site-packages directory. + +.. option:: --symlinks + + Try to use symlinks rather than copies, when symlinks are not the default for the platform. + +.. option:: --copies + + Try to use copies rather than symlinks, even when symlinks are the default for the platform. + +.. option:: --clear + + Delete the contents of the environment directory if it already exists, before environment creation. + +.. option:: --upgrade + + Upgrade the environment directory to use this version of Python, assuming Python has been upgraded in-place. + +.. option:: --without-pip + + Skips installing or upgrading pip in the virtual environment (pip is bootstrapped by default). + +.. option:: --prompt + + Provides an alternative prompt prefix for this environment. + +.. option:: --upgrade-deps + + Upgrade core dependencies (pip) to the latest version in PyPI. + +.. option:: --without-scm-ignore-files + + Skips adding SCM ignore files to the environment directory (Git is supported by default). + .. versionchanged:: 3.4 Installs pip by default, added the ``--without-pip`` and ``--copies`` diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst index 00bafd1be4bd0c..15b62a0653d73d 100644 --- a/Doc/library/warnings.rst +++ b/Doc/library/warnings.rst @@ -80,7 +80,9 @@ The following warnings category classes are currently defined: | | unless triggered by code in ``__main__``). | +----------------------------------+-----------------------------------------------+ | :exc:`SyntaxWarning` | Base category for warnings about dubious | -| | syntactic features. | +| | syntactic features (typically emitted when | +| | compiling Python source code, and hence | +| | may not be suppressed by runtime filters) | +----------------------------------+-----------------------------------------------+ | :exc:`RuntimeWarning` | Base category for warnings about dubious | | | runtime features. | @@ -458,7 +460,7 @@ Available Functions lower.one_way(**kw) This makes the warning refer to both the ``example.lower.one_way()`` and - ``package.higher.another_way()`` call sites only from calling code living + ``example.higher.another_way()`` call sites only from calling code living outside of ``example`` package. *source*, if supplied, is the destroyed object which emitted a @@ -474,14 +476,21 @@ Available Functions .. function:: warn_explicit(message, category, filename, lineno, module=None, registry=None, module_globals=None, source=None) This is a low-level interface to the functionality of :func:`warn`, passing in - explicitly the message, category, filename and line number, and optionally the - module name and the registry (which should be the ``__warningregistry__`` - dictionary of the module). The module name defaults to the filename with - ``.py`` stripped; if no registry is passed, the warning is never suppressed. + explicitly the message, category, filename and line number, and optionally + other arguments. *message* must be a string and *category* a subclass of :exc:`Warning` or *message* may be a :exc:`Warning` instance, in which case *category* will be ignored. + *module*, if supplied, should be the module name. + If no module is passed, the filename with ``.py`` stripped is used. + + *registry*, if supplied, should be the ``__warningregistry__`` dictionary + of the module. + If no registry is passed, each warning is treated as the first occurrence, + that is, filter actions ``"default"``, ``"module"`` and ``"once"`` are + handled as ``"always"``. + *module_globals*, if supplied, should be the global namespace in use by the code for which the warning is issued. (This argument is used to support displaying source for modules found in zipfiles or other non-filesystem import @@ -584,7 +593,7 @@ Available Functions The deprecation message passed to the decorator is saved in the ``__deprecated__`` attribute on the decorated object. If applied to an overload, the decorator - must be after the :func:`@overload ` decorator + must be after the :deco:`~typing.overload` decorator for the attribute to exist on the overload as returned by :func:`typing.get_overloads`. diff --git a/Doc/library/webbrowser.rst b/Doc/library/webbrowser.rst index fd6abc70261019..a2103d8fdd8efe 100644 --- a/Doc/library/webbrowser.rst +++ b/Doc/library/webbrowser.rst @@ -49,6 +49,11 @@ a new tab, with the browser being brought to the foreground. The use of the :mod:`webbrowser` module on iOS requires the :mod:`ctypes` module. If :mod:`ctypes` isn't available, calls to :func:`.open` will fail. +.. _webbrowser-cli: + +Command-line interface +---------------------- + .. program:: webbrowser The script :program:`webbrowser` can be used as a command-line interface for the @@ -232,7 +237,7 @@ Here are some simple examples:: .. _browser-controllers: -Browser Controller Objects +Browser controller objects -------------------------- Browser controllers provide the :attr:`~controller.name` attribute, diff --git a/Doc/library/winreg.rst b/Doc/library/winreg.rst index b3a824fb69a49f..6d1e8ecfc1741d 100644 --- a/Doc/library/winreg.rst +++ b/Doc/library/winreg.rst @@ -14,6 +14,8 @@ integer as the registry handle, a :ref:`handle object ` is used to ensure that the handles are closed correctly, even if the programmer neglects to explicitly close them. +.. availability:: Windows. + .. _exception-changed: .. versionchanged:: 3.3 @@ -753,9 +755,6 @@ Handle objects provide semantics for :meth:`~object.__bool__` -- thus :: will print ``Yes`` if the handle is currently valid (has not been closed or detached). -The object also support comparison semantics, so handle objects will compare -true if they both reference the same underlying Windows handle value. - Handle objects can be converted to an integer (e.g., using the built-in :func:`int` function), in which case the underlying Windows handle value is returned. You can also use the :meth:`~PyHKEY.Detach` method to return the diff --git a/Doc/library/winsound.rst b/Doc/library/winsound.rst index 925984c3cdb0cb..93c0c025982076 100644 --- a/Doc/library/winsound.rst +++ b/Doc/library/winsound.rst @@ -13,6 +13,8 @@ The :mod:`winsound` module provides access to the basic sound-playing machinery provided by Windows platforms. It includes functions and several constants. +.. availability:: Windows. + .. function:: Beep(frequency, duration) diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index 00a18751207e7a..9ffedf7366a7b8 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -19,11 +19,10 @@ not already proficient with the DOM should consider using the :mod:`xml.etree.ElementTree` module for their XML processing instead. -.. warning:: +.. note:: - The :mod:`xml.dom.minidom` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. DOM applications typically start by parsing some XML into a DOM. With diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index fd96765cbe3c96..a21cfaa4645419 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -19,11 +19,10 @@ responsible for explicitly pulling events from the stream, looping over those events until either processing is finished or an error condition occurs. -.. warning:: +.. note:: - The :mod:`xml.dom.pulldom` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. versionchanged:: 3.7.1 @@ -75,7 +74,7 @@ given point) or to make use of the :func:`DOMEventStream.expandNode` method and switch to DOM-related processing. -.. class:: PullDom(documentFactory=None) +.. class:: PullDOM(documentFactory=None) Subclass of :class:`xml.sax.handler.ContentHandler`. diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 1daf6628013bf0..00075ac2a23e6b 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -20,11 +20,10 @@ for parsing and creating XML data. The :mod:`!xml.etree.cElementTree` module is deprecated. -.. warning:: +.. note:: - The :mod:`xml.etree.ElementTree` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. Tutorial -------- diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index d495995398959d..acd8d399fe32fc 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -15,12 +15,10 @@ XML Processing Modules Python's interfaces for processing XML are grouped in the ``xml`` package. -.. warning:: +.. note:: - The XML modules are not secure against erroneous or maliciously - constructed data. If you need to parse untrusted or - unauthenticated data see the :ref:`xml-vulnerabilities` and - :ref:`defusedxml-package` sections. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. It is important to note that modules in the :mod:`xml` package require that there be at least one SAX-compliant XML parser available. The Expat parser is @@ -47,46 +45,33 @@ The XML handling submodules are: * :mod:`xml.parsers.expat`: the Expat parser binding +.. _xml-security: .. _xml-vulnerabilities: -XML vulnerabilities -------------------- +XML security +------------ -The XML processing modules are not secure against maliciously constructed data. An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or -circumvent firewalls. - -The following table gives an overview of the known attacks and whether -the various modules are vulnerable to them. - -========================= ================== ================== ================== ================== ================== -kind sax etree minidom pulldom xmlrpc -========================= ================== ================== ================== ================== ================== -billion laughs **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) -quadratic blowup **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) -external entity expansion Safe (5) Safe (2) Safe (3) Safe (5) Safe (4) -`DTD`_ retrieval Safe (5) Safe Safe Safe (5) Safe -decompression bomb Safe Safe Safe Safe **Vulnerable** -large tokens **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) -========================= ================== ================== ================== ================== ================== - -1. Expat 2.4.1 and newer is not vulnerable to the "billion laughs" and - "quadratic blowup" vulnerabilities. Items still listed as vulnerable due to - potential reliance on system-provided libraries. Check - :const:`!pyexpat.EXPAT_VERSION`. -2. :mod:`xml.etree.ElementTree` doesn't expand external entities and raises a - :exc:`~xml.etree.ElementTree.ParseError` when an entity occurs. -3. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns - the unexpanded entity verbatim. -4. :mod:`xmlrpc.client` doesn't expand external entities and omits them. -5. Since Python 3.7.1, external general entities are no longer processed by - default. -6. Expat 2.6.0 and newer is not vulnerable to denial of service - through quadratic runtime caused by parsing large tokens. - Items still listed as vulnerable due to - potential reliance on system-provided libraries. Check - :const:`!pyexpat.EXPAT_VERSION`. +circumvent firewalls when attacker-controlled XML is being parsed, +in Python or elsewhere. + +The built-in XML parsers of Python rely on the library `libexpat`_, commonly +called Expat, for parsing XML. + +By default, Expat itself does not access local files or create network +connections. + +Expat versions lower than 2.7.2 may be vulnerable to the "billion laughs", +"quadratic blowup" and "large tokens" vulnerabilities, or to disproportional +use of dynamic memory. +Python bundles a copy of Expat, and whether Python uses the bundled or a +system-wide Expat, depends on how the Python interpreter +:option:`has been configured <--with-system-expat>` in your environment. +Python may be vulnerable if it uses such older versions of Expat. +Check :const:`!pyexpat.EXPAT_VERSION`. + +:mod:`xmlrpc` is **vulnerable** to the "decompression bomb" attack. billion laughs / exponential entity expansion @@ -103,16 +88,6 @@ quadratic blowup entity expansion efficient as the exponential case but it avoids triggering parser countermeasures that forbid deeply nested entities. -external entity expansion - Entity declarations can contain more than just text for replacement. They can - also point to external resources or local files. The XML - parser accesses the resource and embeds the content into the XML document. - -`DTD`_ retrieval - Some XML libraries like Python's :mod:`xml.dom.pulldom` retrieve document type - definitions from remote or local locations. The feature has similar - implications as the external entity expansion issue. - decompression bomb Decompression bombs (aka `ZIP bomb`_) apply to all XML libraries that can parse compressed XML streams such as gzipped HTTP streams or @@ -126,21 +101,6 @@ large tokens be used to cause denial of service in the application parsing XML. The issue is known as :cve:`2023-52425`. -The documentation for :pypi:`defusedxml` on PyPI has further information about -all known attack vectors with examples and references. - -.. _defusedxml-package: - -The :mod:`!defusedxml` Package ------------------------------- - -:pypi:`defusedxml` is a pure Python package with modified subclasses of all stdlib -XML parsers that prevent any potentially malicious operation. Use of this -package is recommended for any server code that parses untrusted XML data. The -package also ships with example exploits and extended documentation on more -XML exploits such as XPath injection. - - +.. _libexpat: https://github.com/libexpat/libexpat .. _Billion Laughs: https://en.wikipedia.org/wiki/Billion_laughs .. _ZIP bomb: https://en.wikipedia.org/wiki/Zip_bomb -.. _DTD: https://en.wikipedia.org/wiki/Document_type_definition diff --git a/Doc/library/xml.sax.handler.rst b/Doc/library/xml.sax.handler.rst index c2c9d6424b5072..f1af7253e437b4 100644 --- a/Doc/library/xml.sax.handler.rst +++ b/Doc/library/xml.sax.handler.rst @@ -96,6 +96,14 @@ for the feature and property names. .. data:: feature_external_ges + .. warning:: + + Enabling opens a vulnerability to + `external entity attacks `_ + if the parser is used with user-provided XML content. + Please reflect on your `threat model `_ + before enabling this feature. + | value: ``"http://xml.org/sax/features/external-general-entities"`` | true: Include all external general (text) entities. | false: Do not include external general entities. @@ -248,8 +256,7 @@ events in the input document: The *name* parameter contains the raw XML 1.0 name of the element type as a string and the *attrs* parameter holds an object of the - :class:`~xml.sax.xmlreader.Attributes` - interface (see :ref:`attributes-objects`) containing the attributes of + :ref:`Attributes ` interface containing the attributes of the element. The object passed as *attrs* may be re-used by the parser; holding on to a reference to it is not a reliable way to keep a copy of the attributes. To keep a copy of the attributes, use the :meth:`copy` method of the *attrs* @@ -271,8 +278,7 @@ events in the input document: The *name* parameter contains the name of the element type as a ``(uri, localname)`` tuple, the *qname* parameter contains the raw XML 1.0 name used in the source document, and the *attrs* parameter holds an instance of the - :class:`~xml.sax.xmlreader.AttributesNS` interface (see - :ref:`attributes-ns-objects`) + :ref:`AttributesNS ` interface containing the attributes of the element. If no namespace is associated with the element, the *uri* component of *name* will be ``None``. The object passed as *attrs* may be re-used by the parser; holding on to a reference to it is not diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst index c60e9e505f7544..5fa92645a440ce 100644 --- a/Doc/library/xml.sax.rst +++ b/Doc/library/xml.sax.rst @@ -18,11 +18,10 @@ SAX exceptions and the convenience functions which will be most used by users of the SAX API. -.. warning:: +.. note:: - The :mod:`xml.sax` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. versionchanged:: 3.7.1 diff --git a/Doc/library/xml.sax.utils.rst b/Doc/library/xml.sax.utils.rst index 5ee11d58c3dd26..7731f03d875efc 100644 --- a/Doc/library/xml.sax.utils.rst +++ b/Doc/library/xml.sax.utils.rst @@ -37,7 +37,7 @@ or as base classes. You can unescape other strings of data by passing a dictionary as the optional *entities* parameter. The keys and values must all be strings; each key will be - replaced with its corresponding value. ``'&'``, ``'<'``, and ``'>'`` + replaced with its corresponding value. ``'&'``, ``'<'``, and ``'>'`` are always unescaped, even if *entities* is provided. diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index 654154cb43d6e5..a21c7d3e4e3ad5 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -24,8 +24,8 @@ between conformable Python objects and XML on the wire. .. warning:: The :mod:`xmlrpc.client` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + constructed data. If you need to parse untrusted or unauthenticated data, + see :ref:`xml-security`. .. versionchanged:: 3.5 @@ -524,7 +524,7 @@ Convenience Functions .. function:: dumps(params, methodname=None, methodresponse=None, encoding=None, allow_none=False) - Convert *params* into an XML-RPC request. or into a response if *methodresponse* + Convert *params* into an XML-RPC request, or into a response if *methodresponse* is true. *params* can be either a tuple of arguments or an instance of the :exc:`Fault` exception class. If *methodresponse* is true, only a single value can be returned, meaning that *params* must be of length 1. *encoding*, if diff --git a/Doc/library/xmlrpc.server.rst b/Doc/library/xmlrpc.server.rst index 06169c7eca8b0c..2a8f6f8d5fc0de 100644 --- a/Doc/library/xmlrpc.server.rst +++ b/Doc/library/xmlrpc.server.rst @@ -20,8 +20,8 @@ servers written in Python. Servers can either be free standing, using .. warning:: The :mod:`xmlrpc.server` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + constructed data. If you need to parse untrusted or unauthenticated data, + see :ref:`xml-security`. .. include:: ../includes/wasm-notavail.rst diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 6a4fa67332e179..ae4e25b13b92cd 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -16,13 +16,23 @@ provides tools to create, read, write, append, and list a ZIP file. Any advanced use of this module will require an understanding of the format, as defined in `PKZIP Application Note`_. -This module does not currently handle multi-disk ZIP files. +This module does not handle multipart ZIP files. It can handle ZIP files that use the ZIP64 extensions (that is ZIP files that are more than 4 GiB in size). It supports -decryption of encrypted files in ZIP archives, but it currently cannot +decryption of encrypted files in ZIP archives, but it cannot create an encrypted file. Decryption is extremely slow as it is implemented in native Python rather than C. +.. + The following paragraph should be similar to ../includes/optional-module.rst + +Handling compressed archives requires :term:`optional modules ` +such as :mod:`zlib`, :mod:`bz2`, :mod:`lzma`, and :mod:`compression.zstd`. +If any of them are missing from your copy of CPython, +look for documentation from your distributor (that is, +whoever provided Python to you). +If you are the distributor, see :ref:`optional-module-requirements`. + The module defines the following items: .. exception:: BadZipFile @@ -129,14 +139,28 @@ The module defines the following items: .. versionadded:: 3.3 +.. data:: ZIP_ZSTANDARD + + The numeric constant for Zstandard compression. This requires the + :mod:`compression.zstd` module. + .. note:: - The ZIP file format specification has included support for bzip2 compression - since 2001, and for LZMA compression since 2006. However, some tools - (including older Python releases) do not support these compression - methods, and may either refuse to process the ZIP file altogether, - or fail to extract individual files. + In APPNOTE 6.3.7, the method ID ``20`` was assigned to Zstandard + compression. This was changed in APPNOTE 6.3.8 to method ID ``93`` to + avoid conflicts, with method ID ``20`` being deprecated. For + compatibility, the :mod:`!zipfile` module reads both method IDs but will + only write data with method ID ``93``. + + .. versionadded:: 3.14 + +.. note:: + The ZIP file format specification has included support for bzip2 compression + since 2001, for LZMA compression since 2006, and Zstandard compression since + 2020. However, some tools (including older Python releases) do not support + these compression methods, and may either refuse to process the ZIP file + altogether, or fail to extract individual files. .. seealso:: @@ -151,7 +175,7 @@ The module defines the following items: .. _zipfile-objects: -ZipFile Objects +ZipFile objects --------------- @@ -176,10 +200,11 @@ ZipFile Objects *compression* is the ZIP compression method to use when writing the archive, and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, - :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized - values will cause :exc:`NotImplementedError` to be raised. If - :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified - but the corresponding module (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not + :const:`ZIP_BZIP2`, :const:`ZIP_LZMA`, or :const:`ZIP_ZSTANDARD`; + unrecognized values will cause :exc:`NotImplementedError` to be raised. If + :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2`, :const:`ZIP_LZMA`, or + :const:`ZIP_ZSTANDARD` is specified but the corresponding module + (:mod:`zlib`, :mod:`bz2`, :mod:`lzma`, or :mod:`compression.zstd`) is not available, :exc:`RuntimeError` is raised. The default is :const:`ZIP_STORED`. If *allowZip64* is ``True`` (the default) zipfile will create ZIP files that @@ -194,6 +219,10 @@ ZipFile Objects (see :class:`zlib ` for more information). When using :const:`ZIP_BZIP2` integers ``1`` through ``9`` are accepted (see :class:`bz2 ` for more information). + When using :const:`ZIP_ZSTANDARD` integers ``-131072`` through ``22`` are + commonly accepted (see + :attr:`CompressionParameter.compression_level ` + for more on retrieving valid values and their meaning). The *strict_timestamps* argument, when set to ``False``, allows to zip files older than 1980-01-01 at the cost of setting the @@ -219,7 +248,7 @@ ZipFile Objects .. note:: *metadata_encoding* is an instance-wide setting for the ZipFile. - It is not currently possible to set this on a per-member basis. + It is not possible to set this on a per-member basis. This attribute is a workaround for legacy implementations which produce archives with names in the current locale encoding or code page (mostly @@ -415,9 +444,10 @@ ZipFile Objects read or append. *pwd* is the password used for encrypted files as a :class:`bytes` object and, if specified, overrides the default password set with :meth:`setpassword`. Calling :meth:`read` on a ZipFile that uses a compression method other than - :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2` or - :const:`ZIP_LZMA` will raise a :exc:`NotImplementedError`. An error will also - be raised if the corresponding compression module is not available. + :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2`, + :const:`ZIP_LZMA`, or :const:`ZIP_ZSTANDARD` will raise a + :exc:`NotImplementedError`. An error will also be raised if the + corresponding compression module is not available. .. versionchanged:: 3.6 Calling :meth:`read` on a closed ZipFile will raise a :exc:`ValueError`. @@ -538,18 +568,10 @@ The following data attributes are also available: it should be no longer than 65535 bytes. Comments longer than this will be truncated. -.. attribute:: ZipFile.data_offset - - The offset to the start of ZIP data from the beginning of the file. When the - :class:`ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the - underlying file does not support ``tell()``, the value will be ``None`` - instead. - - .. versionadded:: 3.14 .. _path-objects: -Path Objects +Path objects ------------ .. class:: Path(root, at='') @@ -685,7 +707,7 @@ changes. .. _pyzipfile-objects: -PyZipFile Objects +PyZipFile objects ----------------- The :class:`PyZipFile` constructor takes the same parameters as the @@ -762,7 +784,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the .. _zipinfo-objects: -ZipInfo Objects +ZipInfo objects --------------- Instances of the :class:`ZipInfo` class are returned by the :meth:`.getinfo` and @@ -818,7 +840,10 @@ Instances have the following methods and attributes: .. attribute:: ZipInfo.date_time The time and date of the last modification to the archive member. This is a - tuple of six values: + tuple of six values representing the "last [modified] file time" and "last [modified] file date" + fields from the ZIP file's central directory. + + The tuple contains: +-------+--------------------------+ | Index | Value | @@ -838,7 +863,15 @@ Instances have the following methods and attributes: .. note:: - The ZIP file format does not support timestamps before 1980. + The ZIP format supports multiple timestamp fields in different locations + (central directory, extra fields for NTFS/UNIX systems, etc.). This attribute + specifically returns the timestamp from the central directory. The central + directory timestamp format in ZIP files does not support timestamps before + 1980. While some extra field formats (such as UNIX timestamps) can represent + earlier dates, this attribute only returns the central directory timestamp. + + The central directory timestamp is interpreted as representing local + time, rather than UTC time, to match the behavior of other zip tools. .. attribute:: ZipInfo.compress_type @@ -921,7 +954,7 @@ Instances have the following methods and attributes: .. _zipfile-commandline: .. program:: zipfile -Command-Line Interface +Command-line interface ---------------------- The :mod:`zipfile` module provides a simple command-line interface to interact @@ -996,7 +1029,7 @@ From file itself Decompression may fail due to incorrect password / CRC checksum / ZIP format or unsupported compression method / decryption. -File System limitations +File system limitations ~~~~~~~~~~~~~~~~~~~~~~~ Exceeding limitations on different file systems can cause decompression failed. diff --git a/Doc/library/zipimport.rst b/Doc/library/zipimport.rst index cd76f29a556939..ead87c2d6ad2e5 100644 --- a/Doc/library/zipimport.rst +++ b/Doc/library/zipimport.rst @@ -188,17 +188,20 @@ Here is an example that imports a module from a ZIP archive - note that the .. code-block:: shell-session - $ unzip -l example.zip - Archive: example.zip + $ unzip -l example_archive.zip + Archive: example_archive.zip Length Date Time Name -------- ---- ---- ---- - 8467 11-26-02 22:30 jwzthreading.py + 8467 01-01-00 12:30 example.py -------- ------- 8467 1 file - $ ./python - Python 2.3 (#1, Aug 1 2003, 19:54:32) + +.. code-block:: pycon + >>> import sys - >>> sys.path.insert(0, 'example.zip') # Add .zip file to front of path - >>> import jwzthreading - >>> jwzthreading.__file__ - 'example.zip/jwzthreading.py' + >>> # Add the archive to the front of the module search path + >>> sys.path.insert(0, 'example_archive.zip') + >>> import example + >>> example.__file__ + 'example_archive.zip/example.py' + diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 75ead3c4cb144c..8e2446544a29ed 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -8,15 +8,13 @@ -------------- For applications that require data compression, the functions in this module -allow compression and decompression, using the zlib library. The zlib library -has its own home page at https://www.zlib.net. There are known -incompatibilities between the Python module and versions of the zlib library -earlier than 1.1.3; 1.1.3 has a `security vulnerability `_, so we recommend using -1.1.4 or later. +allow compression and decompression, using the `zlib library `_. + +.. include:: ../includes/optional-module.rst zlib's functions have many options and often need to be used in a particular order. This documentation doesn't attempt to cover all of the permutations; -consult the zlib manual at http://www.zlib.net/manual.html for authoritative +consult the `zlib manual `_ for authoritative information. For reading and writing ``.gz`` files see the :mod:`gzip` module. @@ -44,21 +42,20 @@ The available exception and functions in this module are: .. versionchanged:: 3.0 The result is always unsigned. -.. function:: compress(data, /, level=-1, wbits=MAX_WBITS) +.. function:: compress(data, /, level=Z_DEFAULT_COMPRESSION, wbits=MAX_WBITS) Compresses the bytes in *data*, returning a bytes object containing compressed data. *level* is an integer from ``0`` to ``9`` or ``-1`` controlling the level of compression; - ``1`` (Z_BEST_SPEED) is fastest and produces the least compression, ``9`` (Z_BEST_COMPRESSION) - is slowest and produces the most. ``0`` (Z_NO_COMPRESSION) is no compression. - The default value is ``-1`` (Z_DEFAULT_COMPRESSION). Z_DEFAULT_COMPRESSION represents a default - compromise between speed and compression (currently equivalent to level 6). + See :const:`Z_BEST_SPEED` (``1``), :const:`Z_BEST_COMPRESSION` (``9``), + :const:`Z_NO_COMPRESSION` (``0``), and the default, + :const:`Z_DEFAULT_COMPRESSION` (``-1``) for more information about these values. .. _compress-wbits: The *wbits* argument controls the size of the history buffer (or the "window size") used when compressing data, and whether a header and trailer is included in the output. It can take several ranges of values, - defaulting to ``15`` (MAX_WBITS): + defaulting to ``15`` (:const:`MAX_WBITS`): * +9 to +15: The base-two logarithm of the window size, which therefore ranges between 512 and 32768. Larger values produce @@ -82,17 +79,15 @@ The available exception and functions in this module are: The *wbits* parameter is now available to set window bits and compression type. -.. function:: compressobj(level=-1, method=DEFLATED, wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=Z_DEFAULT_STRATEGY[, zdict]) +.. function:: compressobj(level=Z_DEFAULT_COMPRESSION, method=DEFLATED, wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=Z_DEFAULT_STRATEGY[, zdict]) Returns a compression object, to be used for compressing data streams that won't fit into memory at once. *level* is the compression level -- an integer from ``0`` to ``9`` or ``-1``. - A value of ``1`` (Z_BEST_SPEED) is fastest and produces the least compression, - while a value of ``9`` (Z_BEST_COMPRESSION) is slowest and produces the most. - ``0`` (Z_NO_COMPRESSION) is no compression. The default value is ``-1`` (Z_DEFAULT_COMPRESSION). - Z_DEFAULT_COMPRESSION represents a default compromise between speed and compression - (currently equivalent to level 6). + See :const:`Z_BEST_SPEED` (``1``), :const:`Z_BEST_COMPRESSION` (``9``), + :const:`Z_NO_COMPRESSION` (``0``), and the default, + :const:`Z_DEFAULT_COMPRESSION` (``-1``) for more information about these values. *method* is the compression algorithm. Currently, the only supported value is :const:`DEFLATED`. @@ -107,7 +102,7 @@ The available exception and functions in this module are: *strategy* is used to tune the compression algorithm. Possible values are :const:`Z_DEFAULT_STRATEGY`, :const:`Z_FILTERED`, :const:`Z_HUFFMAN_ONLY`, - :const:`Z_RLE` (zlib 1.2.0.1) and :const:`Z_FIXED` (zlib 1.2.2.2). + :const:`Z_RLE` and :const:`Z_FIXED`. *zdict* is a predefined compression dictionary. This is a sequence of bytes (such as a :class:`bytes` object) containing subsequences that are expected @@ -221,7 +216,7 @@ Compression objects support the following methods: All pending input is processed, and a bytes object containing the remaining compressed output is returned. *mode* can be selected from the constants :const:`Z_NO_FLUSH`, :const:`Z_PARTIAL_FLUSH`, :const:`Z_SYNC_FLUSH`, - :const:`Z_FULL_FLUSH`, :const:`Z_BLOCK` (zlib 1.2.3.4), or :const:`Z_FINISH`, + :const:`Z_FULL_FLUSH`, :const:`Z_BLOCK`, or :const:`Z_FINISH`, defaulting to :const:`Z_FINISH`. Except :const:`Z_FINISH`, all constants allow compressing further bytestrings of data, while :const:`Z_FINISH` finishes the compressed stream and prevents compressing any more data. After calling :meth:`flush` @@ -312,6 +307,137 @@ Decompression objects support the following methods and attributes: objects. +The following constants are available to configure compression and decompression +behavior: + +.. data:: DEFLATED + + The deflate compression method. + + +.. data:: MAX_WBITS + + The maximum window size, expressed as a power of 2. + For example, if :const:`!MAX_WBITS` is ``15`` it results in a window size + of ``32 KiB``. + + +.. data:: DEF_MEM_LEVEL + + The default memory level for compression objects. + + +.. data:: DEF_BUF_SIZE + + The default buffer size for decompression operations. + + +.. data:: Z_NO_COMPRESSION + + Compression level ``0``; no compression. + + .. versionadded:: 3.6 + + +.. data:: Z_BEST_SPEED + + Compression level ``1``; fastest and produces the least compression. + + +.. data:: Z_BEST_COMPRESSION + + Compression level ``9``; slowest and produces the most compression. + + +.. data:: Z_DEFAULT_COMPRESSION + + Default compression level (``-1``); a compromise between speed and + compression. Currently equivalent to compression level ``6``. + + +.. data:: Z_DEFAULT_STRATEGY + + Default compression strategy, for normal data. + + +.. data:: Z_FILTERED + + Compression strategy for data produced by a filter (or predictor). + + +.. data:: Z_HUFFMAN_ONLY + + Compression strategy that forces Huffman coding only. + + +.. data:: Z_RLE + + Compression strategy that limits match distances to one (run-length encoding). + + This constant is only available if Python was compiled with zlib + 1.2.0.1 or greater. + + .. versionadded:: 3.6 + + +.. data:: Z_FIXED + + Compression strategy that prevents the use of dynamic Huffman codes. + + This constant is only available if Python was compiled with zlib + 1.2.2.2 or greater. + + .. versionadded:: 3.6 + + +.. data:: Z_NO_FLUSH + + Flush mode ``0``. No special flushing behavior. + + .. versionadded:: 3.6 + + +.. data:: Z_PARTIAL_FLUSH + + Flush mode ``1``. Flush as much output as possible. + + +.. data:: Z_SYNC_FLUSH + + Flush mode ``2``. All output is flushed and the output is aligned to a byte boundary. + + +.. data:: Z_FULL_FLUSH + + Flush mode ``3``. All output is flushed and the compression state is reset. + + +.. data:: Z_FINISH + + Flush mode ``4``. All pending input is processed, no more input is expected. + + +.. data:: Z_BLOCK + + Flush mode ``5``. A deflate block is completed and emitted. + + This constant is only available if Python was compiled with zlib + 1.2.2.2 or greater. + + .. versionadded:: 3.6 + + +.. data:: Z_TREES + + Flush mode ``6``, for inflate operations. Instructs inflate to return when + it gets to the next deflate block boundary. + + This constant is only available if Python was compiled with zlib + 1.2.3.4 or greater. + + .. versionadded:: 3.6 + + Information about the version of the zlib library in use is available through the following constants: @@ -347,10 +473,10 @@ the following constants: Module :mod:`gzip` Reading and writing :program:`gzip`\ -format files. - http://www.zlib.net + https://www.zlib.net The zlib library home page. - http://www.zlib.net/manual.html + https://www.zlib.net/manual.html The zlib manual explains the semantics and usage of the library's many functions. diff --git a/Doc/library/zoneinfo.rst b/Doc/library/zoneinfo.rst index a57f3b8b3e858c..d95260896f75e1 100644 --- a/Doc/library/zoneinfo.rst +++ b/Doc/library/zoneinfo.rst @@ -195,7 +195,7 @@ The ``ZoneInfo`` class The ``ZoneInfo`` class has two alternate constructors: -.. classmethod:: ZoneInfo.from_file(fobj, /, key=None) +.. classmethod:: ZoneInfo.from_file(file_obj, /, key=None) Constructs a ``ZoneInfo`` object from a file-like object returning bytes (e.g. a file opened in binary mode or an :class:`io.BytesIO` object). @@ -299,7 +299,7 @@ The behavior of a ``ZoneInfo`` file depends on how it was constructed: 1. ``ZoneInfo(key)``: When constructed with the primary constructor, a ``ZoneInfo`` object is serialized by key, and when deserialized, the deserializing process uses the primary and thus it is expected that these - are expected to be the same object as other references to the same time + are the same object as other references to the same time zone. For example, if ``europe_berlin_pkl`` is a string containing a pickle constructed from ``ZoneInfo("Europe/Berlin")``, one would expect the following behavior: @@ -325,7 +325,7 @@ The behavior of a ``ZoneInfo`` file depends on how it was constructed: >>> a is b False -3. ``ZoneInfo.from_file(fobj, /, key=None)``: When constructed from a file, the +3. ``ZoneInfo.from_file(file_obj, /, key=None)``: When constructed from a file, the ``ZoneInfo`` object raises an exception on pickling. If an end user wants to pickle a ``ZoneInfo`` constructed from a file, it is recommended that they use a wrapper type or a custom serialization function: either serializing by diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst index f36ed3e122f2bc..b55ced574e4d38 100644 --- a/Doc/reference/compound_stmts.rst +++ b/Doc/reference/compound_stmts.rst @@ -154,15 +154,15 @@ The :keyword:`for` statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: .. productionlist:: python-grammar - for_stmt: "for" `target_list` "in" `starred_list` ":" `suite` + for_stmt: "for" `target_list` "in" `starred_expression_list` ":" `suite` : ["else" ":" `suite`] -The ``starred_list`` expression is evaluated once; it should yield an -:term:`iterable` object. An :term:`iterator` is created for that iterable. -The first item provided -by the iterator is then assigned to the target list using the standard -rules for assignments (see :ref:`assignment`), and the suite is executed. This -repeats for each item provided by the iterator. When the iterator is exhausted, +The :token:`~python-grammar:starred_expression_list` expression is evaluated +once; it should yield an :term:`iterable` object. An :term:`iterator` is +created for that iterable. The first item provided by the iterator is then +assigned to the target list using the standard rules for assignments +(see :ref:`assignment`), and the suite is executed. This repeats for each +item provided by the iterator. When the iterator is exhausted, the suite in the :keyword:`!else` clause, if present, is executed, and the loop terminates. @@ -335,15 +335,29 @@ stored in the :mod:`sys` module is reset to its previous value:: :keyword:`!except*` clause -------------------------- -The :keyword:`!except*` clause(s) are used for handling -:exc:`ExceptionGroup`\s. The exception type for matching is interpreted as in -the case of :keyword:`except`, but in the case of exception groups we can have -partial matches when the type matches some of the exceptions in the group. -This means that multiple :keyword:`!except*` clauses can execute, -each handling part of the exception group. -Each clause executes at most once and handles an exception group -of all matching exceptions. Each exception in the group is handled by at most -one :keyword:`!except*` clause, the first that matches it. :: +The :keyword:`!except*` clause(s) specify one or more handlers for groups of +exceptions (:exc:`BaseExceptionGroup` instances). A :keyword:`try` statement +can have either :keyword:`except` or :keyword:`!except*` clauses, but not both. +The exception type for matching is mandatory in the case of :keyword:`!except*`, +so ``except*:`` is a syntax error. The type is interpreted as in the case of +:keyword:`!except`, but matching is performed on the exceptions contained in the +group that is being handled. An :exc:`TypeError` is raised if a matching +type is a subclass of :exc:`!BaseExceptionGroup`, because that would have +ambiguous semantics. + +When an exception group is raised in the try block, each :keyword:`!except*` +clause splits (see :meth:`~BaseExceptionGroup.split`) it into the subgroups +of matching and non-matching exceptions. If the matching subgroup is not empty, +it becomes the handled exception (the value returned from :func:`sys.exception`) +and assigned to the target of the :keyword:`!except*` clause (if there is one). +Then, the body of the :keyword:`!except*` clause executes. If the non-matching +subgroup is not empty, it is processed by the next :keyword:`!except*` in the +same manner. This continues until all exceptions in the group have been matched, +or the last :keyword:`!except*` clause has run. + +After all :keyword:`!except*` clauses execute, the group of unhandled exceptions +is merged with any exceptions that were raised or re-raised from within +:keyword:`!except*` clauses. This merged exception group propagates on.:: >>> try: ... raise ExceptionGroup("eg", @@ -356,22 +370,18 @@ one :keyword:`!except*` clause, the first that matches it. :: caught with nested (TypeError(2),) caught with nested (OSError(3), OSError(4)) + Exception Group Traceback (most recent call last): - | File "", line 2, in - | ExceptionGroup: eg + | File "", line 2, in + | raise ExceptionGroup("eg", + | [ValueError(1), TypeError(2), OSError(3), OSError(4)]) + | ExceptionGroup: eg (1 sub-exception) +-+---------------- 1 ---------------- | ValueError: 1 +------------------------------------ - -Any remaining exceptions that were not handled by any :keyword:`!except*` -clause are re-raised at the end, along with all exceptions that were -raised from within the :keyword:`!except*` clauses. If this list contains -more than one exception to reraise, they are combined into an exception -group. - -If the raised exception is not an exception group and its type matches -one of the :keyword:`!except*` clauses, it is caught and wrapped by an -exception group with an empty message string. :: +If the exception raised from the :keyword:`try` block is not an exception group +and its type matches one of the :keyword:`!except*` clauses, it is caught and +wrapped by an exception group with an empty message string. This ensures that the +type of the target ``e`` is consistently :exc:`BaseExceptionGroup`:: >>> try: ... raise BlockingIOError @@ -380,12 +390,6 @@ exception group with an empty message string. :: ... ExceptionGroup('', (BlockingIOError())) -An :keyword:`!except*` clause must have a matching expression; it cannot be ``except*:``. -Furthermore, this expression cannot contain exception group types, because that would -have ambiguous semantics. - -It is not possible to mix :keyword:`except` and :keyword:`!except*` -in the same :keyword:`try`. :keyword:`break`, :keyword:`continue` and :keyword:`return` cannot appear in an :keyword:`!except*` clause. @@ -416,12 +420,14 @@ clauses. -------------------------- If :keyword:`!finally` is present, it specifies a 'cleanup' handler. The -:keyword:`try` clause is executed, including any :keyword:`except` and -:keyword:`else` clauses. If an exception occurs in any of the clauses and is -not handled, the exception is temporarily saved. The :keyword:`!finally` clause -is executed. If there is a saved exception it is re-raised at the end of the -:keyword:`!finally` clause. If the :keyword:`!finally` clause raises another -exception, the saved exception is set as the context of the new exception. +:keyword:`try` clause is executed, including any :keyword:`except` +and :keyword:`else ` clauses. +If an exception occurs in any of the clauses and is not handled, +the exception is temporarily saved. +The :keyword:`!finally` clause is executed. If there is a saved exception +it is re-raised at the end of the :keyword:`!finally` clause. +If the :keyword:`!finally` clause raises another exception, the saved exception +is set as the context of the new exception. If the :keyword:`!finally` clause executes a :keyword:`return`, :keyword:`break` or :keyword:`continue` statement, the saved exception is discarded. For example, this function returns 42. @@ -588,6 +594,7 @@ the items are surrounded by parentheses. For example:: statement. .. _match: +.. _case: The :keyword:`!match` statement =============================== @@ -608,9 +615,9 @@ The match statement is used for pattern matching. Syntax: .. productionlist:: python-grammar match_stmt: 'match' `subject_expr` ":" NEWLINE INDENT `case_block`+ DEDENT - subject_expr: `star_named_expression` "," `star_named_expressions`? - : | `named_expression` - case_block: 'case' `patterns` [`guard`] ":" `block` + subject_expr: `!star_named_expression` "," `!star_named_expressions`? + : | `!named_expression` + case_block: 'case' `patterns` [`guard`] ":" `!block` .. note:: This section uses single quotes to denote @@ -699,7 +706,7 @@ Guards .. index:: ! guard .. productionlist:: python-grammar - guard: "if" `named_expression` + guard: "if" `!named_expression` A ``guard`` (which is part of the ``case``) must succeed for code inside the ``case`` block to execute. It takes the form: :keyword:`if` followed by an @@ -852,8 +859,8 @@ A literal pattern corresponds to most The rule ``strings`` and the token ``NUMBER`` are defined in the :doc:`standard Python grammar <./grammar>`. Triple-quoted strings are -supported. Raw strings and byte strings are supported. :ref:`f-strings` are -not supported. +supported. Raw strings and byte strings are supported. :ref:`f-strings` +and :ref:`t-strings` are not supported. The forms ``signed_number '+' NUMBER`` and ``signed_number '-' NUMBER`` are for expressing :ref:`complex numbers `; they require a real number @@ -1013,8 +1020,8 @@ subject value: items, as for a fixed-length sequence. .. note:: The length of the subject sequence is obtained via - :func:`len` (i.e. via the :meth:`__len__` protocol). This length may be - cached by the interpreter in a similar manner as + :func:`len` (i.e. via the :meth:`~object.__len__` protocol). + This length may be cached by the interpreter in a similar manner as :ref:`value patterns `. @@ -1065,8 +1072,8 @@ subject value: .. note:: Key-value pairs are matched using the two-argument form of the mapping subject's ``get()`` method. Matched key-value pairs must already be present - in the mapping, and not created on-the-fly via :meth:`__missing__` or - :meth:`~object.__getitem__`. + in the mapping, and not created on-the-fly via :meth:`~object.__missing__` + or :meth:`~object.__getitem__`. In simple terms ``{KEY1: P1, KEY2: P2, ... }`` matches only if all the following happens: @@ -1885,7 +1892,7 @@ expressions. The presence of annotations does not change the runtime semantics o the code, except if some mechanism is used that introspects and uses the annotations (such as :mod:`dataclasses` or :func:`functools.singledispatch`). -By default, annotations are lazily evaluated in a :ref:`annotation scope `. +By default, annotations are lazily evaluated in an :ref:`annotation scope `. This means that they are not evaluated when the code containing the annotation is evaluated. Instead, the interpreter saves information that can be used to evaluate the annotation later if requested. The :mod:`annotationlib` module provides tools for evaluating annotations. @@ -1898,6 +1905,12 @@ all annotations are instead stored as strings:: >>> f.__annotations__ {'param': 'annotation'} +This future statement will be deprecated and removed in a future version of Python, +but not before Python 3.13 reaches its end of life (see :pep:`749`). +When it is used, introspection tools like +:func:`annotationlib.get_annotations` and :func:`typing.get_type_hints` are +less likely to be able to resolve annotations at runtime. + .. rubric:: Footnotes diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 005a768f684e2c..71f669af76d8cd 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -16,9 +16,8 @@ Objects, values and types single: data :dfn:`Objects` are Python's abstraction for data. All data in a Python program -is represented by objects or by relations between objects. (In a sense, and in -conformance to Von Neumann's model of a "stored program computer", code is also -represented by objects.) +is represented by objects or by relations between objects. Even code is +represented by objects. .. index:: pair: built-in function; id @@ -29,9 +28,6 @@ represented by objects.) single: mutable object single: immutable object -.. XXX it *is* now possible in some cases to change an object's - type, under certain controlled conditions - Every object has an identity, a type and a value. An object's *identity* never changes once it has been created; you may think of it as the object's address in memory. The :keyword:`is` operator compares the identity of two objects; the @@ -262,6 +258,8 @@ Booleans (:class:`bool`) a string, the strings ``"False"`` or ``"True"`` are returned, respectively. +.. _datamodel-float: + :class:`numbers.Real` (:class:`float`) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1183,6 +1181,7 @@ Special attributes single: __module__ (class attribute) single: __dict__ (class attribute) single: __bases__ (class attribute) + single: __base__ (class attribute) single: __doc__ (class attribute) single: __annotations__ (class attribute) single: __annotate__ (class attribute) @@ -1217,6 +1216,13 @@ Special attributes In most cases, for a class defined as ``class X(A, B, C)``, ``X.__bases__`` will be exactly equal to ``(A, B, C)``. + * - .. attribute:: type.__base__ + - .. impl-detail:: + + The single base class in the inheritance chain that is responsible + for the memory layout of instances. This attribute corresponds to + :c:member:`~PyTypeObject.tp_base` at the C level. + * - .. attribute:: type.__doc__ - The class's documentation string, or ``None`` if undefined. Not inherited by subclasses. @@ -1228,10 +1234,22 @@ Special attributes :attr:`__annotations__ attributes `. For best practices on working with :attr:`~object.__annotations__`, - please see :mod:`annotationlib`. Where possible, use + please see :mod:`annotationlib`. Use :func:`annotationlib.get_annotations` instead of accessing this attribute directly. + .. warning:: + + Accessing the :attr:`!__annotations__` attribute directly + on a class object may return annotations for the wrong class, specifically + in certain cases where the class, its base class, or a metaclass + is defined under ``from __future__ import annotations``. + See :pep:`749 <749#pep749-metaclasses>` for details. + + This attribute does not exist on certain builtin classes. On + user-defined classes without ``__annotations__``, it is an + empty dictionary. + .. versionchanged:: 3.14 Annotations are now :ref:`lazily evaluated `. See :pep:`649`. @@ -1258,7 +1276,7 @@ Special attributes * - .. attribute:: type.__firstlineno__ - The line number of the first line of the class definition, including decorators. - Setting the :attr:`__module__` attribute removes the + Setting the :attr:`~type.__module__` attribute removes the :attr:`!__firstlineno__` item from the type's dictionary. .. versionadded:: 3.13 @@ -1624,6 +1642,7 @@ and are also passed to registered trace functions. single: f_locals (frame attribute) single: f_lasti (frame attribute) single: f_builtins (frame attribute) + single: f_generator (frame attribute) Special read-only attributes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1661,6 +1680,12 @@ Special read-only attributes (this is an index into the :term:`bytecode` string of the :ref:`code object `) + * - .. attribute:: frame.f_generator + - The :term:`generator` or :term:`coroutine` object that owns this frame, + or ``None`` if the frame is a normal function. + + .. versionadded:: 3.14 + .. index:: single: f_trace (frame attribute) single: f_trace_lines (frame attribute) @@ -1882,9 +1907,9 @@ falling back to :meth:`~object.__getitem__`). [#]_ When implementing a class that emulates any built-in type, it is important that the emulation only be implemented to the degree that it makes sense for the object being modelled. For example, some sequences may work well with retrieval -of individual elements, but extracting a slice may not make sense. (One example -of this is the :class:`~xml.dom.NodeList` interface in the W3C's Document -Object Model.) +of individual elements, but extracting a slice may not make sense. +(One example of this is the :ref:`NodeList ` interface +in the W3C's Document Object Model.) .. _customization: @@ -2337,6 +2362,9 @@ Customizing module attribute access single: __dir__ (module attribute) single: __class__ (module attribute) +.. method:: module.__getattr__ + module.__dir__ + Special names ``__getattr__`` and ``__dir__`` can be also used to customize access to module attributes. The ``__getattr__`` function at the module level should accept one argument which is the name of an attribute and return the @@ -2350,6 +2378,8 @@ The ``__dir__`` function should accept no arguments, and return an iterable of strings that represents the names accessible on module. If present, this function overrides the standard :func:`dir` search on a module. +.. attribute:: module.__class__ + For a more fine grained customization of the module behavior (setting attributes, properties, etc.), one can set the ``__class__`` attribute of a module object to a subclass of :class:`types.ModuleType`. For example:: @@ -2532,7 +2562,7 @@ instance dictionary. In contrast, non-data descriptors can be overridden by instances. Python methods (including those decorated with -:func:`@staticmethod ` and :func:`@classmethod `) are +:deco:`staticmethod` and :deco:`classmethod`) are implemented as non-data descriptors. Accordingly, instances can redefine and override methods. This allows individual instances to acquire behaviors that differ from other instances of the same class. @@ -2671,7 +2701,7 @@ class defining the method. .. versionadded:: 3.6 -When a class is created, :meth:`type.__new__` scans the class variables +When a class is created, :meth:`!type.__new__` scans the class variables and makes callbacks to those with a :meth:`~object.__set_name__` hook. .. method:: object.__set_name__(self, owner, name) @@ -2965,7 +2995,7 @@ class method ``__class_getitem__()``. When defined on a class, ``__class_getitem__()`` is automatically a class method. As such, there is no need for it to be decorated with - :func:`@classmethod` when it is defined. + :deco:`classmethod` when it is defined. The purpose of *__class_getitem__* @@ -3114,16 +3144,20 @@ objects. The :mod:`collections.abc` module provides a :term:`abstract base class` to help create those methods from a base set of :meth:`~object.__getitem__`, :meth:`~object.__setitem__`, :meth:`~object.__delitem__`, and :meth:`!keys`. -Mutable sequences should provide methods :meth:`!append`, :meth:`!count`, -:meth:`!index`, :meth:`!extend`, :meth:`!insert`, :meth:`!pop`, :meth:`!remove`, -:meth:`!reverse` and :meth:`!sort`, like Python standard :class:`list` -objects. Finally, -sequence types should implement addition (meaning concatenation) and + +Mutable sequences should provide methods +:meth:`~sequence.append`, :meth:`~sequence.clear`, :meth:`~sequence.count`, +:meth:`~sequence.extend`, :meth:`~sequence.index`, :meth:`~sequence.insert`, +:meth:`~sequence.pop`, :meth:`~sequence.remove`, and :meth:`~sequence.reverse`, +like Python standard :class:`list` objects. +Finally, sequence types should implement addition (meaning concatenation) and multiplication (meaning repetition) by defining the methods :meth:`~object.__add__`, :meth:`~object.__radd__`, :meth:`~object.__iadd__`, :meth:`~object.__mul__`, :meth:`~object.__rmul__` and :meth:`~object.__imul__` described below; they should not define other numerical -operators. It is recommended that both mappings and sequences implement the +operators. + +It is recommended that both mappings and sequences implement the :meth:`~object.__contains__` method to allow efficient use of the ``in`` operator; for mappings, ``in`` should search the mapping's keys; for sequences, it should diff --git a/Doc/reference/executionmodel.rst b/Doc/reference/executionmodel.rst index cb6c524dd97a30..639c232571edf3 100644 --- a/Doc/reference/executionmodel.rst +++ b/Doc/reference/executionmodel.rst @@ -398,6 +398,192 @@ See also the description of the :keyword:`try` statement in section :ref:`try` and :keyword:`raise` statement in section :ref:`raise`. +.. _execcomponents: + +Runtime Components +================== + +General Computing Model +----------------------- + +Python's execution model does not operate in a vacuum. It runs on +a host machine and through that host's runtime environment, including +its operating system (OS), if there is one. When a program runs, +the conceptual layers of how it runs on the host look something +like this: + + | **host machine** + | **process** (global resources) + | **thread** (runs machine code) + +Each process represents a program running on the host. Think of each +process itself as the data part of its program. Think of the process' +threads as the execution part of the program. This distinction will +be important to understand the conceptual Python runtime. + +The process, as the data part, is the execution context in which the +program runs. It mostly consists of the set of resources assigned to +the program by the host, including memory, signals, file handles, +sockets, and environment variables. + +Processes are isolated and independent from one another. (The same +is true for hosts.) The host manages the process' access to its +assigned resources, in addition to coordinating between processes. + +Each thread represents the actual execution of the program's machine +code, running relative to the resources assigned to the program's +process. It's strictly up to the host how and when that execution +takes place. + +From the point of view of Python, a program always starts with exactly +one thread. However, the program may grow to run in multiple +simultaneous threads. Not all hosts support multiple threads per +process, but most do. Unlike processes, threads in a process are not +isolated and independent from one another. Specifically, all threads +in a process share all of the process' resources. + +The fundamental point of threads is that each one does *run* +independently, at the same time as the others. That may be only +conceptually at the same time ("concurrently") or physically +("in parallel"). Either way, the threads effectively run +at a non-synchronized rate. + +.. note:: + + That non-synchronized rate means none of the process' memory is + guaranteed to stay consistent for the code running in any given + thread. Thus multi-threaded programs must take care to coordinate + access to intentionally shared resources. Likewise, they must take + care to be absolutely diligent about not accessing any *other* + resources in multiple threads; otherwise two threads running at the + same time might accidentally interfere with each other's use of some + shared data. All this is true for both Python programs and the + Python runtime. + + The cost of this broad, unstructured requirement is the tradeoff for + the kind of raw concurrency that threads provide. The alternative + to the required discipline generally means dealing with + non-deterministic bugs and data corruption. + +Python Runtime Model +-------------------- + +The same conceptual layers apply to each Python program, with some +extra data layers specific to Python: + + | **host machine** + | **process** (global resources) + | Python global runtime (*state*) + | Python interpreter (*state*) + | **thread** (runs Python bytecode and "C-API") + | Python thread *state* + +At the conceptual level: when a Python program starts, it looks exactly +like that diagram, with one of each. The runtime may grow to include +multiple interpreters, and each interpreter may grow to include +multiple thread states. + +.. note:: + + A Python implementation won't necessarily implement the runtime + layers distinctly or even concretely. The only exception is places + where distinct layers are directly specified or exposed to users, + like through the :mod:`threading` module. + +.. note:: + + The initial interpreter is typically called the "main" interpreter. + Some Python implementations, like CPython, assign special roles + to the main interpreter. + + Likewise, the host thread where the runtime was initialized is known + as the "main" thread. It may be different from the process' initial + thread, though they are often the same. In some cases "main thread" + may be even more specific and refer to the initial thread state. + A Python runtime might assign specific responsibilities + to the main thread, such as handling signals. + +As a whole, the Python runtime consists of the global runtime state, +interpreters, and thread states. The runtime ensures all that state +stays consistent over its lifetime, particularly when used with +multiple host threads. + +The global runtime, at the conceptual level, is just a set of +interpreters. While those interpreters are otherwise isolated and +independent from one another, they may share some data or other +resources. The runtime is responsible for managing these global +resources safely. The actual nature and management of these resources +is implementation-specific. Ultimately, the external utility of the +global runtime is limited to managing interpreters. + +In contrast, an "interpreter" is conceptually what we would normally +think of as the (full-featured) "Python runtime". When machine code +executing in a host thread interacts with the Python runtime, it calls +into Python in the context of a specific interpreter. + +.. note:: + + The term "interpreter" here is not the same as the "bytecode + interpreter", which is what regularly runs in threads, executing + compiled Python code. + + In an ideal world, "Python runtime" would refer to what we currently + call "interpreter". However, it's been called "interpreter" at least + since introduced in 1997 (`CPython:a027efa5b`_). + + .. _CPython:a027efa5b: https://github.com/python/cpython/commit/a027efa5b + +Each interpreter completely encapsulates all of the non-process-global, +non-thread-specific state needed for the Python runtime to work. +Notably, the interpreter's state persists between uses. It includes +fundamental data like :data:`sys.modules`. The runtime ensures +multiple threads using the same interpreter will safely +share it between them. + +A Python implementation may support using multiple interpreters at the +same time in the same process. They are independent and isolated from +one another. For example, each interpreter has its own +:data:`sys.modules`. + +For thread-specific runtime state, each interpreter has a set of thread +states, which it manages, in the same way the global runtime contains +a set of interpreters. It can have thread states for as many host +threads as it needs. It may even have multiple thread states for +the same host thread, though that isn't as common. + +Each thread state, conceptually, has all the thread-specific runtime +data an interpreter needs to operate in one host thread. The thread +state includes the current raised exception and the thread's Python +call stack. It may include other thread-specific resources. + +.. note:: + + The term "Python thread" can sometimes refer to a thread state, but + normally it means a thread created using the :mod:`threading` module. + +Each thread state, over its lifetime, is always tied to exactly one +interpreter and exactly one host thread. It will only ever be used in +that thread and with that interpreter. + +Multiple thread states may be tied to the same host thread, whether for +different interpreters or even the same interpreter. However, for any +given host thread, only one of the thread states tied to it can be used +by the thread at a time. + +Thread states are isolated and independent from one another and don't +share any data, except for possibly sharing an interpreter and objects +or other resources belonging to that interpreter. + +Once a program is running, new Python threads can be created using the +:mod:`threading` module (on platforms and Python implementations that +support threads). Additional processes can be created using the +:mod:`os`, :mod:`subprocess`, and :mod:`multiprocessing` modules. +Interpreters can be created and used with the +:mod:`~concurrent.interpreters` module. Coroutines (async) can +be run using :mod:`asyncio` in each interpreter, typically only +in a single thread (often the main thread). + + .. rubric:: Footnotes .. [#] This limitation occurs because the code that is executed by these operations diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 8837344e5ddca1..165dfa69f880d0 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -133,14 +133,18 @@ Literals Python supports string and bytes literals and various numeric literals: -.. productionlist:: python-grammar - literal: `stringliteral` | `bytesliteral` - : | `integer` | `floatnumber` | `imagnumber` +.. grammar-snippet:: + :group: python-grammar + + literal: `strings` | `NUMBER` Evaluation of a literal yields an object of the given type (string, bytes, integer, floating-point number, complex number) with the given value. The value may be approximated in the case of floating-point and imaginary (complex) -literals. See section :ref:`literals` for details. +literals. +See section :ref:`literals` for details. +See section :ref:`string-concatenation` for details on ``strings``. + .. index:: triple: immutable; data; type @@ -153,6 +157,58 @@ occurrence) may obtain the same object or a different object with the same value. +.. _string-concatenation: + +String literal concatenation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Multiple adjacent string or bytes literals (delimited by whitespace), possibly +using different quoting conventions, are allowed, and their meaning is the same +as their concatenation:: + + >>> "hello" 'world' + "helloworld" + +Formally: + +.. grammar-snippet:: + :group: python-grammar + + strings: ( `STRING` | `fstring`)+ | `tstring`+ + +This feature is defined at the syntactical level, so it only works with literals. +To concatenate string expressions at run time, the '+' operator may be used:: + + >>> greeting = "Hello" + >>> space = " " + >>> name = "Blaise" + >>> print(greeting + space + name) # not: print(greeting space name) + Hello Blaise + +Literal concatenation can freely mix raw strings, triple-quoted strings, +and formatted string literals. +For example:: + + >>> "Hello" r', ' f"{name}!" + "Hello, Blaise!" + +This feature can be used to reduce the number of backslashes +needed, to split long strings conveniently across long lines, or even to add +comments to parts of strings. For example:: + + re.compile("[A-Za-z_]" # letter or underscore + "[A-Za-z0-9_]*" # letter, digit or underscore + ) + +However, bytes literals may only be combined with other byte literals; +not with string literals of any kind. +Also, template string literals may only be combined with other template +string literals:: + + >>> t"Hello" t"{name}!" + Template(strings=('Hello', '!'), interpolations=(...)) + + .. _parenthesized: Parenthesized forms @@ -406,8 +462,9 @@ brackets or curly braces. Variables used in the generator expression are evaluated lazily when the :meth:`~generator.__next__` method is called for the generator object (in the same fashion as normal generators). However, the iterable expression in the -leftmost :keyword:`!for` clause is immediately evaluated, so that an error -produced by it will be emitted at the point where the generator expression +leftmost :keyword:`!for` clause is immediately evaluated, and the +:term:`iterator` is immediately created for that iterable, so that an error +produced while creating the iterator will be emitted at the point where the generator expression is defined, rather than at the point where the first value is retrieved. Subsequent :keyword:`!for` clauses and any filter condition in the leftmost :keyword:`!for` clause cannot be evaluated in the enclosing scope as they may @@ -625,8 +682,10 @@ is already executing raises a :exc:`ValueError` exception. .. method:: generator.close() - Raises a :exc:`GeneratorExit` at the point where the generator function was - paused. If the generator function catches the exception and returns a + Raises a :exc:`GeneratorExit` exception at the point where the generator + function was paused (equivalent to calling ``throw(GeneratorExit)``). + The exception is raised by the yield expression where the generator was paused. + If the generator function catches the exception and returns a value, this value is returned from :meth:`close`. If the generator function is already closed, or raises :exc:`GeneratorExit` (by not catching the exception), :meth:`close` returns :const:`None`. If the generator yields a @@ -1023,7 +1082,7 @@ series of :term:`arguments `: : ["," `keywords_arguments`] : | `starred_and_keywords` ["," `keywords_arguments`] : | `keywords_arguments` - positional_arguments: positional_item ("," positional_item)* + positional_arguments: `positional_item` ("," `positional_item`)* positional_item: `assignment_expression` | "*" `expression` starred_and_keywords: ("*" `expression` | `keyword_item`) : ("," "*" `expression` | "," `keyword_item`)* @@ -1879,8 +1938,9 @@ Conditional expressions conditional_expression: `or_test` ["if" `or_test` "else" `expression`] expression: `conditional_expression` | `lambda_expr` -Conditional expressions (sometimes called a "ternary operator") have the lowest -priority of all Python operations. +A conditional expression (sometimes called a "ternary operator") is an +alternative to the if-else statement. As it is an expression, it returns a value +and can appear as a sub-expression. The expression ``x if C else y`` first evaluates the condition, *C* rather than *x*. If *C* is true, *x* is evaluated and its value is returned; otherwise, *y* is @@ -1928,7 +1988,7 @@ Expression lists single: , (comma); expression list .. productionlist:: python-grammar - starred_expression: ["*"] `or_expr` + starred_expression: "*" `or_expr` | `expression` flexible_expression: `assignment_expression` | `starred_expression` flexible_expression_list: `flexible_expression` ("," `flexible_expression`)* [","] starred_expression_list: `starred_expression` ("," `starred_expression`)* [","] diff --git a/Doc/reference/grammar.rst b/Doc/reference/grammar.rst index b9cca4444c9141..1037feb691f6bc 100644 --- a/Doc/reference/grammar.rst +++ b/Doc/reference/grammar.rst @@ -8,15 +8,12 @@ used to generate the CPython parser (see :source:`Grammar/python.gram`). The version here omits details related to code generation and error recovery. -The notation is a mixture of `EBNF -`_ -and `PEG `_. -In particular, ``&`` followed by a symbol, token or parenthesized -group indicates a positive lookahead (i.e., is required to match but -not consumed), while ``!`` indicates a negative lookahead (i.e., is -required *not* to match). We use the ``|`` separator to mean PEG's -"ordered choice" (written as ``/`` in traditional PEG grammars). See -:pep:`617` for more details on the grammar's syntax. +The notation used here is the same as in the preceding docs, +and is described in the :ref:`notation ` section, +except for an extra complication: + +* ``~`` ("cut"): commit to the current alternative and fail the rule + even if this fails to parse .. literalinclude:: ../../Grammar/python.gram :language: peg diff --git a/Doc/reference/introduction.rst b/Doc/reference/introduction.rst index b7b70e6be5a5b7..c62240b18cfe55 100644 --- a/Doc/reference/introduction.rst +++ b/Doc/reference/introduction.rst @@ -90,44 +90,130 @@ Notation .. index:: BNF, grammar, syntax, notation -The descriptions of lexical analysis and syntax use a modified -`Backus–Naur form (BNF) `_ grammar -notation. This uses the following style of definition: - -.. productionlist:: notation - name: `lc_letter` (`lc_letter` | "_")* - lc_letter: "a"..."z" - -The first line says that a ``name`` is an ``lc_letter`` followed by a sequence -of zero or more ``lc_letter``\ s and underscores. An ``lc_letter`` in turn is -any of the single characters ``'a'`` through ``'z'``. (This rule is actually -adhered to for the names defined in lexical and grammar rules in this document.) - -Each rule begins with a name (which is the name defined by the rule) and -``::=``. A vertical bar (``|``) is used to separate alternatives; it is the -least binding operator in this notation. A star (``*``) means zero or more -repetitions of the preceding item; likewise, a plus (``+``) means one or more -repetitions, and a phrase enclosed in square brackets (``[ ]``) means zero or -one occurrences (in other words, the enclosed phrase is optional). The ``*`` -and ``+`` operators bind as tightly as possible; parentheses are used for -grouping. Literal strings are enclosed in quotes. White space is only -meaningful to separate tokens. Rules are normally contained on a single line; -rules with many alternatives may be formatted alternatively with each line after -the first beginning with a vertical bar. - -.. index:: lexical definitions, ASCII - -In lexical definitions (as the example above), two more conventions are used: -Two literal characters separated by three dots mean a choice of any single -character in the given (inclusive) range of ASCII characters. A phrase between -angular brackets (``<...>``) gives an informal description of the symbol -defined; e.g., this could be used to describe the notion of 'control character' -if needed. - -Even though the notation used is almost the same, there is a big difference -between the meaning of lexical and syntactic definitions: a lexical definition -operates on the individual characters of the input source, while a syntax -definition operates on the stream of tokens generated by the lexical analysis. -All uses of BNF in the next chapter ("Lexical Analysis") are lexical -definitions; uses in subsequent chapters are syntactic definitions. +The descriptions of lexical analysis and syntax use a grammar notation that +is a mixture of +`EBNF `_ +and `PEG `_. +For example: +.. grammar-snippet:: + :group: notation + + name: `letter` (`letter` | `digit` | "_")* + letter: "a"..."z" | "A"..."Z" + digit: "0"..."9" + +In this example, the first line says that a ``name`` is a ``letter`` followed +by a sequence of zero or more ``letter``\ s, ``digit``\ s, and underscores. +A ``letter`` in turn is any of the single characters ``'a'`` through +``'z'`` and ``A`` through ``Z``; a ``digit`` is a single character from ``0`` +to ``9``. + +Each rule begins with a name (which identifies the rule that's being defined) +followed by a colon, ``:``. +The definition to the right of the colon uses the following syntax elements: + +* ``name``: A name refers to another rule. + Where possible, it is a link to the rule's definition. + + * ``TOKEN``: An uppercase name refers to a :term:`token`. + For the purposes of grammar definitions, tokens are the same as rules. + +* ``"text"``, ``'text'``: Text in single or double quotes must match literally + (without the quotes). The type of quote is chosen according to the meaning + of ``text``: + + * ``'if'``: A name in single quotes denotes a :ref:`keyword `. + * ``"case"``: A name in double quotes denotes a + :ref:`soft-keyword `. + * ``'@'``: A non-letter symbol in single quotes denotes an + :py:data:`~token.OP` token, that is, a :ref:`delimiter ` or + :ref:`operator `. + +* ``e1 e2``: Items separated only by whitespace denote a sequence. + Here, ``e1`` must be followed by ``e2``. +* ``e1 | e2``: A vertical bar is used to separate alternatives. + It denotes PEG's "ordered choice": if ``e1`` matches, ``e2`` is + not considered. + In traditional PEG grammars, this is written as a slash, ``/``, rather than + a vertical bar. + See :pep:`617` for more background and details. +* ``e*``: A star means zero or more repetitions of the preceding item. +* ``e+``: Likewise, a plus means one or more repetitions. +* ``[e]``: A phrase enclosed in square brackets means zero or + one occurrences. In other words, the enclosed phrase is optional. +* ``e?``: A question mark has exactly the same meaning as square brackets: + the preceding item is optional. +* ``(e)``: Parentheses are used for grouping. + +The following notation is only used in +:ref:`lexical definitions `. + +* ``"a"..."z"``: Two literal characters separated by three dots mean a choice + of any single character in the given (inclusive) range of ASCII characters. +* ``<...>``: A phrase between angular brackets gives an informal description + of the matched symbol (for example, ````), + or an abbreviation that is defined in nearby text (for example, ````). + +.. _lexical-lookaheads: + +Some definitions also use *lookaheads*, which indicate that an element +must (or must not) match at a given position, but without consuming any input: + +* ``&e``: a positive lookahead (that is, ``e`` is required to match) +* ``!e``: a negative lookahead (that is, ``e`` is required *not* to match) + +The unary operators (``*``, ``+``, ``?``) bind as tightly as possible; +the vertical bar (``|``) binds most loosely. + +White space is only meaningful to separate tokens. + +Rules are normally contained on a single line, but rules that are too long +may be wrapped: + +.. grammar-snippet:: + :group: notation + + literal: stringliteral | bytesliteral + | integer | floatnumber | imagnumber + +Alternatively, rules may be formatted with the first line ending at the colon, +and each alternative beginning with a vertical bar on a new line. +For example: + + +.. grammar-snippet:: + :group: notation-alt + + literal: + | stringliteral + | bytesliteral + | integer + | floatnumber + | imagnumber + +This does *not* mean that there is an empty first alternative. + +.. index:: lexical definitions + +.. _notation-lexical-vs-syntactic: + +Lexical and Syntactic definitions +--------------------------------- + +There is some difference between *lexical* and *syntactic* analysis: +the :term:`lexical analyzer` operates on the individual characters of the +input source, while the *parser* (syntactic analyzer) operates on the stream +of :term:`tokens ` generated by the lexical analysis. +However, in some cases the exact boundary between the two phases is a +CPython implementation detail. + +The practical difference between the two is that in *lexical* definitions, +all whitespace is significant. +The lexical analyzer :ref:`discards ` all whitespace that is not +converted to tokens like :data:`token.INDENT` or :data:`~token.NEWLINE`. +*Syntactic* definitions then use these tokens, rather than source characters. + +This documentation uses the same BNF grammar for both styles of definitions. +All uses of BNF in the next chapter (:ref:`lexical`) are lexical definitions; +uses in subsequent chapters are syntactic definitions. diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ff801a7d4fc494..c2bf45db265b33 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -10,12 +10,76 @@ Lexical analysis A Python program is read by a *parser*. Input to the parser is a stream of :term:`tokens `, generated by the *lexical analyzer* (also known as the *tokenizer*). -This chapter describes how the lexical analyzer breaks a file into tokens. +This chapter describes how the lexical analyzer produces these tokens. -Python reads program text as Unicode code points; the encoding of a source file -can be given by an encoding declaration and defaults to UTF-8, see :pep:`3120` -for details. If the source file cannot be decoded, a :exc:`SyntaxError` is -raised. +The lexical analyzer determines the program text's :ref:`encoding ` +(UTF-8 by default), and decodes the text into +:ref:`source characters `. +If the text cannot be decoded, a :exc:`SyntaxError` is raised. + +Next, the lexical analyzer uses the source characters to generate a stream of tokens. +The type of a generated token generally depends on the next source character to +be processed. Similarly, other special behavior of the analyzer depends on +the first source character that hasn't yet been processed. +The following table gives a quick summary of these source characters, +with links to sections that contain more information. + +.. list-table:: + :header-rows: 1 + + * - Character + - Next token (or other relevant documentation) + + * - * space + * tab + * formfeed + - * :ref:`Whitespace ` + + * - * CR, LF + - * :ref:`New line ` + * :ref:`Indentation ` + + * - * backslash (``\``) + - * :ref:`Explicit line joining ` + * (Also significant in :ref:`string escape sequences `) + + * - * hash (``#``) + - * :ref:`Comment ` + + * - * quote (``'``, ``"``) + - * :ref:`String literal ` + + * - * ASCII letter (``a``-``z``, ``A``-``Z``) + * non-ASCII character + - * :ref:`Name ` + * Prefixed :ref:`string or bytes literal ` + + * - * underscore (``_``) + - * :ref:`Name ` + * (Can also be part of :ref:`numeric literals `) + + * - * number (``0``-``9``) + - * :ref:`Numeric literal ` + + * - * dot (``.``) + - * :ref:`Numeric literal ` + * :ref:`Operator ` + + * - * question mark (``?``) + * dollar (``$``) + * + .. (the following uses zero-width space characters to render + .. a literal backquote) + + backquote (``​`​``) + * control character + - * Error (outside string literals and comments) + + * - * other printing character + - * :ref:`Operator or delimiter ` + + * - * end of file + - * :ref:`End marker ` .. _line-structure: @@ -35,11 +99,12 @@ Logical lines .. index:: logical line, physical line, line joining, NEWLINE token -The end of a logical line is represented by the token NEWLINE. Statements -cannot cross logical line boundaries except where NEWLINE is allowed by the -syntax (e.g., between statements in compound statements). A logical line is -constructed from one or more *physical lines* by following the explicit or -implicit *line joining* rules. +The end of a logical line is represented by the token :data:`~token.NEWLINE`. +Statements cannot cross logical line boundaries except where :data:`!NEWLINE` +is allowed by the syntax (e.g., between statements in compound statements). +A logical line is constructed from one or more *physical lines* by following +the :ref:`explicit ` or :ref:`implicit ` +*line joining* rules. .. _physical-lines: @@ -47,17 +112,30 @@ implicit *line joining* rules. Physical lines -------------- -A physical line is a sequence of characters terminated by an end-of-line -sequence. In source files and strings, any of the standard platform line -termination sequences can be used - the Unix form using ASCII LF (linefeed), -the Windows form using the ASCII sequence CR LF (return followed by linefeed), -or the old Macintosh form using the ASCII CR (return) character. All of these -forms can be used equally, regardless of platform. The end of input also serves -as an implicit terminator for the final physical line. +A physical line is a sequence of characters terminated by one the following +end-of-line sequences: + +* the Unix form using ASCII LF (linefeed), +* the Windows form using the ASCII sequence CR LF (return followed by linefeed), +* the '`Classic Mac OS`__' form using the ASCII CR (return) character. -When embedding Python, source code strings should be passed to Python APIs using -the standard C conventions for newline characters (the ``\n`` character, -representing ASCII LF, is the line terminator). + __ https://en.wikipedia.org/wiki/Classic_Mac_OS + +Regardless of platform, each of these sequences is replaced by a single +ASCII LF (linefeed) character. +(This is done even inside :ref:`string literals `.) +Each line can use any of the sequences; they do not need to be consistent +within a file. + +The end of input also serves as an implicit terminator for the final +physical line. + +Formally: + +.. grammar-snippet:: + :group: python-grammar + + newline: | | .. _comments: @@ -99,13 +177,25 @@ which is recognized by Bram Moolenaar's VIM. If no encoding declaration is found, the default encoding is UTF-8. If the implicit or explicit encoding of a file is UTF-8, an initial UTF-8 byte-order -mark (b'\xef\xbb\xbf') is ignored rather than being a syntax error. +mark (``b'\xef\xbb\xbf'``) is ignored rather than being a syntax error. If an encoding is declared, the encoding name must be recognized by Python (see :ref:`standard-encodings`). The encoding is used for all lexical analysis, including string literals, comments and identifiers. +.. _lexical-source-character: + +All lexical analysis, including string literals, comments +and identifiers, works on Unicode text decoded using the source encoding. +Any Unicode code point, except the NUL control character, can appear in +Python source. + +.. grammar-snippet:: + :group: python-grammar + + source_character: + .. _explicit-joining: @@ -160,11 +250,12 @@ Blank lines .. index:: single: blank line A logical line that contains only spaces, tabs, formfeeds and possibly a -comment, is ignored (i.e., no NEWLINE token is generated). During interactive -input of statements, handling of a blank line may differ depending on the -implementation of the read-eval-print loop. In the standard interactive -interpreter, an entirely blank logical line (i.e. one containing not even -whitespace or a comment) terminates a multi-line statement. +comment, is ignored (i.e., no :data:`~token.NEWLINE` token is generated). +During interactive input of statements, handling of a blank line may differ +depending on the implementation of the read-eval-print loop. +In the standard interactive interpreter, an entirely blank logical line (that +is, one containing not even whitespace or a comment) terminates a multi-line +statement. .. _indentation: @@ -202,19 +293,20 @@ the space count to zero). .. index:: INDENT token, DEDENT token -The indentation levels of consecutive lines are used to generate INDENT and -DEDENT tokens, using a stack, as follows. +The indentation levels of consecutive lines are used to generate +:data:`~token.INDENT` and :data:`~token.DEDENT` tokens, using a stack, +as follows. Before the first line of the file is read, a single zero is pushed on the stack; this will never be popped off again. The numbers pushed on the stack will always be strictly increasing from bottom to top. At the beginning of each logical line, the line's indentation level is compared to the top of the stack. If it is equal, nothing happens. If it is larger, it is pushed on the stack, and -one INDENT token is generated. If it is smaller, it *must* be one of the +one :data:`!INDENT` token is generated. If it is smaller, it *must* be one of the numbers occurring on the stack; all numbers on the stack that are larger are -popped off, and for each number popped off a DEDENT token is generated. At the -end of the file, a DEDENT token is generated for each number remaining on the -stack that is larger than zero. +popped off, and for each number popped off a :data:`!DEDENT` token is generated. +At the end of the file, a :data:`!DEDENT` token is generated for each number +remaining on the stack that is larger than zero. Here is an example of a correctly (though confusingly) indented piece of Python code:: @@ -253,9 +345,27 @@ Whitespace between tokens Except at the beginning of a logical line or in string literals, the whitespace characters space, tab and formfeed can be used interchangeably to separate -tokens. Whitespace is needed between two tokens only if their concatenation -could otherwise be interpreted as a different token (e.g., ab is one token, but -a b is two tokens). +tokens: + +.. grammar-snippet:: + :group: python-grammar + + whitespace: ' ' | tab | formfeed + + +Whitespace is needed between two tokens only if their concatenation +could otherwise be interpreted as a different token. For example, ``ab`` is one +token, but ``a b`` is two tokens. However, ``+a`` and ``+ a`` both produce +two tokens, ``+`` and ``a``, as ``+a`` is not a valid token. + + +.. _endmarker-token: + +End marker +---------- + +At the end of non-interactive input, the lexical analyzer generates an +:data:`~token.ENDMARKER` token. .. _other-tokens: @@ -263,67 +373,50 @@ a b is two tokens). Other tokens ============ -Besides NEWLINE, INDENT and DEDENT, the following categories of tokens exist: -*identifiers*, *keywords*, *literals*, *operators*, and *delimiters*. Whitespace -characters (other than line terminators, discussed earlier) are not tokens, but -serve to delimit tokens. Where ambiguity exists, a token comprises the longest -possible string that forms a legal token, when read from left to right. +Besides :data:`~token.NEWLINE`, :data:`~token.INDENT` and :data:`~token.DEDENT`, +the following categories of tokens exist: +*identifiers* and *keywords* (:data:`~token.NAME`), *literals* (such as +:data:`~token.NUMBER` and :data:`~token.STRING`), and other symbols +(*operators* and *delimiters*, :data:`~token.OP`). +Whitespace characters (other than logical line terminators, discussed earlier) +are not tokens, but serve to delimit tokens. +Where ambiguity exists, a token comprises the longest possible string that +forms a legal token, when read from left to right. .. _identifiers: -Identifiers and keywords -======================== +Names (identifiers and keywords) +================================ .. index:: identifier, name -Identifiers (also referred to as *names*) are described by the following lexical -definitions. - -The syntax of identifiers in Python is based on the Unicode standard annex -UAX-31, with elaboration and changes as defined below; see also :pep:`3131` for -further details. - -Within the ASCII range (U+0001..U+007F), the valid characters for identifiers -include the uppercase and lowercase letters ``A`` through -``Z``, the underscore ``_`` and, except for the first character, the digits -``0`` through ``9``. -Python 3.0 introduced additional characters from outside the ASCII range (see -:pep:`3131`). For these characters, the classification uses the version of the -Unicode Character Database as included in the :mod:`unicodedata` module. - -Identifiers are unlimited in length. Case is significant. - -.. productionlist:: python-grammar - identifier: `xid_start` `xid_continue`* - id_start: - id_continue: - xid_start: - xid_continue: - -The Unicode category codes mentioned above stand for: - -* *Lu* - uppercase letters -* *Ll* - lowercase letters -* *Lt* - titlecase letters -* *Lm* - modifier letters -* *Lo* - other letters -* *Nl* - letter numbers -* *Mn* - nonspacing marks -* *Mc* - spacing combining marks -* *Nd* - decimal numbers -* *Pc* - connector punctuations -* *Other_ID_Start* - explicit list of characters in `PropList.txt - `_ to support backwards - compatibility -* *Other_ID_Continue* - likewise - -All identifiers are converted into the normal form NFKC while parsing; comparison -of identifiers is based on NFKC. - -A non-normative HTML file listing all valid identifier characters for Unicode -16.0.0 can be found at -https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +:data:`~token.NAME` tokens represent *identifiers*, *keywords*, and +*soft keywords*. + +Names are composed of the following characters: + +* uppercase and lowercase letters (``A-Z`` and ``a-z``), +* the underscore (``_``), +* digits (``0`` through ``9``), which cannot appear as the first character, and +* non-ASCII characters. Valid names may only contain "letter-like" and + "digit-like" characters; see :ref:`lexical-names-nonascii` for details. + +Names must contain at least one character, but have no upper length limit. +Case is significant. + +Formally, names are described by the following lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + NAME: `name_start` `name_continue`* + name_start: "a"..."z" | "A"..."Z" | "_" | + name_continue: name_start | "0"..."9" + identifier: <`NAME`, except keywords> + +Note that not all names matched by this grammar are valid; see +:ref:`lexical-names-nonascii` for details. .. _keywords: @@ -335,7 +428,7 @@ Keywords single: keyword single: reserved word -The following identifiers are used as reserved words, or *keywords* of the +The following names are used as reserved words, or *keywords* of the language, and cannot be used as ordinary identifiers. They must be spelled exactly as written here: @@ -359,18 +452,19 @@ Soft Keywords .. versionadded:: 3.10 -Some identifiers are only reserved under specific contexts. These are known as -*soft keywords*. The identifiers ``match``, ``case``, ``type`` and ``_`` can -syntactically act as keywords in certain contexts, +Some names are only reserved under specific contexts. These are known as +*soft keywords*: + +- ``match``, ``case``, and ``_``, when used in the :keyword:`match` statement. +- ``type``, when used in the :keyword:`type` statement. + +These syntactically act as keywords in their specific contexts, but this distinction is done at the parser level, not when tokenizing. As soft keywords, their use in the grammar is possible while still preserving compatibility with existing code that uses these names as identifier names. -``match``, ``case``, and ``_`` are used in the :keyword:`match` statement. -``type`` is used in the :keyword:`type` statement. - .. versionchanged:: 3.12 ``type`` is now a soft keyword. @@ -425,6 +519,95 @@ characters: :ref:`atom-identifiers`. +.. _lexical-names-nonascii: + +Non-ASCII characters in names +----------------------------- + +Names that contain non-ASCII characters need additional normalization +and validation beyond the rules and grammar explained +:ref:`above `. +For example, ``ř_1``, ``蛇``, or ``साँप`` are valid names, but ``r〰2``, +``€``, or ``🐍`` are not. + +This section explains the exact rules. + +All names are converted into the `normalization form`_ NFKC while parsing. +This means that, for example, some typographic variants of characters are +converted to their "basic" form. For example, ``fiⁿₐˡᵢᶻₐᵗᵢᵒₙ`` normalizes to +``finalization``, so Python treats them as the same name:: + + >>> fiⁿₐˡᵢᶻₐᵗᵢᵒₙ = 3 + >>> finalization + 3 + +.. note:: + + Normalization is done at the lexical level only. + Run-time functions that take names as *strings* generally do not normalize + their arguments. + For example, the variable defined above is accessible at run time in the + :func:`globals` dictionary as ``globals()["finalization"]`` but not + ``globals()["fiⁿₐˡᵢᶻₐᵗᵢᵒₙ"]``. + +Similarly to how ASCII-only names must contain only letters, digits and +the underscore, and cannot start with a digit, a valid name must +start with a character in the "letter-like" set ``xid_start``, +and the remaining characters must be in the "letter- and digit-like" set +``xid_continue``. + +These sets based on the *XID_Start* and *XID_Continue* sets as defined by the +Unicode standard annex `UAX-31`_. +Python's ``xid_start`` additionally includes the underscore (``_``). +Note that Python does not necessarily conform to `UAX-31`_. + +A non-normative listing of characters in the *XID_Start* and *XID_Continue* +sets as defined by Unicode is available in the `DerivedCoreProperties.txt`_ +file in the Unicode Character Database. +For reference, the construction rules for the ``xid_*`` sets are given below. + +The set ``id_start`` is defined as the union of: + +* Unicode category ```` - uppercase letters (includes ``A`` to ``Z``) +* Unicode category ```` - lowercase letters (includes ``a`` to ``z``) +* Unicode category ```` - titlecase letters +* Unicode category ```` - modifier letters +* Unicode category ```` - other letters +* Unicode category ```` - letter numbers +* {``"_"``} - the underscore +* ```` - an explicit set of characters in `PropList.txt`_ + to support backwards compatibility + +The set ``xid_start`` then closes this set under NFKC normalization, by +removing all characters whose normalization is not of the form +``id_start id_continue*``. + +The set ``id_continue`` is defined as the union of: + +* ``id_start`` (see above) +* Unicode category ```` - decimal numbers (includes ``0`` to ``9``) +* Unicode category ```` - connector punctuations +* Unicode category ```` - nonspacing marks +* Unicode category ```` - spacing combining marks +* ```` - another explicit set of characters in + `PropList.txt`_ to support backwards compatibility + +Again, ``xid_continue`` closes this set under NFKC normalization. + +Unicode categories use the version of the Unicode Character Database as +included in the :mod:`unicodedata` module. + +.. _UAX-31: https://www.unicode.org/reports/tr31/ +.. _PropList.txt: https://www.unicode.org/Public/16.0.0/ucd/PropList.txt +.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms + +.. seealso:: + + * :pep:`3131` -- Supporting Non-ASCII Identifiers + * :pep:`672` -- Unicode-related Security Considerations for Python + + .. _literals: Literals @@ -434,79 +617,110 @@ Literals Literals are notations for constant values of some built-in types. +In terms of lexical analysis, Python has :ref:`string, bytes ` +and :ref:`numeric ` literals. + +Other "literals" are lexically denoted using :ref:`keywords ` +(``None``, ``True``, ``False``) and the special +:ref:`ellipsis token ` (``...``). + .. index:: string literal, bytes literal, ASCII single: ' (single quote); string literal single: " (double quote); string literal - single: u'; string literal - single: u"; string literal .. _strings: String and Bytes literals -------------------------- +========================= + +String literals are text enclosed in single quotes (``'``) or double +quotes (``"``). For example: + +.. code-block:: python + + "spam" + 'eggs' + +The quote used to start the literal also terminates it, so a string literal +can only contain the other quote (except with escape sequences, see below). +For example: + +.. code-block:: python -String literals are described by the following lexical definitions: - -.. productionlist:: python-grammar - stringliteral: [`stringprefix`](`shortstring` | `longstring`) - stringprefix: "r" | "u" | "R" | "U" | "f" | "F" - : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" - shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"' - longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""' - shortstringitem: `shortstringchar` | `stringescapeseq` - longstringitem: `longstringchar` | `stringescapeseq` - shortstringchar: - longstringchar: - stringescapeseq: "\" - -.. productionlist:: python-grammar - bytesliteral: `bytesprefix`(`shortbytes` | `longbytes`) - bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" - shortbytes: "'" `shortbytesitem`* "'" | '"' `shortbytesitem`* '"' - longbytes: "'''" `longbytesitem`* "'''" | '"""' `longbytesitem`* '"""' - shortbytesitem: `shortbyteschar` | `bytesescapeseq` - longbytesitem: `longbyteschar` | `bytesescapeseq` - shortbyteschar: - longbyteschar: - bytesescapeseq: "\" - -One syntactic restriction not indicated by these productions is that whitespace -is not allowed between the :token:`~python-grammar:stringprefix` or -:token:`~python-grammar:bytesprefix` and the rest of the literal. The source -character set is defined by the encoding declaration; it is UTF-8 if no encoding -declaration is given in the source file; see section :ref:`encodings`. - -.. index:: triple-quoted string, Unicode Consortium, raw string + 'Say "Hello", please.' + "Don't do that!" + +Except for this limitation, the choice of quote character (``'`` or ``"``) +does not affect how the literal is parsed. + +Inside a string literal, the backslash (``\``) character introduces an +:dfn:`escape sequence`, which has special meaning depending on the character +after the backslash. +For example, ``\"`` denotes the double quote character, and does *not* end +the string: + +.. code-block:: pycon + + >>> print("Say \"Hello\" to everyone!") + Say "Hello" to everyone! + +See :ref:`escape sequences ` below for a full list of such +sequences, and more details. + + +.. index:: triple-quoted string single: """; string literal single: '''; string literal -In plain English: Both types of literals can be enclosed in matching single quotes -(``'``) or double quotes (``"``). They can also be enclosed in matching groups -of three single or double quotes (these are generally referred to as -*triple-quoted strings*). The backslash (``\``) character is used to give special -meaning to otherwise ordinary characters like ``n``, which means 'newline' when -escaped (``\n``). It can also be used to escape characters that otherwise have a -special meaning, such as newline, backslash itself, or the quote character. -See :ref:`escape sequences ` below for examples. +Triple-quoted strings +--------------------- + +Strings can also be enclosed in matching groups of three single or double +quotes. +These are generally referred to as :dfn:`triple-quoted strings`:: -.. index:: - single: b'; bytes literal - single: b"; bytes literal + """This is a triple-quoted string.""" + +In triple-quoted literals, unescaped quotes are allowed (and are +retained), except that three unescaped quotes in a row terminate the literal, +if they are of the same kind (``'`` or ``"``) used at the start:: + + """This string has "quotes" inside.""" + +Unescaped newlines are also allowed and retained:: + + '''This triple-quoted string + continues on the next line.''' -Bytes literals are always prefixed with ``'b'`` or ``'B'``; they produce an -instance of the :class:`bytes` type instead of the :class:`str` type. They -may only contain ASCII characters; bytes with a numeric value of 128 or greater -must be expressed with escapes. .. index:: - single: r'; raw string literal - single: r"; raw string literal + single: u'; string literal + single: u"; string literal -Both string and bytes literals may optionally be prefixed with a letter ``'r'`` -or ``'R'``; such constructs are called :dfn:`raw string literals` -and :dfn:`raw bytes literals` respectively and treat backslashes as -literal characters. As a result, in raw string literals, ``'\U'`` and ``'\u'`` -escapes are not treated specially. +String prefixes +--------------- + +String literals can have an optional :dfn:`prefix` that influences how the +content of the literal is parsed, for example: + +.. code-block:: python + + b"data" + f'{result=}' + +The allowed prefixes are: + +* ``b``: :ref:`Bytes literal ` +* ``r``: :ref:`Raw string ` +* ``f``: :ref:`Formatted string literal ` ("f-string") +* ``t``: :ref:`Template string literal ` ("t-string") +* ``u``: No effect (allowed for backwards compatibility) + +See the linked sections for details on each type. + +Prefixes are case-insensitive (for example, '``B``' works the same as '``b``'). +The '``r``' prefix can be combined with '``f``', '``t``' or '``b``', so '``fr``', +'``rf``', '``tr``', '``rt``', '``br``', and '``rb``' are also valid prefixes. .. versionadded:: 3.3 The ``'rb'`` prefix of raw bytes literals has been added as a synonym @@ -516,18 +730,35 @@ escapes are not treated specially. to simplify the maintenance of dual Python 2.x and 3.x codebases. See :pep:`414` for more information. -.. index:: - single: f'; formatted string literal - single: f"; formatted string literal -A string literal with ``'f'`` or ``'F'`` in its prefix is a -:dfn:`formatted string literal`; see :ref:`f-strings`. The ``'f'`` may be -combined with ``'r'``, but not with ``'b'`` or ``'u'``, therefore raw -formatted strings are possible, but formatted bytes literals are not. +Formal grammar +-------------- + +String literals, except :ref:`"f-strings" ` and +:ref:`"t-strings" `, are described by the +following lexical definitions. + +These definitions use :ref:`negative lookaheads ` (``!``) +to indicate that an ending quote ends the literal. -In triple-quoted literals, unescaped newlines and quotes are allowed (and are -retained), except that three unescaped quotes in a row terminate the literal. (A -"quote" is the character used to open the literal, i.e. either ``'`` or ``"``.) +.. grammar-snippet:: + :group: python-grammar + + STRING: [`stringprefix`] (`stringcontent`) + stringprefix: <("r" | "u" | "b" | "br" | "rb"), case-insensitive> + stringcontent: + | "'''" ( !"'''" `longstringitem`)* "'''" + | '"""' ( !'"""' `longstringitem`)* '"""' + | "'" ( !"'" `stringitem`)* "'" + | '"' ( !'"' `stringitem`)* '"' + stringitem: `stringchar` | `stringescapeseq` + stringchar: + longstringitem: `stringitem` | newline + stringescapeseq: "\" + +Note that as in all lexical definitions, whitespace is significant. +In particular, the prefix (if any) must be immediately followed by the starting +quote. .. index:: physical line, escape sequence, Standard C, C single: \ (backslash); escape sequence @@ -546,120 +777,237 @@ retained), except that three unescaped quotes in a row terminate the literal. ( .. _escape-sequences: - Escape sequences -^^^^^^^^^^^^^^^^ +---------------- -Unless an ``'r'`` or ``'R'`` prefix is present, escape sequences in string and +Unless an '``r``' or '``R``' prefix is present, escape sequences in string and bytes literals are interpreted according to rules similar to those used by Standard C. The recognized escape sequences are: -+-------------------------+---------------------------------+-------+ -| Escape Sequence | Meaning | Notes | -+=========================+=================================+=======+ -| ``\``\ | Backslash and newline ignored | \(1) | -+-------------------------+---------------------------------+-------+ -| ``\\`` | Backslash (``\``) | | -+-------------------------+---------------------------------+-------+ -| ``\'`` | Single quote (``'``) | | -+-------------------------+---------------------------------+-------+ -| ``\"`` | Double quote (``"``) | | -+-------------------------+---------------------------------+-------+ -| ``\a`` | ASCII Bell (BEL) | | -+-------------------------+---------------------------------+-------+ -| ``\b`` | ASCII Backspace (BS) | | -+-------------------------+---------------------------------+-------+ -| ``\f`` | ASCII Formfeed (FF) | | -+-------------------------+---------------------------------+-------+ -| ``\n`` | ASCII Linefeed (LF) | | -+-------------------------+---------------------------------+-------+ -| ``\r`` | ASCII Carriage Return (CR) | | -+-------------------------+---------------------------------+-------+ -| ``\t`` | ASCII Horizontal Tab (TAB) | | -+-------------------------+---------------------------------+-------+ -| ``\v`` | ASCII Vertical Tab (VT) | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\\\{ooo}` | Character with octal value | (2,4) | -| | *ooo* | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\x{hh}` | Character with hex value *hh* | (3,4) | -+-------------------------+---------------------------------+-------+ - -Escape sequences only recognized in string literals are: - -+-------------------------+---------------------------------+-------+ -| Escape Sequence | Meaning | Notes | -+=========================+=================================+=======+ -| :samp:`\\N\\{{name}\\}` | Character named *name* in the | \(5) | -| | Unicode database | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\u{xxxx}` | Character with 16-bit hex value | \(6) | -| | *xxxx* | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\U{xxxxxxxx}` | Character with 32-bit hex value | \(7) | -| | *xxxxxxxx* | | -+-------------------------+---------------------------------+-------+ - -Notes: - -(1) - A backslash can be added at the end of a line to ignore the newline:: - - >>> 'This string will not include \ - ... backslashes or newline characters.' - 'This string will not include backslashes or newline characters.' - - The same result can be achieved using :ref:`triple-quoted strings `, - or parentheses and :ref:`string literal concatenation `. - - -(2) - As in Standard C, up to three octal digits are accepted. - - .. versionchanged:: 3.11 - Octal escapes with value larger than ``0o377`` produce a - :exc:`DeprecationWarning`. - - .. versionchanged:: 3.12 - Octal escapes with value larger than ``0o377`` produce a - :exc:`SyntaxWarning`. In a future Python version they will be eventually - a :exc:`SyntaxError`. - -(3) - Unlike in Standard C, exactly two hex digits are required. - -(4) - In a bytes literal, hexadecimal and octal escapes denote the byte with the - given value. In a string literal, these escapes denote a Unicode character - with the given value. - -(5) - .. versionchanged:: 3.3 - Support for name aliases [#]_ has been added. - -(6) - Exactly four hex digits are required. - -(7) - Any Unicode character can be encoded this way. Exactly eight hex digits - are required. +.. list-table:: + :widths: auto + :header-rows: 1 + + * * Escape Sequence + * Meaning + * * ``\``\ + * :ref:`string-escape-ignore` + * * ``\\`` + * :ref:`Backslash ` + * * ``\'`` + * :ref:`Single quote ` + * * ``\"`` + * :ref:`Double quote ` + * * ``\a`` + * ASCII Bell (BEL) + * * ``\b`` + * ASCII Backspace (BS) + * * ``\f`` + * ASCII Formfeed (FF) + * * ``\n`` + * ASCII Linefeed (LF) + * * ``\r`` + * ASCII Carriage Return (CR) + * * ``\t`` + * ASCII Horizontal Tab (TAB) + * * ``\v`` + * ASCII Vertical Tab (VT) + * * :samp:`\\\\{ooo}` + * :ref:`string-escape-oct` + * * :samp:`\\x{hh}` + * :ref:`string-escape-hex` + * * :samp:`\\N\\{{name}\\}` + * :ref:`string-escape-named` + * * :samp:`\\u{xxxx}` + * :ref:`Hexadecimal Unicode character ` + * * :samp:`\\U{xxxxxxxx}` + * :ref:`Hexadecimal Unicode character ` + +.. _string-escape-ignore: + +Ignored end of line +^^^^^^^^^^^^^^^^^^^ + +A backslash can be added at the end of a line to ignore the newline:: + + >>> 'This string will not include \ + ... backslashes or newline characters.' + 'This string will not include backslashes or newline characters.' + +The same result can be achieved using :ref:`triple-quoted strings `, +or parentheses and :ref:`string literal concatenation `. + +.. _string-escape-escaped-char: + +Escaped characters +^^^^^^^^^^^^^^^^^^ + +To include a backslash in a non-:ref:`raw ` Python string +literal, it must be doubled. The ``\\`` escape sequence denotes a single +backslash character:: + + >>> print('C:\\Program Files') + C:\Program Files + +Similarly, the ``\'`` and ``\"`` sequences denote the single and double +quote character, respectively:: + + >>> print('\' and \"') + ' and " + +.. _string-escape-oct: + +Octal character +^^^^^^^^^^^^^^^ + +The sequence :samp:`\\\\{ooo}` denotes a *character* with the octal (base 8) +value *ooo*:: + + >>> '\120' + 'P' + +Up to three octal digits (0 through 7) are accepted. + +In a bytes literal, *character* means a *byte* with the given value. +In a string literal, it means a Unicode character with the given value. + +.. versionchanged:: 3.11 + Octal escapes with value larger than ``0o377`` (255) produce a + :exc:`DeprecationWarning`. + +.. versionchanged:: 3.12 + Octal escapes with value larger than ``0o377`` (255) produce a + :exc:`SyntaxWarning`. + In a future Python version they will raise a :exc:`SyntaxError`. + +.. _string-escape-hex: + +Hexadecimal character +^^^^^^^^^^^^^^^^^^^^^ + +The sequence :samp:`\\x{hh}` denotes a *character* with the hex (base 16) +value *hh*:: + + >>> '\x50' + 'P' + +Unlike in Standard C, exactly two hex digits are required. + +In a bytes literal, *character* means a *byte* with the given value. +In a string literal, it means a Unicode character with the given value. + +.. _string-escape-named: + +Named Unicode character +^^^^^^^^^^^^^^^^^^^^^^^ + +The sequence :samp:`\\N\\{{name}\\}` denotes a Unicode character +with the given *name*:: + + >>> '\N{LATIN CAPITAL LETTER P}' + 'P' + >>> '\N{SNAKE}' + '🐍' + +This sequence cannot appear in :ref:`bytes literals `. + +.. versionchanged:: 3.3 + Support for `name aliases `__ + has been added. + +.. _string-escape-long-hex: + +Hexadecimal Unicode characters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These sequences :samp:`\\u{xxxx}` and :samp:`\\U{xxxxxxxx}` denote the +Unicode character with the given hex (base 16) value. +Exactly four digits are required for ``\u``; exactly eight digits are +required for ``\U``. +The latter can encode any Unicode character. + +.. code-block:: pycon + + >>> '\u1234' + 'ሴ' + >>> '\U0001f40d' + '🐍' + +These sequences cannot appear in :ref:`bytes literals `. .. index:: unrecognized escape sequence -Unlike Standard C, all unrecognized escape sequences are left in the string -unchanged, i.e., *the backslash is left in the result*. (This behavior is -useful when debugging: if an escape sequence is mistyped, the resulting output -is more easily recognized as broken.) It is also important to note that the -escape sequences only recognized in string literals fall into the category of -unrecognized escapes for bytes literals. +Unrecognized escape sequences +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Unlike in Standard C, all unrecognized escape sequences are left in the string +unchanged, that is, *the backslash is left in the result*:: + + >>> print('\q') + \q + >>> list('\q') + ['\\', 'q'] + +Note that for bytes literals, the escape sequences only recognized in string +literals (``\N...``, ``\u...``, ``\U...``) fall into the category of +unrecognized escapes. .. versionchanged:: 3.6 Unrecognized escape sequences produce a :exc:`DeprecationWarning`. .. versionchanged:: 3.12 - Unrecognized escape sequences produce a :exc:`SyntaxWarning`. In a future - Python version they will be eventually a :exc:`SyntaxError`. + Unrecognized escape sequences produce a :exc:`SyntaxWarning`. + In a future Python version they will raise a :exc:`SyntaxError`. + + +.. index:: + single: b'; bytes literal + single: b"; bytes literal + + +.. _bytes-literal: + +Bytes literals +-------------- + +:dfn:`Bytes literals` are always prefixed with '``b``' or '``B``'; they produce an +instance of the :class:`bytes` type instead of the :class:`str` type. +They may only contain ASCII characters; bytes with a numeric value of 128 +or greater must be expressed with escape sequences (typically +:ref:`string-escape-hex` or :ref:`string-escape-oct`): + +.. code-block:: pycon + + >>> b'\x89PNG\r\n\x1a\n' + b'\x89PNG\r\n\x1a\n' + >>> list(b'\x89PNG\r\n\x1a\n') + [137, 80, 78, 71, 13, 10, 26, 10] + +Similarly, a zero byte must be expressed using an escape sequence (typically +``\0`` or ``\x00``). + + +.. index:: + single: r'; raw string literal + single: r"; raw string literal + +.. _raw-strings: + +Raw string literals +------------------- + +Both string and bytes literals may optionally be prefixed with a letter '``r``' +or '``R``'; such constructs are called :dfn:`raw string literals` +and :dfn:`raw bytes literals` respectively and treat backslashes as +literal characters. +As a result, in raw string literals, :ref:`escape sequences ` +are not treated specially: + +.. code-block:: pycon + + >>> r'\d{4}-\d{2}-\d{2}' + '\\d{4}-\\d{2}-\\d{2}' Even in a raw literal, quotes can be escaped with a backslash, but the backslash remains in the result; for example, ``r"\""`` is a valid string @@ -671,29 +1019,6 @@ that a single backslash followed by a newline is interpreted as those two characters as part of the literal, *not* as a line continuation. -.. _string-concatenation: - -String literal concatenation ----------------------------- - -Multiple adjacent string or bytes literals (delimited by whitespace), possibly -using different quoting conventions, are allowed, and their meaning is the same -as their concatenation. Thus, ``"hello" 'world'`` is equivalent to -``"helloworld"``. This feature can be used to reduce the number of backslashes -needed, to split long strings conveniently across long lines, or even to add -comments to parts of strings, for example:: - - re.compile("[A-Za-z_]" # letter or underscore - "[A-Za-z0-9_]*" # letter, digit or underscore - ) - -Note that this feature is defined at the syntactical level, but implemented at -compile time. The '+' operator must be used to concatenate string expressions -at run time. Also note that literal concatenation can use different quoting -styles for each component (even mixing raw strings and triple quoted strings), -and formatted string literals may be concatenated with plain string literals. - - .. index:: single: formatted string literal single: interpolated string literal @@ -701,6 +1026,8 @@ and formatted string literals may be concatenated with plain string literals. single: string; interpolated literal single: f-string single: fstring + single: f'; formatted string literal + single: f"; formatted string literal single: {} (curly brackets); in formatted string literal single: ! (exclamation); in formatted string literal single: : (colon); in formatted string literal @@ -713,124 +1040,59 @@ f-strings --------- .. versionadded:: 3.6 +.. versionchanged:: 3.7 + The :keyword:`await` and :keyword:`async for` can be used in expressions + within f-strings. +.. versionchanged:: 3.8 + Added the debug specifier (``=``) +.. versionchanged:: 3.12 + Many restrictions on expressions within f-strings have been removed. + Notably, nested strings, comments, and backslashes are now permitted. A :dfn:`formatted string literal` or :dfn:`f-string` is a string literal -that is prefixed with ``'f'`` or ``'F'``. These strings may contain -replacement fields, which are expressions delimited by curly braces ``{}``. -While other string literals always have a constant value, formatted strings -are really expressions evaluated at run time. - -Escape sequences are decoded like in ordinary string literals (except when -a literal is also marked as a raw string). After decoding, the grammar -for the contents of the string is: - -.. productionlist:: python-grammar - f_string: (`literal_char` | "{{" | "}}" | `replacement_field`)* - replacement_field: "{" `f_expression` ["="] ["!" `conversion`] [":" `format_spec`] "}" - f_expression: (`conditional_expression` | "*" `or_expr`) - : ("," `conditional_expression` | "," "*" `or_expr`)* [","] - : | `yield_expression` - conversion: "s" | "r" | "a" - format_spec: (`literal_char` | `replacement_field`)* - literal_char: - -The parts of the string outside curly braces are treated literally, -except that any doubled curly braces ``'{{'`` or ``'}}'`` are replaced -with the corresponding single curly brace. A single opening curly -bracket ``'{'`` marks a replacement field, which starts with a -Python expression. To display both the expression text and its value after -evaluation, (useful in debugging), an equal sign ``'='`` may be added after the -expression. A conversion field, introduced by an exclamation point ``'!'`` may -follow. A format specifier may also be appended, introduced by a colon ``':'``. -A replacement field ends with a closing curly bracket ``'}'``. +that is prefixed with '``f``' or '``F``'. +Unlike other string literals, f-strings do not have a constant value. +They may contain *replacement fields* delimited by curly braces ``{}``. +Replacement fields contain expressions which are evaluated at run time. +For example:: + + >>> who = 'nobody' + >>> nationality = 'Spanish' + >>> f'{who.title()} expects the {nationality} Inquisition!' + 'Nobody expects the Spanish Inquisition!' + +Any doubled curly braces (``{{`` or ``}}``) outside replacement fields +are replaced with the corresponding single curly brace:: + + >>> print(f'{{...}}') + {...} + +Other characters outside replacement fields are treated like in ordinary +string literals. +This means that escape sequences are decoded (except when a literal is +also marked as a raw string), and newlines are possible in triple-quoted +f-strings:: + + >>> name = 'Galahad' + >>> favorite_color = 'blue' + >>> print(f'{name}:\t{favorite_color}') + Galahad: blue + >>> print(rf"C:\Users\{name}") + C:\Users\Galahad + >>> print(f'''Three shall be the number of the counting + ... and the number of the counting shall be three.''') + Three shall be the number of the counting + and the number of the counting shall be three. Expressions in formatted string literals are treated like regular -Python expressions surrounded by parentheses, with a few exceptions. -An empty expression is not allowed, and both :keyword:`lambda` and -assignment expressions ``:=`` must be surrounded by explicit parentheses. +Python expressions. Each expression is evaluated in the context where the formatted string literal -appears, in order from left to right. Replacement expressions can contain -newlines in both single-quoted and triple-quoted f-strings and they can contain -comments. Everything that comes after a ``#`` inside a replacement field -is a comment (even closing braces and quotes). In that case, replacement fields -must be closed in a different line. - -.. code-block:: text - - >>> f"abc{a # This is a comment }" - ... + 3}" - 'abc5' - -.. versionchanged:: 3.7 - Prior to Python 3.7, an :keyword:`await` expression and comprehensions - containing an :keyword:`async for` clause were illegal in the expressions - in formatted string literals due to a problem with the implementation. - -.. versionchanged:: 3.12 - Prior to Python 3.12, comments were not allowed inside f-string replacement - fields. - -When the equal sign ``'='`` is provided, the output will have the expression -text, the ``'='`` and the evaluated value. Spaces after the opening brace -``'{'``, within the expression and after the ``'='`` are all retained in the -output. By default, the ``'='`` causes the :func:`repr` of the expression to be -provided, unless there is a format specified. When a format is specified it -defaults to the :func:`str` of the expression unless a conversion ``'!r'`` is -declared. - -.. versionadded:: 3.8 - The equal sign ``'='``. - -If a conversion is specified, the result of evaluating the expression -is converted before formatting. Conversion ``'!s'`` calls :func:`str` on -the result, ``'!r'`` calls :func:`repr`, and ``'!a'`` calls :func:`ascii`. - -The result is then formatted using the :func:`format` protocol. The -format specifier is passed to the :meth:`~object.__format__` method of the -expression or conversion result. An empty string is passed when the -format specifier is omitted. The formatted result is then included in -the final value of the whole string. - -Top-level format specifiers may include nested replacement fields. These nested -fields may include their own conversion fields and :ref:`format specifiers -`, but may not include more deeply nested replacement fields. The -:ref:`format specifier mini-language ` is the same as that used by -the :meth:`str.format` method. - -Formatted string literals may be concatenated, but replacement fields -cannot be split across literals. - -Some examples of formatted string literals:: - - >>> name = "Fred" - >>> f"He said his name is {name!r}." - "He said his name is 'Fred'." - >>> f"He said his name is {repr(name)}." # repr() is equivalent to !r - "He said his name is 'Fred'." - >>> width = 10 - >>> precision = 4 - >>> value = decimal.Decimal("12.34567") - >>> f"result: {value:{width}.{precision}}" # nested fields - 'result: 12.35' - >>> today = datetime(year=2017, month=1, day=27) - >>> f"{today:%B %d, %Y}" # using date format specifier - 'January 27, 2017' - >>> f"{today=:%B %d, %Y}" # using date format specifier and debugging - 'today=January 27, 2017' - >>> number = 1024 - >>> f"{number:#0x}" # using integer format specifier - '0x400' - >>> foo = "bar" - >>> f"{ foo = }" # preserves whitespace - " foo = 'bar'" - >>> line = "The mill's closed" - >>> f"{line = }" - 'line = "The mill\'s closed"' - >>> f"{line = :20}" - "line = The mill's closed " - >>> f"{line = !r:20}" - 'line = "The mill\'s closed" ' +appears, in order from left to right. +An empty expression is not allowed, and both :keyword:`lambda` and +assignment expressions ``:=`` must be surrounded by explicit parentheses:: + >>> f'{(half := 1/2)}, {half * 42}' + '0.5, 21.0' Reusing the outer f-string quoting type inside a replacement field is permitted:: @@ -839,10 +1101,6 @@ permitted:: >>> f"abc {a["x"]} def" 'abc 2 def' -.. versionchanged:: 3.12 - Prior to Python 3.12, reuse of the same quoting type of the outer f-string - inside a replacement field was not possible. - Backslashes are also allowed in replacement fields and are evaluated the same way as in any other context:: @@ -853,39 +1111,214 @@ way as in any other context:: b c -.. versionchanged:: 3.12 - Prior to Python 3.12, backslashes were not permitted inside an f-string - replacement field. +It is possible to nest f-strings:: + + >>> name = 'world' + >>> f'Repeated:{f' hello {name}' * 3}' + 'Repeated: hello world hello world hello world' + +Portable Python programs should not use more than 5 levels of nesting. + +.. impl-detail:: + + CPython does not limit nesting of f-strings. + +Replacement expressions can contain newlines in both single-quoted and +triple-quoted f-strings and they can contain comments. +Everything that comes after a ``#`` inside a replacement field +is a comment (even closing braces and quotes). +This means that replacement fields with comments must be closed in a +different line: + +.. code-block:: text + + >>> a = 2 + >>> f"abc{a # This comment }" continues until the end of the line + ... + 3}" + 'abc5' + +After the expression, replacement fields may optionally contain: + +* a *debug specifier* -- an equal sign (``=``), optionally surrounded by + whitespace on one or both sides; +* a *conversion specifier* -- ``!s``, ``!r`` or ``!a``; and/or +* a *format specifier* prefixed with a colon (``:``). + +See the :ref:`Standard Library section on f-strings ` +for details on how these fields are evaluated. + +As that section explains, *format specifiers* are passed as the second argument +to the :func:`format` function to format a replacement field value. +For example, they can be used to specify a field width and padding characters +using the :ref:`Format Specification Mini-Language `:: + + >>> number = 14.3 + >>> f'{number:20.7f}' + ' 14.3000000' + +Top-level format specifiers may include nested replacement fields:: + + >>> field_size = 20 + >>> precision = 7 + >>> f'{number:{field_size}.{precision}f}' + ' 14.3000000' + +These nested fields may include their own conversion fields and +:ref:`format specifiers `:: + + >>> number = 3 + >>> f'{number:{field_size}}' + ' 3' + >>> f'{number:{field_size:05}}' + '00000000000000000003' -Formatted string literals cannot be used as docstrings, even if they do not -include expressions. +However, these nested fields may not include more deeply nested replacement +fields. -:: +Formatted string literals cannot be used as :term:`docstrings `, +even if they do not include expressions:: >>> def foo(): ... f"Not a docstring" ... - >>> foo.__doc__ is None - True + >>> print(foo.__doc__) + None -See also :pep:`498` for the proposal that added formatted string literals, -and :meth:`str.format`, which uses a related format string mechanism. +.. seealso:: + + * :pep:`498` -- Literal String Interpolation + * :pep:`701` -- Syntactic formalization of f-strings + * :meth:`str.format`, which uses a related format string mechanism. + + +.. _t-strings: +.. _template-string-literals: + +t-strings +--------- + +.. versionadded:: 3.14 + +A :dfn:`template string literal` or :dfn:`t-string` is a string literal +that is prefixed with '``t``' or '``T``'. +These strings follow the same syntax rules as +:ref:`formatted string literals `. +For differences in evaluation rules, see the +:ref:`Standard Library section on t-strings ` + + +Formal grammar for f-strings +---------------------------- + +F-strings are handled partly by the :term:`lexical analyzer`, which produces the +tokens :py:data:`~token.FSTRING_START`, :py:data:`~token.FSTRING_MIDDLE` +and :py:data:`~token.FSTRING_END`, and partly by the parser, which handles +expressions in the replacement field. +The exact way the work is split is a CPython implementation detail. + +Correspondingly, the f-string grammar is a mix of +:ref:`lexical and syntactic definitions `. + +Whitespace is significant in these situations: + +* There may be no whitespace in :py:data:`~token.FSTRING_START` (between + the prefix and quote). +* Whitespace in :py:data:`~token.FSTRING_MIDDLE` is part of the literal + string contents. +* In ``fstring_replacement_field``, if ``f_debug_specifier`` is present, + all whitespace after the opening brace until the ``f_debug_specifier``, + as well as whitespace immediatelly following ``f_debug_specifier``, + is retained as part of the expression. + + .. impl-detail:: + + The expression is not handled in the tokenization phase; it is + retrieved from the source code using locations of the ``{`` token + and the token after ``=``. + + +The ``FSTRING_MIDDLE`` definition uses +:ref:`negative lookaheads ` (``!``) +to indicate special characters (backslash, newline, ``{``, ``}``) and +sequences (``f_quote``). + +.. grammar-snippet:: + :group: python-grammar + + fstring: `FSTRING_START` `fstring_middle`* `FSTRING_END` + + FSTRING_START: `fstringprefix` ("'" | '"' | "'''" | '"""') + FSTRING_END: `f_quote` + fstringprefix: <("f" | "fr" | "rf"), case-insensitive> + f_debug_specifier: '=' + f_quote: + + fstring_middle: + | `fstring_replacement_field` + | `FSTRING_MIDDLE` + FSTRING_MIDDLE: + | (!"\" !`newline` !'{' !'}' !`f_quote`) `source_character` + | `stringescapeseq` + | "{{" + | "}}" + | + fstring_replacement_field: + | '{' `f_expression` [`f_debug_specifier`] [`fstring_conversion`] + [`fstring_full_format_spec`] '}' + fstring_conversion: + | "!" ("s" | "r" | "a") + fstring_full_format_spec: + | ':' `fstring_format_spec`* + fstring_format_spec: + | `FSTRING_MIDDLE` + | `fstring_replacement_field` + f_expression: + | ','.(`conditional_expression` | "*" `or_expr`)+ [","] + | `yield_expression` + +.. note:: + + In the above grammar snippet, the ``f_quote`` and ``FSTRING_MIDDLE`` rules + are context-sensitive -- they depend on the contents of ``FSTRING_START`` + of the nearest enclosing ``fstring``. + + Constructing a more traditional formal grammar from this template is left + as an exercise for the reader. + +The grammar for t-strings is identical to the one for f-strings, with *t* +instead of *f* at the beginning of rule and token names and in the prefix. + +.. grammar-snippet:: + :group: python-grammar + + tstring: TSTRING_START tstring_middle* TSTRING_END + + .. _numbers: Numeric literals ----------------- +================ .. index:: number, numeric literal, integer literal floating-point literal, hexadecimal literal octal literal, binary literal, decimal literal, imaginary literal, complex literal -There are three types of numeric literals: integers, floating-point numbers, and -imaginary numbers. There are no complex literals (complex numbers can be formed -by adding a real number and an imaginary number). +:data:`~token.NUMBER` tokens represent numeric literals, of which there are +three types: integers, floating-point numbers, and imaginary numbers. -Note that numeric literals do not include a sign; a phrase like ``-1`` is +.. grammar-snippet:: + :group: python-grammar + + NUMBER: `integer` | `floatnumber` | `imagnumber` + +The numeric value of a numeric literal is the same as if it were passed as a +string to the :class:`int`, :class:`float` or :class:`complex` class +constructor, respectively. +Note that not all valid inputs for those constructors are also valid literals. + +Numeric literals do not include a sign; a phrase like ``-1`` is actually an expression composed of the unary operator '``-``' and the literal ``1``. @@ -901,36 +1334,65 @@ actually an expression composed of the unary operator '``-``' and the literal Integer literals ---------------- -Integer literals are described by the following lexical definitions: +Integer literals denote whole numbers. For example:: -.. productionlist:: python-grammar - integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger` - decinteger: `nonzerodigit` (["_"] `digit`)* | "0"+ (["_"] "0")* - bininteger: "0" ("b" | "B") (["_"] `bindigit`)+ - octinteger: "0" ("o" | "O") (["_"] `octdigit`)+ - hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+ - nonzerodigit: "1"..."9" - digit: "0"..."9" - bindigit: "0" | "1" - octdigit: "0"..."7" - hexdigit: `digit` | "a"..."f" | "A"..."F" + 7 + 3 + 2147483647 There is no limit for the length of integer literals apart from what can be -stored in available memory. +stored in available memory:: -Underscores are ignored for determining the numeric value of the literal. They -can be used to group digits for enhanced readability. One underscore can occur -between digits, and after base specifiers like ``0x``. + 7922816251426433759354395033679228162514264337593543950336 -Note that leading zeros in a non-zero decimal number are not allowed. This is -for disambiguation with C-style octal literals, which Python used before version -3.0. +Underscores can be used to group digits for enhanced readability, +and are ignored for determining the numeric value of the literal. +For example, the following literals are equivalent:: -Some examples of integer literals:: + 100_000_000_000 + 100000000000 + 1_00_00_00_00_000 - 7 2147483647 0o177 0b100110111 - 3 79228162514264337593543950336 0o377 0xdeadbeef - 100_000_000_000 0b_1110_0101 +Underscores can only occur between digits. +For example, ``_123``, ``321_``, and ``123__321`` are *not* valid literals. + +Integers can be specified in binary (base 2), octal (base 8), or hexadecimal +(base 16) using the prefixes ``0b``, ``0o`` and ``0x``, respectively. +Hexadecimal digits 10 through 15 are represented by letters ``A``-``F``, +case-insensitive. For example:: + + 0b100110111 + 0b_1110_0101 + 0o177 + 0o377 + 0xdeadbeef + 0xDead_Beef + +An underscore can follow the base specifier. +For example, ``0x_1f`` is a valid literal, but ``0_x1f`` and ``0x__1f`` are +not. + +Leading zeros in a non-zero decimal number are not allowed. +For example, ``0123`` is not a valid literal. +This is for disambiguation with C-style octal literals, which Python used +before version 3.0. + +Formally, integer literals are described by the following lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger` | `zerointeger` + decinteger: `nonzerodigit` (["_"] `digit`)* + bininteger: "0" ("b" | "B") (["_"] `bindigit`)+ + octinteger: "0" ("o" | "O") (["_"] `octdigit`)+ + hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+ + zerointeger: "0"+ (["_"] "0")* + nonzerodigit: "1"..."9" + digit: "0"..."9" + bindigit: "0" | "1" + octdigit: "0"..."7" + hexdigit: `digit` | "a"..."f" | "A"..."F" .. versionchanged:: 3.6 Underscores are now allowed for grouping purposes in literals. @@ -945,103 +1407,180 @@ Some examples of integer literals:: Floating-point literals ----------------------- -Floating-point literals are described by the following lexical definitions: +Floating-point (float) literals, such as ``3.14`` or ``1.5``, denote +:ref:`approximations of real numbers `. -.. productionlist:: python-grammar - floatnumber: `pointfloat` | `exponentfloat` - pointfloat: [`digitpart`] `fraction` | `digitpart` "." - exponentfloat: (`digitpart` | `pointfloat`) `exponent` - digitpart: `digit` (["_"] `digit`)* - fraction: "." `digitpart` - exponent: ("e" | "E") ["+" | "-"] `digitpart` +They consist of *integer* and *fraction* parts, each composed of decimal digits. +The parts are separated by a decimal point, ``.``:: -Note that the integer and exponent parts are always interpreted using radix 10. -For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The -allowed range of floating-point literals is implementation-dependent. As in -integer literals, underscores are supported for digit grouping. + 2.71828 + 4.0 -Some examples of floating-point literals:: +Unlike in integer literals, leading zeros are allowed. +For example, ``077.010`` is legal, and denotes the same number as ``77.01``. - 3.14 10. .001 1e100 3.14e-10 0e0 3.14_15_93 +As in integer literals, single underscores may occur between digits to help +readability:: -.. versionchanged:: 3.6 - Underscores are now allowed for grouping purposes in literals. + 96_485.332_123 + 3.14_15_93 +Either of these parts, but not both, can be empty. For example:: -.. index:: - single: j; in numeric literal -.. _imaginary: + 10. # (equivalent to 10.0) + .001 # (equivalent to 0.001) -Imaginary literals ------------------- +Optionally, the integer and fraction may be followed by an *exponent*: +the letter ``e`` or ``E``, followed by an optional sign, ``+`` or ``-``, +and a number in the same format as the integer and fraction parts. +The ``e`` or ``E`` represents "times ten raised to the power of":: -Imaginary literals are described by the following lexical definitions: + 1.0e3 # (represents 1.0×10³, or 1000.0) + 1.166e-5 # (represents 1.166×10⁻⁵, or 0.00001166) + 6.02214076e+23 # (represents 6.02214076×10²³, or 602214076000000000000000.) -.. productionlist:: python-grammar - imagnumber: (`floatnumber` | `digitpart`) ("j" | "J") +In floats with only integer and exponent parts, the decimal point may be +omitted:: -An imaginary literal yields a complex number with a real part of 0.0. Complex -numbers are represented as a pair of floating-point numbers and have the same -restrictions on their range. To create a complex number with a nonzero real -part, add a floating-point number to it, e.g., ``(3+4j)``. Some examples of -imaginary literals:: + 1e3 # (equivalent to 1.e3 and 1.0e3) + 0e0 # (equivalent to 0.) - 3.14j 10.j 10j .001j 1e100j 3.14e-10j 3.14_15_93j +Formally, floating-point literals are described by the following +lexical definitions: +.. grammar-snippet:: + :group: python-grammar -.. _operators: + floatnumber: + | `digitpart` "." [`digitpart`] [`exponent`] + | "." `digitpart` [`exponent`] + | `digitpart` `exponent` + digitpart: `digit` (["_"] `digit`)* + exponent: ("e" | "E") ["+" | "-"] `digitpart` + +.. versionchanged:: 3.6 + Underscores are now allowed for grouping purposes in literals. -Operators -========= -.. index:: single: operators +.. index:: + single: j; in numeric literal +.. _imaginary: -The following tokens are operators: +Imaginary literals +------------------ -.. code-block:: none +Python has :ref:`complex number ` objects, but no complex +literals. +Instead, *imaginary literals* denote complex numbers with a zero +real part. +For example, in math, the complex number 3+4.2\ *i* is written +as the real number 3 added to the imaginary number 4.2\ *i*. +Python uses a similar syntax, except the imaginary unit is written as ``j`` +rather than *i*:: - + - * ** / // % @ - << >> & | ^ ~ := - < > <= >= == != + 3+4.2j +This is an expression composed +of the :ref:`integer literal ` ``3``, +the :ref:`operator ` '``+``', +and the :ref:`imaginary literal ` ``4.2j``. +Since these are three separate tokens, whitespace is allowed between them:: -.. _delimiters: + 3 + 4.2j -Delimiters -========== +No whitespace is allowed *within* each token. +In particular, the ``j`` suffix, may not be separated from the number +before it. -.. index:: single: delimiters +The number before the ``j`` has the same syntax as a floating-point literal. +Thus, the following are valid imaginary literals:: -The following tokens serve as delimiters in the grammar: + 4.2j + 3.14j + 10.j + .001j + 1e100j + 3.14e-10j + 3.14_15_93j -.. code-block:: none +Unlike in a floating-point literal the decimal point can be omitted if the +imaginary number only has an integer part. +The number is still evaluated as a floating-point number, not an integer:: - ( ) [ ] { } - , : ! . ; @ = - -> += -= *= /= //= %= - @= &= |= ^= >>= <<= **= + 10j + 0j + 1000000000000000000000000j # equivalent to 1e+24j -The period can also occur in floating-point and imaginary literals. A sequence -of three periods has a special meaning as an ellipsis literal. The second half -of the list, the augmented assignment operators, serve lexically as delimiters, -but also perform an operation. +The ``j`` suffix is case-insensitive. +That means you can use ``J`` instead:: -The following printing ASCII characters have special meaning as part of other -tokens or are otherwise significant to the lexical analyzer: + 3.14J # equivalent to 3.14j -.. code-block:: none +Formally, imaginary literals are described by the following lexical definition: - ' " # \ +.. grammar-snippet:: + :group: python-grammar -The following printing ASCII characters are not used in Python. Their -occurrence outside string literals and comments is an unconditional error: + imagnumber: (`floatnumber` | `digitpart`) ("j" | "J") -.. code-block:: none - $ ? ` +.. _delimiters: +.. _operators: +.. _lexical-ellipsis: +Operators and delimiters +======================== -.. rubric:: Footnotes +.. index:: + single: operators + single: delimiters + +The following grammar defines :dfn:`operator` and :dfn:`delimiter` tokens, +that is, the generic :data:`~token.OP` token type. +A :ref:`list of these tokens and their names ` +is also available in the :mod:`!token` module documentation. + +.. grammar-snippet:: + :group: python-grammar + + OP: + | assignment_operator + | bitwise_operator + | comparison_operator + | enclosing_delimiter + | other_delimiter + | arithmetic_operator + | "..." + | other_op + + assignment_operator: "+=" | "-=" | "*=" | "**=" | "/=" | "//=" | "%=" | + "&=" | "|=" | "^=" | "<<=" | ">>=" | "@=" | ":=" + bitwise_operator: "&" | "|" | "^" | "~" | "<<" | ">>" + comparison_operator: "<=" | ">=" | "<" | ">" | "==" | "!=" + enclosing_delimiter: "(" | ")" | "[" | "]" | "{" | "}" + other_delimiter: "," | ":" | "!" | ";" | "=" | "->" + arithmetic_operator: "+" | "-" | "**" | "*" | "//" | "/" | "%" + other_op: "." | "@" + +.. note:: + + Generally, *operators* are used to combine :ref:`expressions `, + while *delimiters* serve other purposes. + However, there is no clear, formal distinction between the two categories. + + Some tokens can serve as either operators or delimiters, depending on usage. + For example, ``*`` is both the multiplication operator and a delimiter used + for sequence unpacking, and ``@`` is both the matrix multiplication and + a delimiter that introduces decorators. + + For some tokens, the distinction is unclear. + For example, some people consider ``.``, ``(``, and ``)`` to be delimiters, while others + see the :py:func:`getattr` operator and the function call operator(s). + + Some of Python's operators, like ``and``, ``or``, and ``not in``, use + :ref:`keyword ` tokens rather than "symbols" (operator tokens). + +A sequence of three consecutive periods (``...``) has a special +meaning as an :py:data:`Ellipsis` literal. -.. [#] https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt diff --git a/Doc/reference/simple_stmts.rst b/Doc/reference/simple_stmts.rst index 2a72af4e9a3299..9c022570e7e847 100644 --- a/Doc/reference/simple_stmts.rst +++ b/Doc/reference/simple_stmts.rst @@ -465,8 +465,8 @@ Deletion of a target list recursively deletes each target, from left to right. Deletion of a name removes the binding of that name from the local or global namespace, depending on whether the name occurs in a :keyword:`global` statement -in the same code block. If the name is unbound, a :exc:`NameError` exception -will be raised. +in the same code block. Trying to delete an unbound name raises a +:exc:`NameError` exception. .. index:: pair: attribute; deletion @@ -831,6 +831,9 @@ where the :keyword:`import` statement occurs. .. index:: single: __all__ (optional module attribute) +.. attribute:: module.__all__ + :no-typesetting: + The *public names* defined by a module are determined by checking the module's namespace for a variable named ``__all__``; if defined, it must be a sequence of strings which are names defined or imported by that module. The names @@ -971,10 +974,17 @@ as globals. It would be impossible to assign to a global variable without :keyword:`!global`, although free variables may refer to globals without being declared global. -The :keyword:`global` statement applies to the entire scope of a function or -class body. A :exc:`SyntaxError` is raised if a variable is used or +The :keyword:`!global` statement applies to the entire current scope +(module, function body or class definition). +A :exc:`SyntaxError` is raised if a variable is used or assigned to prior to its global declaration in the scope. +At the module level, all variables are global, so a :keyword:`!global` +statement has no effect. +However, variables must still not be used or +assigned to prior to their :keyword:`!global` declaration. +This requirement is relaxed in the interactive prompt (:term:`REPL`). + .. index:: pair: built-in function; exec pair: built-in function; eval diff --git a/Doc/requirements.txt b/Doc/requirements.txt index a2960ea9aa0203..716772b7f28d99 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -7,11 +7,11 @@ # won't suddenly cause build failures. Updating the version is fine as long # as no warnings are raised by doing so. # Keep this version in sync with ``Doc/conf.py``. -sphinx~=8.2.0 +sphinx~=9.0.0 blurb -sphinxext-opengraph~=0.10.0 +sphinxext-opengraph~=0.13.0 sphinx-notfound-page~=1.0.0 # The theme used by the documentation is stored separately, so we need diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 926d2cd42bdf02..520e64f5fe13b9 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -4,22 +4,15 @@ Doc/c-api/descriptor.rst Doc/c-api/float.rst -Doc/c-api/init.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst Doc/c-api/module.rst Doc/c-api/stable.rst -Doc/c-api/type.rst Doc/c-api/typeobj.rst -Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst -Doc/library/decimal.rst Doc/library/email.charset.rst -Doc/library/email.compat32-message.rst Doc/library/email.parser.rst -Doc/library/exceptions.rst -Doc/library/functools.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst Doc/library/importlib.rst @@ -31,49 +24,27 @@ Doc/library/multiprocessing.rst Doc/library/optparse.rst Doc/library/os.rst Doc/library/pickletools.rst -Doc/library/platform.rst Doc/library/profile.rst Doc/library/pyexpat.rst -Doc/library/resource.rst Doc/library/select.rst -Doc/library/signal.rst -Doc/library/smtplib.rst Doc/library/socket.rst Doc/library/ssl.rst -Doc/library/stdtypes.rst -Doc/library/subprocess.rst Doc/library/termios.rst Doc/library/test.rst Doc/library/tkinter.rst Doc/library/tkinter.scrolledtext.rst Doc/library/tkinter.ttk.rst Doc/library/unittest.mock.rst -Doc/library/unittest.rst Doc/library/urllib.parse.rst Doc/library/urllib.request.rst Doc/library/wsgiref.rst Doc/library/xml.dom.minidom.rst Doc/library/xml.dom.pulldom.rst Doc/library/xml.dom.rst -Doc/library/xml.sax.handler.rst Doc/library/xml.sax.reader.rst Doc/library/xml.sax.rst Doc/library/xmlrpc.client.rst Doc/library/xmlrpc.server.rst -Doc/library/zlib.rst -Doc/reference/compound_stmts.rst -Doc/reference/datamodel.rst -Doc/using/windows.rst Doc/whatsnew/2.4.rst Doc/whatsnew/2.5.rst Doc/whatsnew/2.6.rst -Doc/whatsnew/2.7.rst -Doc/whatsnew/3.3.rst -Doc/whatsnew/3.4.rst -Doc/whatsnew/3.5.rst -Doc/whatsnew/3.6.rst -Doc/whatsnew/3.7.rst -Doc/whatsnew/3.8.rst -Doc/whatsnew/3.9.rst -Doc/whatsnew/3.10.rst -Doc/whatsnew/3.11.rst diff --git a/Doc/tools/check-epub.py b/Doc/tools/check-epub.py new file mode 100644 index 00000000000000..6a10096c117542 --- /dev/null +++ b/Doc/tools/check-epub.py @@ -0,0 +1,30 @@ +from pathlib import Path + +CPYTHON_ROOT = Path( + __file__, # cpython/Doc/tools/check-epub.py + '..', # cpython/Doc/tools + '..', # cpython/Doc + '..', # cpython +).resolve() +EPUBCHECK_PATH = CPYTHON_ROOT / 'Doc' / 'epubcheck.txt' + + +def main() -> int: + lines = EPUBCHECK_PATH.read_text(encoding='utf-8').splitlines() + fatal_errors = [line for line in lines if line.startswith('FATAL')] + + if fatal_errors: + err_count = len(fatal_errors) + s = 's' * (err_count != 1) + print() + print(f'Error: epubcheck reported {err_count} fatal error{s}:') + print() + print('\n'.join(fatal_errors)) + return 1 + + print('Success: no fatal errors found.') + return 0 + + +if __name__ == '__main__': + raise SystemExit(main()) diff --git a/Doc/tools/check-warnings.py b/Doc/tools/check-warnings.py index c686eecf8d9271..f9b8c45a78af49 100644 --- a/Doc/tools/check-warnings.py +++ b/Doc/tools/check-warnings.py @@ -15,7 +15,7 @@ from typing import TextIO # Fail if NEWS nit found before this line number -NEWS_NIT_THRESHOLD = 1700 +NEWS_NIT_THRESHOLD = 7850 # Exclude these whether they're dirty or clean, # because they trigger a rebuild of dirty files. diff --git a/Doc/tools/extensions/audit_events.py b/Doc/tools/extensions/audit_events.py index 23d82c0f4414bf..385a58b2145446 100644 --- a/Doc/tools/extensions/audit_events.py +++ b/Doc/tools/extensions/audit_events.py @@ -13,7 +13,7 @@ from sphinx.util.docutils import SphinxDirective if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Iterator, Set from sphinx.application import Sphinx from sphinx.builders import Builder @@ -33,7 +33,7 @@ class AuditEvents: def __init__(self) -> None: self.events: dict[str, list[str]] = {} - self.sources: dict[str, list[tuple[str, str]]] = {} + self.sources: dict[str, set[tuple[str, str]]] = {} def __iter__(self) -> Iterator[tuple[str, list[str], tuple[str, str]]]: for name, args in self.events.items(): @@ -47,7 +47,7 @@ def add_event( self._check_args_match(name, args) else: self.events[name] = args - self.sources.setdefault(name, []).append(source) + self.sources.setdefault(name, set()).add(source) def _check_args_match(self, name: str, args: list[str]) -> None: current_args = self.events[name] @@ -69,11 +69,11 @@ def _check_args_match(self, name: str, args: list[str]) -> None: return def id_for(self, name) -> str: - source_count = len(self.sources.get(name, ())) + source_count = len(self.sources.get(name, set())) name_clean = re.sub(r"\W", "_", name) return f"audit_event_{name_clean}_{source_count}" - def rows(self) -> Iterator[tuple[str, list[str], list[tuple[str, str]]]]: + def rows(self) -> Iterator[tuple[str, list[str], Set[tuple[str, str]]]]: for name in sorted(self.events.keys()): yield name, self.events[name], self.sources[name] @@ -218,7 +218,7 @@ def _make_row( docname: str, name: str, args: list[str], - sources: list[tuple[str, str]], + sources: Set[tuple[str, str]], ) -> nodes.row: row = nodes.row() name_node = nodes.paragraph("", nodes.Text(name)) @@ -233,7 +233,7 @@ def _make_row( row += nodes.entry("", args_node) backlinks_node = nodes.paragraph() - backlinks = enumerate(sorted(set(sources)), start=1) + backlinks = enumerate(sorted(sources), start=1) for i, (doc, label) in backlinks: if isinstance(label, str): ref = nodes.reference("", f"[{i}]", internal=True) @@ -258,7 +258,7 @@ def setup(app: Sphinx): app.connect("env-purge-doc", audit_events_purge) app.connect("env-merge-info", audit_events_merge) return { - "version": "1.0", + "version": "2.0", "parallel_read_safe": True, "parallel_write_safe": True, } diff --git a/Doc/tools/extensions/c_annotations.py b/Doc/tools/extensions/c_annotations.py index 089614a1f6c421..e04a5f144c449b 100644 --- a/Doc/tools/extensions/c_annotations.py +++ b/Doc/tools/extensions/c_annotations.py @@ -154,7 +154,10 @@ def add_annotations(app: Sphinx, doctree: nodes.document) -> None: node.insert(0, annotation) -def _stable_abi_annotation(record: StableABIEntry) -> nodes.emphasis: +def _stable_abi_annotation( + record: StableABIEntry, + is_corresponding_slot: bool = False, +) -> nodes.emphasis: """Create the Stable ABI annotation. These have two forms: @@ -168,9 +171,28 @@ def _stable_abi_annotation(record: StableABIEntry) -> nodes.emphasis: ... all of which can have "since version X.Y" appended. """ stable_added = record.added - message = sphinx_gettext("Part of the") - message = message.center(len(message) + 2) - emph_node = nodes.emphasis(message, message, classes=["stableabi"]) + emph_node = nodes.emphasis('', '', classes=["stableabi"]) + if is_corresponding_slot: + # See "Type slot annotations" in add_annotations + ref_node = addnodes.pending_xref( + "slot ID", + refdomain="c", + reftarget="PyType_Slot", + reftype="type", + refexplicit="True", + ) + ref_node += nodes.Text(sphinx_gettext("slot ID")) + + message = sphinx_gettext("The corresponding") + emph_node += nodes.Text(" " + message + " ") + emph_node += ref_node + emph_node += nodes.Text(" ") + emph_node += nodes.literal(record.name, record.name) + message = sphinx_gettext("is part of the") + emph_node += nodes.Text(" " + message + " ") + else: + message = sphinx_gettext("Part of the") + emph_node += nodes.Text(" " + message + " ") ref_node = addnodes.pending_xref( "Stable ABI", refdomain="std", @@ -265,6 +287,51 @@ def run(self) -> list[nodes.Node]: return [node] +class CorrespondingTypeSlot(SphinxDirective): + """Type slot annotations + + Docs for these are with the corresponding field, for example, + "Py_tp_repr" is documented under "PyTypeObject.tp_repr", with + only a stable ABI note mentioning "Py_tp_repr" (and linking to + docs on how this works). + + If there is no corresponding field, these should be documented as normal + macros. + """ + + has_content = False + + required_arguments = 1 + optional_arguments = 0 + + def run(self) -> list[nodes.Node]: + name = self.arguments[0] + state = self.env.domaindata["c_annotations"] + stable_abi_data = state["stable_abi_data"] + + try: + record = stable_abi_data[name] + except LookupError as err: + raise LookupError( + f"{name} is not part of stable ABI. " + + "Document it as `c:macro::` rather than " + + "`corresponding-type-slot::`." + ) from err + + annotation = _stable_abi_annotation(record, is_corresponding_slot=True) + + node = nodes.paragraph() + content = [ + ".. c:namespace:: NULL", + "", + ".. c:macro:: " + name, + " :no-typesetting:", + ] + self.state.nested_parse(StringList(content), 0, node) + node.insert(0, annotation) + return [node] + + def init_annotations(app: Sphinx) -> None: # Using domaindata is a bit hack-ish, # but allows storing state without a global variable or closure. @@ -281,6 +348,7 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value("refcount_file", "", "env", types={str}) app.add_config_value("stable_abi_file", "", "env", types={str}) app.add_directive("limited-api-list", LimitedAPIList) + app.add_directive("corresponding-type-slot", CorrespondingTypeSlot) app.connect("builder-inited", init_annotations) app.connect("doctree-read", add_annotations) diff --git a/Doc/tools/extensions/glossary_search.py b/Doc/tools/extensions/glossary_search.py index 502b6cd95bcb94..bcda4b7b435593 100644 --- a/Doc/tools/extensions/glossary_search.py +++ b/Doc/tools/extensions/glossary_search.py @@ -38,7 +38,7 @@ def process_glossary_nodes( rendered = app.builder.render_partial(definition) terms[term.lower()] = { 'title': term, - 'body': rendered['html_body'], + 'body': rendered['fragment'], } diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index fd6b8f95f736fd..aa1eb5cff9ac12 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -24,14 +24,6 @@ # Used in conf.py and updated here by python/release-tools/run_release.py SOURCE_URI = 'https://github.com/python/cpython/tree/3.14/%s' -# monkey-patch reST parser to disable alphabetic and roman enumerated lists -from docutils.parsers.rst.states import Body -Body.enum.converters['loweralpha'] = \ - Body.enum.converters['upperalpha'] = \ - Body.enum.converters['lowerroman'] = \ - Body.enum.converters['upperroman'] = lambda x: None - - class PyAwaitableMixin(object): def handle_signature(self, sig, signode): ret = super(PyAwaitableMixin, self).handle_signature(sig, signode) diff --git a/Doc/tools/templates/customsourcelink.html b/Doc/tools/templates/customsourcelink.html index eb9db9e341bd75..8e271bca1e08c8 100644 --- a/Doc/tools/templates/customsourcelink.html +++ b/Doc/tools/templates/customsourcelink.html @@ -1,11 +1,11 @@ {%- if show_source and has_source and sourcename %}
-

{{ _('This Page') }}

+

{{ _('This page') }}

diff --git a/Doc/tools/templates/download.html b/Doc/tools/templates/download.html index 4645f7d394e29e..c78c650b1cb4bd 100644 --- a/Doc/tools/templates/download.html +++ b/Doc/tools/templates/download.html @@ -27,12 +27,11 @@ {%- endblock -%} {% block body %} -

{% trans %}Download Python {{ dl_version }} Documentation{% endtrans %}

+

{% trans %}Download Python {{ dl_version }} documentation{% endtrans %}

{% if last_updated %}

{% trans %}Last updated on: {{ last_updated }}.{% endtrans %}

{% endif %} -

{% trans %}To download an archive containing all the documents for this version of -Python in one of various formats, follow one of links in this table.{% endtrans %}

+

{% trans %}Download an archive containing all the documentation for this version of Python:{% endtrans %}

@@ -40,50 +39,41 @@

{% trans %}Download Python {{ dl_version }} Documentation{% endtrans %}

- - - - - - - + + - - + + - - + + - +
{% trans %}Packed as .zip{% endtrans %} {% trans %}Packed as .tar.bz2{% endtrans %}
{% trans %}PDF{% endtrans %}{% trans download_size="17" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans download_size="17" %}Download (ca. {{ download_size }} MiB){% endtrans %}
{% trans %}HTML{% endtrans %}{% trans download_size="13" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans download_size="8" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans %}Download{% endtrans %}{% trans %}Download{% endtrans %}
{% trans %}Plain text{% endtrans %}{% trans download_size="4" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans download_size="3" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans %}Download{% endtrans %}{% trans %}Download{% endtrans %}
{% trans %}Texinfo{% endtrans %}{% trans download_size="9" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans download_size="7" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans %}Download{% endtrans %}{% trans %}Download{% endtrans %}
{% trans %}EPUB{% endtrans %}{% trans download_size="6" %}Download (ca. {{ download_size }} MiB){% endtrans %}{% trans %}Download{% endtrans %}
-

{% trans %}These archives contain all the content in the documentation.{% endtrans %}

- - -

{% trans %}Unpacking{% endtrans %}

- -

{% trans %}Unix users should download the .tar.bz2 archives; these are bzipped tar -archives and can be handled in the usual way using tar and the bzip2 -program. The Info-ZIP unzip program can be -used to handle the ZIP archives if desired. The .tar.bz2 archives provide the -best compression and fastest download times.{% endtrans %}

- -

{% trans %}Windows users can use the ZIP archives since those are customary on that -platform. These are created on Unix using the Info-ZIP zip program.{% endtrans %}

+

{% trans %} +We no longer provide pre-built PDFs of the documentation. +To build a PDF archive, follow the instructions in the +Developer's Guide +and run make dist-pdf in the Doc/ directory of a copy of the CPython repository. +{% endtrans %}

+

{% trans %} +See the directory listing +for file sizes.{% endtrans %}

{% trans %}Problems{% endtrans %}

- -

{% trans %}If you have comments or suggestions for the Python documentation, please send -email to docs@python.org.{% endtrans %}

+{% set bugs = pathto('bugs') %} +

{% trans bugs = bugs %}Open an issue +if you have comments or suggestions for the Python documentation.{% endtrans %}

{% endblock %} diff --git a/Doc/tools/templates/dummy.html b/Doc/tools/templates/dummy.html index 0fdbe2a58017ff..75f6607d8f3698 100644 --- a/Doc/tools/templates/dummy.html +++ b/Doc/tools/templates/dummy.html @@ -27,8 +27,8 @@ In extensions/changes.py: -{% trans %}Deprecated since version {deprecated}, will be removed in version {removed}{% endtrans %} -{% trans %}Deprecated since version {deprecated}, removed in version {removed}{% endtrans %} +{% trans %}Deprecated since version %s, will be removed in version %s{% endtrans %} +{% trans %}Deprecated since version %s, removed in version %s{% endtrans %} In docsbuild-scripts, when rewriting indexsidebar.html with actual versions: diff --git a/Doc/tools/templates/indexcontent.html b/Doc/tools/templates/indexcontent.html index 06a4223643a05a..544cc4234f441e 100644 --- a/Doc/tools/templates/indexcontent.html +++ b/Doc/tools/templates/indexcontent.html @@ -72,7 +72,7 @@

{{ docstitle|e }}

- + diff --git a/Doc/tools/templates/indexsidebar.html b/Doc/tools/templates/indexsidebar.html index eea29e2449a9cf..086f15662cf87b 100644 --- a/Doc/tools/templates/indexsidebar.html +++ b/Doc/tools/templates/indexsidebar.html @@ -9,9 +9,9 @@

{% trans %}Docs by version{% endtrans %}

{% trans %}Other resources{% endtrans %}

diff --git a/Doc/tools/templates/layout.html b/Doc/tools/templates/layout.html index 56023ebf962703..54c6eb9b5ef7e4 100644 --- a/Doc/tools/templates/layout.html +++ b/Doc/tools/templates/layout.html @@ -26,11 +26,11 @@ {% endblock %} {% block extrahead %} - {% if builder == "html" and enable_analytics %} - - {% endif %} - - {% if builder != "htmlhelp" %} + {% if builder == "html" %} + {% if enable_analytics %} + + {% endif %} + {% if pagename == 'whatsnew/changelog' and not embedded %} {% endif %} {% endif %} diff --git a/Doc/tutorial/appendix.rst b/Doc/tutorial/appendix.rst index 6a1611afadb57c..5020c428741feb 100644 --- a/Doc/tutorial/appendix.rst +++ b/Doc/tutorial/appendix.rst @@ -15,7 +15,7 @@ basic interpreter is supported on all platforms with minimal line control capabilities. On Windows, or Unix-like systems with :mod:`curses` support, -a new interactive shell is used by default. +a new interactive shell is used by default since Python 3.13. This one supports color, multiline editing, history browsing, and paste mode. To disable color, see :ref:`using-on-controlling-color` for details. Function keys provide some additional functionality. diff --git a/Doc/tutorial/classes.rst b/Doc/tutorial/classes.rst index 9d0fab8861d2a9..645acdf20fb580 100644 --- a/Doc/tutorial/classes.rst +++ b/Doc/tutorial/classes.rst @@ -359,7 +359,7 @@ Usually, a method is called right after it is bound:: x.f() -In the :class:`!MyClass` example, this will return the string ``'hello world'``. +If ``x = MyClass()``, as above, this will return the string ``'hello world'``. However, it is not necessary to call a method right away: ``x.f`` is a method object, and can be stored away and called at a later time. For example:: diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index 95939242fb7d44..5ec8789f98c701 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -251,6 +251,7 @@ statements: a ``try`` statement's ``else`` clause runs when no exception occurs, and a loop's ``else`` clause runs when no ``break`` occurs. For more on the ``try`` statement and exceptions, see :ref:`tut-handling`. +.. index:: single: ...; ellipsis literal .. _tut-pass: :keyword:`!pass` Statements @@ -277,6 +278,12 @@ at a more abstract level. The :keyword:`!pass` is silently ignored:: ... pass # Remember to implement this! ... +For this last case, many people use the ellipsis literal :code:`...` instead of +:code:`pass`. This use has no special meaning to Python, and is not part of +the language definition (you could use any constant expression here), but +:code:`...` is used conventionally as a placeholder body as well. +See :ref:`bltin-ellipsis-object`. + .. _tut-match: @@ -561,7 +568,7 @@ This example, as usual, demonstrates some new Python features: Different types define different methods. Methods of different types may have the same name without causing ambiguity. (It is possible to define your own object types and methods, using *classes*, see :ref:`tut-classes`) - The method :meth:`!append` shown in the example is defined for list objects; it + The method :meth:`~list.append` shown in the example is defined for list objects; it adds a new element at the end of the list. In this example it is equivalent to ``result = result + [a]``, but more efficient. @@ -999,7 +1006,8 @@ scope:: 43 The above example uses a lambda expression to return a function. Another use -is to pass a small function as an argument:: +is to pass a small function as an argument. For instance, :meth:`list.sort` +takes a sorting key function *key* which can be a lambda function:: >>> pairs = [(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four')] >>> pairs.sort(key=lambda pair: pair[1]) @@ -1055,7 +1063,7 @@ Here is an example of a multi-line docstring:: >>> print(my_function.__doc__) Do nothing, but document it. - No, really, it doesn't do anything. + No, really, it doesn't do anything. .. _tut-annotations: @@ -1073,7 +1081,7 @@ Function Annotations information about the types used by user-defined functions (see :pep:`3107` and :pep:`484` for more information). -:term:`Annotations ` are stored in the :attr:`!__annotations__` +:term:`Annotations ` are stored in the :attr:`~object.__annotations__` attribute of the function as a dictionary and have no effect on any other part of the function. Parameter annotations are defined by a colon after the parameter name, followed by an expression evaluating to the value of the annotation. Return annotations are diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index cbe780e075baf5..7e02e74177c457 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -12,9 +12,8 @@ and adds some new things as well. More on Lists ============= -The list data type has some more methods. Here are all of the methods of list -objects: - +The :ref:`list ` data type has some more methods. Here are all +of the methods of list objects: .. method:: list.append(x) :noindex: @@ -62,7 +61,7 @@ objects: .. method:: list.index(x[, start[, end]]) :noindex: - Return zero-based index in the list of the first item whose value is equal to *x*. + Return zero-based index of the first occurrence of *x* in the list. Raises a :exc:`ValueError` if there is no such item. The optional arguments *start* and *end* are interpreted as in the slice @@ -142,8 +141,8 @@ Using Lists as Stacks The list methods make it very easy to use a list as a stack, where the last element added is the first element retrieved ("last-in, first-out"). To add an -item to the top of the stack, use :meth:`!append`. To retrieve an item from the -top of the stack, use :meth:`!pop` without an explicit index. For example:: +item to the top of the stack, use :meth:`~list.append`. To retrieve an item from the +top of the stack, use :meth:`~list.pop` without an explicit index. For example:: >>> stack = [3, 4, 5] >>> stack.append(6) @@ -340,7 +339,7 @@ The :keyword:`!del` statement ============================= There is a way to remove an item from a list given its index instead of its -value: the :keyword:`del` statement. This differs from the :meth:`!pop` method +value: the :keyword:`del` statement. This differs from the :meth:`~list.pop` method which returns a value. The :keyword:`!del` statement can also be used to remove slices from a list or clear the entire list (which we did earlier by assignment of an empty list to the slice). For example:: @@ -445,10 +444,11 @@ packing and sequence unpacking. Sets ==== -Python also includes a data type for *sets*. A set is an unordered collection -with no duplicate elements. Basic uses include membership testing and -eliminating duplicate entries. Set objects also support mathematical operations -like union, intersection, difference, and symmetric difference. +Python also includes a data type for :ref:`sets `. A set is +an unordered collection with no duplicate elements. Basic uses include +membership testing and eliminating duplicate entries. Set objects also +support mathematical operations like union, intersection, difference, and +symmetric difference. Curly braces or the :func:`set` function can be used to create sets. Note: to create an empty set you have to use ``set()``, not ``{}``; the latter creates an @@ -500,8 +500,8 @@ any immutable type; strings and numbers can always be keys. Tuples can be used as keys if they contain only strings, numbers, or tuples; if a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. You can't use lists as keys, since lists can be modified in place using index -assignments, slice assignments, or methods like :meth:`!append` and -:meth:`!extend`. +assignments, slice assignments, or methods like :meth:`~list.append` and +:meth:`~list.extend`. It is best to think of a dictionary as a set of *key: value* pairs, with the requirement that the keys are unique (within one dictionary). A pair of @@ -512,8 +512,12 @@ dictionary; this is also the way dictionaries are written on output. The main operations on a dictionary are storing a value with some key and extracting the value given the key. It is also possible to delete a key:value pair with ``del``. If you store using a key that is already in use, the old -value associated with that key is forgotten. It is an error to extract a value -using a non-existent key. +value associated with that key is forgotten. + +Extracting a value for a non-existent key by subscripting (``d[key]``) raises a +:exc:`KeyError`. To avoid getting this error when trying to access a possibly +non-existent key, use the :meth:`~dict.get` method instead, which returns +``None`` (or a specified default value) if the key is not in the dictionary. Performing ``list(d)`` on a dictionary returns a list of all the keys used in the dictionary, in insertion order (if you want it sorted, just use @@ -528,6 +532,12 @@ Here is a small example using a dictionary:: {'jack': 4098, 'sape': 4139, 'guido': 4127} >>> tel['jack'] 4098 + >>> tel['irv'] + Traceback (most recent call last): + File "", line 1, in + KeyError: 'irv' + >>> print(tel.get('irv')) + None >>> del tel['sape'] >>> tel['irv'] = 4127 >>> tel diff --git a/Doc/tutorial/index.rst b/Doc/tutorial/index.rst index 96791f88c867ab..20fe161be4acc2 100644 --- a/Doc/tutorial/index.rst +++ b/Doc/tutorial/index.rst @@ -4,6 +4,10 @@ The Python Tutorial ###################### +.. Tip:: This tutorial is designed for + *programmers* that are new to the Python language, + **not** *beginners* who are new to programming. + Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. Python's elegant syntax and dynamic typing, @@ -11,7 +15,7 @@ together with its interpreted nature, make it an ideal language for scripting and rapid application development in many areas on most platforms. The Python interpreter and the extensive standard library are freely available -in source or binary form for all major platforms from the Python web site, +in source or binary form for all major platforms from the Python website, https://www.python.org/, and may be freely distributed. The same site also contains distributions of and pointers to many free third party Python modules, programs and tools, and additional documentation. @@ -21,7 +25,8 @@ implemented in C or C++ (or other languages callable from C). Python is also suitable as an extension language for customizable applications. This tutorial introduces the reader informally to the basic concepts and -features of the Python language and system. It helps to have a Python +features of the Python language and system. Be aware that it expects you to +have a basic understanding of programming in general. It helps to have a Python interpreter handy for hands-on experience, but all examples are self-contained, so the tutorial can be read off-line as well. diff --git a/Doc/tutorial/inputoutput.rst b/Doc/tutorial/inputoutput.rst index 35b8c7cd8eb049..a00f06cf46c41a 100644 --- a/Doc/tutorial/inputoutput.rst +++ b/Doc/tutorial/inputoutput.rst @@ -95,10 +95,11 @@ Some examples:: >>> repr((x, y, ('spam', 'eggs'))) "(32.5, 40000, ('spam', 'eggs'))" -The :mod:`string` module contains a :class:`~string.Template` class that offers -yet another way to substitute values into strings, using placeholders like -``$x`` and replacing them with values from a dictionary, but offers much less -control of the formatting. +The :mod:`string` module contains support for a simple templating approach +based upon regular expressions, via :class:`string.Template`. +This offers yet another way to substitute values into strings, +using placeholders like ``$x`` and replacing them with values from a dictionary. +This syntax is easy to use, although it offers much less control for formatting. .. index:: single: formatted string literal diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst index bec5da8fd759ac..deabac5253051c 100644 --- a/Doc/tutorial/introduction.rst +++ b/Doc/tutorial/introduction.rst @@ -13,10 +13,9 @@ end a multi-line command. .. only:: html - You can toggle the display of prompts and output by clicking on ``>>>`` - in the upper-right corner of an example box. If you hide the prompts - and output for an example, then you can easily copy and paste the input - lines into your interpreter. + You can use the "Copy" button (it appears in the upper-right corner + when hovering over or tapping a code example), which strips prompts + and omits output, to copy and paste the input lines into your interpreter. .. index:: single: # (hash); comment @@ -50,7 +49,7 @@ primary prompt, ``>>>``. (It shouldn't take long.) Numbers ------- -The interpreter acts as a simple calculator: you can type an expression at it +The interpreter acts as a simple calculator: you can type an expression into it and it will write the value. Expression syntax is straightforward: the operators ``+``, ``-``, ``*`` and ``/`` can be used to perform arithmetic; parentheses (``()``) can be used for grouping. @@ -147,6 +146,8 @@ Python can manipulate text (represented by type :class:`str`, so-called "``Yay! :)``". They can be enclosed in single quotes (``'...'``) or double quotes (``"..."``) with the same result [#]_. +.. code-block:: pycon + >>> 'spam eggs' # single quotes 'spam eggs' >>> "Paris rabbit got your back :)! Yay!" # double quotes @@ -419,7 +420,7 @@ type, i.e. it is possible to change their content:: [1, 8, 27, 64, 125] You can also add new items at the end of the list, by using -the :meth:`!list.append` *method* (we will see more about methods later):: +the :meth:`list.append` *method* (we will see more about methods later):: >>> cubes.append(216) # add the cube of 6 >>> cubes.append(7 ** 3) # and the cube of 7 diff --git a/Doc/tutorial/modules.rst b/Doc/tutorial/modules.rst index de7aa0e2342946..f8105cd5441fec 100644 --- a/Doc/tutorial/modules.rst +++ b/Doc/tutorial/modules.rst @@ -27,14 +27,16 @@ called :file:`fibo.py` in the current directory with the following contents:: # Fibonacci numbers module - def fib(n): # write Fibonacci series up to n + def fib(n): + """Write Fibonacci series up to n.""" a, b = 0, 1 while a < n: print(a, end=' ') a, b = b, a+b print() - def fib2(n): # return Fibonacci series up to n + def fib2(n): + """Return Fibonacci series up to n.""" result = [] a, b = 0, 1 while a < n: @@ -577,8 +579,8 @@ module for example, you might use:: from .. import formats from ..filters import equalizer -Note that relative imports are based on the name of the current module. Since -the name of the main module is always ``"__main__"``, modules intended for use +Note that relative imports are based on the name of the current module's package. +Since the main module does not have a package, modules intended for use as the main module of a Python application must always use absolute imports. diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst index 4b3eef313e76d7..163f3bdebd8546 100644 --- a/Doc/tutorial/stdlib.rst +++ b/Doc/tutorial/stdlib.rst @@ -183,13 +183,13 @@ protocols. Two of the simplest are :mod:`urllib.request` for retrieving data from URLs and :mod:`smtplib` for sending mail:: >>> from urllib.request import urlopen - >>> with urlopen('http://worldtimeapi.org/api/timezone/etc/UTC.txt') as response: + >>> with urlopen('https://docs.python.org/3/') as response: ... for line in response: ... line = line.decode() # Convert bytes to a str - ... if line.startswith('datetime'): + ... if 'updated' in line: ... print(line.rstrip()) # Remove trailing newline ... - datetime: 2022-01-01T01:36:47.689215+00:00 + Last updated on Nov 11, 2025 (20:11 UTC). >>> import smtplib >>> server = smtplib.SMTP('localhost') @@ -335,7 +335,7 @@ sophisticated and robust capabilities of its larger packages. For example: names, no direct knowledge or handling of XML is needed. * The :mod:`email` package is a library for managing email messages, including - MIME and other :rfc:`2822`-based message documents. Unlike :mod:`smtplib` and + MIME and other :rfc:`5322`-based message documents. Unlike :mod:`smtplib` and :mod:`poplib` which actually send and receive messages, the email package has a complete toolset for building or decoding complex message structures (including attachments) and for implementing internet encoding and header diff --git a/Doc/tutorial/whatnow.rst b/Doc/tutorial/whatnow.rst index dbe2d7fc09927e..359cf80a7b2ecf 100644 --- a/Doc/tutorial/whatnow.rst +++ b/Doc/tutorial/whatnow.rst @@ -30,7 +30,7 @@ the set are: More Python resources: -* https://www.python.org: The major Python web site. It contains code, +* https://www.python.org: The major Python website. It contains code, documentation, and pointers to Python-related pages around the web. * https://docs.python.org: Fast access to Python's documentation. diff --git a/Doc/using/android.rst b/Doc/using/android.rst index 65bf23dc994856..45345d045ddfd9 100644 --- a/Doc/using/android.rst +++ b/Doc/using/android.rst @@ -40,8 +40,15 @@ If you're sure you want to do all of this manually, read on. You can use the :source:`testbed app ` as a guide; each step below contains a link to the relevant file. -* Build Python by following the instructions in :source:`Android/README.md`. - This will create the directory ``cross-build/HOST/prefix``. +* First, acquire a build of Python for Android: + + * The easiest way is to download an Android release from `python.org + `__. The ``prefix`` directory + mentioned below is at the top level of the package. + + * Or if you want to build it yourself, follow the instructions in + :source:`Android/README.md`. The ``prefix`` directory will be created under + :samp:`cross-build/{HOST}`. * Add code to your :source:`build.gradle ` file to copy the following items into your project. All except your own Python @@ -63,3 +70,12 @@ link to the relevant file. * Add code to your app to :source:`start Python in embedded mode `. This will need to be C code called via JNI. + +Building a Python package for Android +------------------------------------- + +Python packages can be built for Android as wheels and released on PyPI. The +recommended tool for doing this is `cibuildwheel +`__, which automates +all the details of setting up a cross-compilation environment, building the +wheel, and testing it on an emulator. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 40a46a62031ef7..24541e84732faf 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -369,8 +369,8 @@ Miscellaneous options .. option:: -R Turn on hash randomization. This option only has an effect if the - :envvar:`PYTHONHASHSEED` environment variable is set to ``0``, since hash - randomization is enabled by default. + :envvar:`PYTHONHASHSEED` environment variable is set to anything other + than ``random``, since hash randomization is enabled by default. On previous versions of Python, this option turns on hash randomization, so that the :meth:`~object.__hash__` values of str and bytes objects @@ -653,7 +653,7 @@ Miscellaneous options .. versionadded:: 3.13 * :samp:`-X thread_inherit_context={0,1}` causes :class:`~threading.Thread` - to, by default, use a copy of context of of the caller of + to, by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, threads will start with an empty context. If unset, the value of this option defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also @@ -669,6 +669,13 @@ Miscellaneous options .. versionadded:: 3.14 + * :samp:`-X tlbc={0,1}` enables (1, the default) or disables (0) thread-local + bytecode in builds configured with :option:`--disable-gil`. When disabled, + this also disables the specializing interpreter. See also + :envvar:`PYTHON_TLBC`. + + .. versionadded:: 3.14 + It also allows passing arbitrary values and retrieving them through the :data:`sys._xoptions` dictionary. @@ -1249,9 +1256,8 @@ conflict. .. envvar:: PYTHON_BASIC_REPL If this variable is set to any value, the interpreter will not attempt to - load the Python-based :term:`REPL` that requires :mod:`curses` and - :mod:`readline`, and will instead use the traditional parser-based - :term:`REPL`. + load the Python-based :term:`REPL` that requires :mod:`readline`, and will + instead use the traditional parser-based :term:`REPL`. .. versionadded:: 3.13 @@ -1277,7 +1283,7 @@ conflict. .. envvar:: PYTHON_THREAD_INHERIT_CONTEXT If this variable is set to ``1`` then :class:`~threading.Thread` will, - by default, use a copy of context of of the caller of ``Thread.start()`` + by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, new threads will start with an empty context. If unset, this variable defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also :option:`-X thread_inherit_context<-X>`. @@ -1302,6 +1308,16 @@ conflict. .. versionadded:: 3.13 +.. envvar:: PYTHON_TLBC + + If set to ``1`` enables thread-local bytecode. If set to ``0`` thread-local + bytecode and the specializing interpreter are disabled. Only applies to + builds configured with :option:`--disable-gil`. + + See also the :option:`-X tlbc <-X>` command-line option. + + .. versionadded:: 3.14 + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index b914d3397b6dda..5c61197cec0bcf 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -4,10 +4,13 @@ Configure Python .. highlight:: sh + +.. _build-requirements: + Build Requirements ================== -Features and minimum versions required to build CPython: +To build CPython, you will need: * A `C11 `_ compiler. `Optional C11 features @@ -22,47 +25,136 @@ Features and minimum versions required to build CPython: * Support for threads. -* OpenSSL 1.1.1 is the minimum version and OpenSSL 3.0.16 is the recommended - minimum version for the :mod:`ssl` and :mod:`hashlib` extension modules. +.. versionchanged:: 3.5 + On Windows, Visual Studio 2015 or later is now required. + +.. versionchanged:: 3.6 + Selected C99 features, like ```` and ``static inline`` functions, + are now required. + +.. versionchanged:: 3.7 + Thread support is now required. + +.. versionchanged:: 3.11 + C11 compiler, IEEE 754 and NaN support are now required. + On Windows, Visual Studio 2017 or later is required. -* SQLite 3.15.2 for the :mod:`sqlite3` extension module. +See also :pep:`7` "Style Guide for C Code" and :pep:`11` "CPython platform +support". -* Tcl/Tk 8.5.12 for the :mod:`tkinter` module. -* Autoconf 2.72 and aclocal 1.16.5 are required to regenerate the - :file:`configure` script. +.. _optional-module-requirements: + +Requirements for optional modules +--------------------------------- + +Some :term:`optional modules ` of the standard library +require third-party libraries installed for development +(for example, header files must be available). + +Missing requirements are reported in the ``configure`` output. +Modules that are missing due to missing dependencies are listed near the end +of the ``make`` output, +sometimes using an internal name, for example, ``_ctypes`` for :mod:`ctypes` +module. + +If you distribute a CPython interpreter without optional modules, +it's best practice to advise users, who generally expect that +standard library modules are available. + +Dependencies to build optional modules are: + +.. list-table:: + :header-rows: 1 + :align: left + + * - Dependency + - Minimum version + - Python module + * - `libbz2 `_ + - + - :mod:`bz2` + * - `libffi `_ + - 3.3.0 recommended + - :mod:`ctypes` + * - `liblzma `_ + - + - :mod:`lzma` + * - `libmpdec `_ + - 2.5.0 + - :mod:`decimal` [1]_ + * - `libreadline `_ or + `libedit `_ [2]_ + - + - :mod:`readline` + * - `libuuid `_ + - + - ``_uuid`` [3]_ + * - `ncurses `_ [4]_ + - + - :mod:`curses` + * - `OpenSSL `_ + - | 3.0.18 recommended + | (1.1.1 minimum) + - :mod:`ssl`, :mod:`hashlib` [5]_ + * - `SQLite `_ + - 3.15.2 + - :mod:`sqlite3` + * - `Tcl/Tk `_ + - 8.5.12 + - :mod:`tkinter`, :ref:`IDLE `, :mod:`turtle` + * - `zlib `_ + - 1.2.2.1 + - :mod:`zlib`, :mod:`gzip`, :mod:`ensurepip` + * - `zstd `_ + - 1.4.5 + - :mod:`compression.zstd` + +.. [1] If *libmpdec* is not available, the :mod:`decimal` module will use + a pure-Python implementation. + See :option:`--with-system-libmpdec` for details. +.. [2] See :option:`--with-readline` for choosing the backend for the + :mod:`readline` module. +.. [3] The :mod:`uuid` module uses ``_uuid`` to generate "safe" UUIDs. + See the module documentation for details. +.. [4] The :mod:`curses` module requires the ``libncurses`` or ``libncursesw`` + library. + The :mod:`curses.panel` module additionally requires the ``libpanel`` or + ``libpanelw`` library. +.. [5] If OpenSSL is not available, the :mod:`hashlib` module will use + bundled implementations of several hash functions. + See :option:`--with-builtin-hashlib-hashes` for *forcing* usage of OpenSSL. + +Note that the table does not include all optional modules; in particular, +platform-specific modules like :mod:`winreg` are not listed here. + +.. seealso:: + + * The `devguide `_ + includes a full list of dependencies required to build all modules and + instructions on how to install them on common platforms. + * :option:`--with-system-expat` allows building with an external + `libexpat `_ library. + * :ref:`configure-options-for-dependencies` .. versionchanged:: 3.1 - Tcl/Tk version 8.3.1 is now required. + Tcl/Tk version 8.3.1 is now required for :mod:`tkinter`. .. versionchanged:: 3.5 - On Windows, Visual Studio 2015 or later is now required. - Tcl/Tk version 8.4 is now required. - -.. versionchanged:: 3.6 - Selected C99 features are now required, like ```` and ``static - inline`` functions. + Tcl/Tk version 8.4 is now required for :mod:`tkinter`. .. versionchanged:: 3.7 - Thread support and OpenSSL 1.0.2 are now required. + OpenSSL 1.0.2 is now required for :mod:`hashlib` and :mod:`ssl`. .. versionchanged:: 3.10 - OpenSSL 1.1.1 is now required. - Require SQLite 3.7.15. + OpenSSL 1.1.1 is now required for :mod:`hashlib` and :mod:`ssl`. + SQLite 3.7.15 is now required for :mod:`sqlite3`. .. versionchanged:: 3.11 - C11 compiler, IEEE 754 and NaN support are now required. - On Windows, Visual Studio 2017 or later is required. - Tcl/Tk version 8.5.12 is now required for the :mod:`tkinter` module. + Tcl/Tk version 8.5.12 is now required for :mod:`tkinter`. .. versionchanged:: 3.13 - Autoconf 2.71, aclocal 1.16.5 and SQLite 3.15.2 are now required. - -.. versionchanged:: 3.14 - Autoconf 2.72 is now required. - -See also :pep:`7` "Style Guide for C Code" and :pep:`11` "CPython platform -support". + SQLite 3.15.2 is now required for :mod:`sqlite3`. Generated files @@ -91,8 +183,19 @@ The container is optional, the following command can be run locally:: autoreconf -ivf -Werror -The generated files can change depending on the exact ``autoconf-archive``, -``aclocal`` and ``pkg-config`` versions. +The generated files can change depending on the exact versions of the +tools used. +The container that CPython uses has +`Autoconf `_ 2.72, +``aclocal`` from `Automake `_ 1.16.5, +and `pkg-config `_ 1.8.1. + +.. versionchanged:: 3.13 + Autoconf 2.71 and aclocal 1.16.5 and are now used to regenerate + :file:`configure`. + +.. versionchanged:: 3.14 + Autoconf 2.72 is now used to regenerate :file:`configure`. .. _configure-options: @@ -290,8 +393,8 @@ General Options .. option:: --disable-gil - Enables **experimental** support for running Python without the - :term:`global interpreter lock` (GIL): free threading build. + Enables support for running Python without the :term:`global interpreter + lock` (GIL): free threading build. Defines the ``Py_GIL_DISABLED`` macro and adds ``"t"`` to :data:`sys.abiflags`. @@ -370,6 +473,8 @@ Linker options Name for machine-dependent library files. +.. _configure-options-for-dependencies: + Options for third-party dependencies ------------------------------------ @@ -392,12 +497,6 @@ Options for third-party dependencies C compiler and linker flags for ``gdbm``. -.. option:: LIBB2_CFLAGS -.. option:: LIBB2_LIBS - - C compiler and linker flags for ``libb2`` (:ref:`BLAKE2 `), - used by :mod:`hashlib` module, overriding ``pkg-config``. - .. option:: LIBEDIT_CFLAGS .. option:: LIBEDIT_LIBS @@ -445,6 +544,14 @@ Options for third-party dependencies C compiler and linker flags for ``libuuid``, used by :mod:`uuid` module, overriding ``pkg-config``. +.. option:: LIBZSTD_CFLAGS +.. option:: LIBZSTD_LIBS + + C compiler and linker flags for ``libzstd``, used by :mod:`compression.zstd` module, + overriding ``pkg-config``. + + .. versionadded:: 3.14 + .. option:: PANEL_CFLAGS .. option:: PANEL_LIBS @@ -675,6 +782,13 @@ also be used to improve performance. not compiled. This includes both the functionality to schedule code to be executed and the functionality to receive code to be executed. + .. c:macro:: Py_REMOTE_DEBUG + + This macro is defined by default, unless Python is configured with + :option:`--without-remote-debug`. + + Note that even if the macro is defined, remote debugging may not be + available (for example, on an incompatible platform). .. versionadded:: 3.14 @@ -784,6 +898,9 @@ Debug options .. option:: --with-address-sanitizer Enable AddressSanitizer memory error detector, ``asan`` (default is no). + To improve ASan detection capabilities you may also want to combine this + with :option:`--without-pymalloc` to disable the specialized small-object + allocator whose allocations are not tracked by ASan. .. versionadded:: 3.6 @@ -845,9 +962,16 @@ Libraries options .. versionchanged:: 3.13 Default to using the installed ``mpdecimal`` library. - .. deprecated-removed:: 3.13 3.15 + .. versionchanged:: 3.15 + + A bundled copy of the library will no longer be selected + implicitly if an installed ``mpdecimal`` library is not found. + In Python 3.15 only, it can still be selected explicitly using + ``--with-system-libmpdec=no`` or ``--without-system-libmpdec``. + + .. deprecated-removed:: 3.13 3.16 A copy of the ``mpdecimal`` library sources will no longer be distributed - with Python 3.15. + with Python 3.16. .. seealso:: :option:`LIBMPDEC_CFLAGS` and :option:`LIBMPDEC_LIBS`. diff --git a/Doc/using/ios.rst b/Doc/using/ios.rst index 7d5c6331bef5ce..5e4033fb6cec7a 100644 --- a/Doc/using/ios.rst +++ b/Doc/using/ios.rst @@ -170,7 +170,7 @@ helpful. To add Python to an iOS Xcode project: 1. Build or obtain a Python ``XCFramework``. See the instructions in - :source:`iOS/README.rst` (in the CPython source distribution) for details on + :source:`Apple/iOS/README.md` (in the CPython source distribution) for details on how to build a Python ``XCFramework``. At a minimum, you will need a build that supports ``arm64-apple-ios``, plus one of either ``arm64-apple-ios-simulator`` or ``x86_64-apple-ios-simulator``. @@ -180,22 +180,19 @@ To add Python to an iOS Xcode project: of your project; however, you can use any other location that you want by adjusting paths as needed. -3. Drag the ``iOS/Resources/dylib-Info-template.plist`` file into your project, - and ensure it is associated with the app target. - -4. Add your application code as a folder in your Xcode project. In the +3. Add your application code as a folder in your Xcode project. In the following instructions, we'll assume that your user code is in a folder named ``app`` in the root of your project; you can use any other location by adjusting paths as needed. Ensure that this folder is associated with your app target. -5. Select the app target by selecting the root node of your Xcode project, then +4. Select the app target by selecting the root node of your Xcode project, then the target name in the sidebar that appears. -6. In the "General" settings, under "Frameworks, Libraries and Embedded +5. In the "General" settings, under "Frameworks, Libraries and Embedded Content", add ``Python.xcframework``, with "Embed & Sign" selected. -7. In the "Build Settings" tab, modify the following: +6. In the "Build Settings" tab, modify the following: - Build Options @@ -211,86 +208,24 @@ To add Python to an iOS Xcode project: * Quoted Include In Framework Header: No -8. Add a build step that copies the Python standard library into your app. In - the "Build Phases" tab, add a new "Run Script" build step *before* the - "Embed Frameworks" step, but *after* the "Copy Bundle Resources" step. Name - the step "Install Target Specific Python Standard Library", disable the - "Based on dependency analysis" checkbox, and set the script content to: +7. Add a build step that processes the Python standard library, and your own + Python binary dependencies. In the "Build Phases" tab, add a new "Run + Script" build step *before* the "Embed Frameworks" step, but *after* the + "Copy Bundle Resources" step. Name the step "Process Python libraries", + disable the "Based on dependency analysis" checkbox, and set the script + content to: .. code-block:: bash - set -e - - mkdir -p "$CODESIGNING_FOLDER_PATH/python/lib" - if [ "$EFFECTIVE_PLATFORM_NAME" = "-iphonesimulator" ]; then - echo "Installing Python modules for iOS Simulator" - rsync -au --delete "$PROJECT_DIR/Python.xcframework/ios-arm64_x86_64-simulator/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" - else - echo "Installing Python modules for iOS Device" - rsync -au --delete "$PROJECT_DIR/Python.xcframework/ios-arm64/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" - fi - - Note that the name of the simulator "slice" in the XCframework may be - different, depending the CPU architectures your ``XCFramework`` supports. - -9. Add a second build step that processes the binary extension modules in the - standard library into "Framework" format. Add a "Run Script" build step - *directly after* the one you added in step 8, named "Prepare Python Binary - Modules". It should also have "Based on dependency analysis" unchecked, with - the following script content: + set -e + source $PROJECT_DIR/Python.xcframework/build/build_utils.sh + install_python Python.xcframework app - .. code-block:: bash + If you have placed your XCframework somewhere other than the root of your + project, modify the path to the first argument. - set -e - - install_dylib () { - INSTALL_BASE=$1 - FULL_EXT=$2 - - # The name of the extension file - EXT=$(basename "$FULL_EXT") - # The location of the extension file, relative to the bundle - RELATIVE_EXT=${FULL_EXT#$CODESIGNING_FOLDER_PATH/} - # The path to the extension file, relative to the install base - PYTHON_EXT=${RELATIVE_EXT/$INSTALL_BASE/} - # The full dotted name of the extension module, constructed from the file path. - FULL_MODULE_NAME=$(echo $PYTHON_EXT | cut -d "." -f 1 | tr "/" "."); - # A bundle identifier; not actually used, but required by Xcode framework packaging - FRAMEWORK_BUNDLE_ID=$(echo $PRODUCT_BUNDLE_IDENTIFIER.$FULL_MODULE_NAME | tr "_" "-") - # The name of the framework folder. - FRAMEWORK_FOLDER="Frameworks/$FULL_MODULE_NAME.framework" - - # If the framework folder doesn't exist, create it. - if [ ! -d "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER" ]; then - echo "Creating framework for $RELATIVE_EXT" - mkdir -p "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER" - cp "$CODESIGNING_FOLDER_PATH/dylib-Info-template.plist" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist" - plutil -replace CFBundleExecutable -string "$FULL_MODULE_NAME" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist" - plutil -replace CFBundleIdentifier -string "$FRAMEWORK_BUNDLE_ID" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist" - fi - - echo "Installing binary for $FRAMEWORK_FOLDER/$FULL_MODULE_NAME" - mv "$FULL_EXT" "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME" - # Create a placeholder .fwork file where the .so was - echo "$FRAMEWORK_FOLDER/$FULL_MODULE_NAME" > ${FULL_EXT%.so}.fwork - # Create a back reference to the .so file location in the framework - echo "${RELATIVE_EXT%.so}.fwork" > "$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME.origin" - } - - PYTHON_VER=$(ls -1 "$CODESIGNING_FOLDER_PATH/python/lib") - echo "Install Python $PYTHON_VER standard library extension modules..." - find "$CODESIGNING_FOLDER_PATH/python/lib/$PYTHON_VER/lib-dynload" -name "*.so" | while read FULL_EXT; do - install_dylib python/lib/$PYTHON_VER/lib-dynload/ "$FULL_EXT" - done - - # Clean up dylib template - rm -f "$CODESIGNING_FOLDER_PATH/dylib-Info-template.plist" - - echo "Signing frameworks as $EXPANDED_CODE_SIGN_IDENTITY_NAME ($EXPANDED_CODE_SIGN_IDENTITY)..." - find "$CODESIGNING_FOLDER_PATH/Frameworks" -name "*.framework" -exec /usr/bin/codesign --force --sign "$EXPANDED_CODE_SIGN_IDENTITY" ${OTHER_CODE_SIGN_FLAGS:-} -o runtime --timestamp=none --preserve-metadata=identifier,entitlements,flags --generate-entitlement-der "{}" \; - -10. Add Objective C code to initialize and use a Python interpreter in embedded - mode. You should ensure that: +8. Add Objective C code to initialize and use a Python interpreter in embedded + mode. You should ensure that: * UTF-8 mode (:c:member:`PyPreConfig.utf8_mode`) is *enabled*; * Buffered stdio (:c:member:`PyConfig.buffered_stdio`) is *disabled*; @@ -298,9 +233,9 @@ To add Python to an iOS Xcode project: * Signal handlers (:c:member:`PyConfig.install_signal_handlers`) are *enabled*; * System logging (:c:member:`PyConfig.use_system_logger`) is *enabled* (optional, but strongly recommended; this is enabled by default); - * ``PYTHONHOME`` for the interpreter is configured to point at the + * :envvar:`PYTHONHOME` for the interpreter is configured to point at the ``python`` subfolder of your app's bundle; and - * The ``PYTHONPATH`` for the interpreter includes: + * The :envvar:`PYTHONPATH` for the interpreter includes: - the ``python/lib/python3.X`` subfolder of your app's bundle, - the ``python/lib/python3.X/lib-dynload`` subfolder of your app's bundle, and @@ -309,45 +244,52 @@ To add Python to an iOS Xcode project: Your app's bundle location can be determined using ``[[NSBundle mainBundle] resourcePath]``. -Steps 8, 9 and 10 of these instructions assume that you have a single folder of +Steps 7 and 8 of these instructions assume that you have a single folder of pure Python application code, named ``app``. If you have third-party binary modules in your app, some additional steps will be required: * You need to ensure that any folders containing third-party binaries are - either associated with the app target, or copied in as part of step 8. Step 8 - should also purge any binaries that are not appropriate for the platform a - specific build is targeting (i.e., delete any device binaries if you're - building an app targeting the simulator). + either associated with the app target, or are explicitly copied as part of + step 7. Step 7 should also purge any binaries that are not appropriate for + the platform a specific build is targeting (i.e., delete any device binaries + if you're building an app targeting the simulator). -* Any folders that contain third-party binaries must be processed into - framework form by step 9. The invocation of ``install_dylib`` that processes - the ``lib-dynload`` folder can be copied and adapted for this purpose. +* If you're using a separate folder for third-party packages, ensure that + folder is added to the end of the call to ``install_python`` in step 7, and + as part of the :envvar:`PYTHONPATH` configuration in step 8. -* If you're using a separate folder for third-party packages, ensure that folder - is included as part of the ``PYTHONPATH`` configuration in step 10. +* If any of the folders that contain third-party packages will contain ``.pth`` + files, you should add that folder as a *site directory* (using + :meth:`site.addsitedir`), rather than adding to :envvar:`PYTHONPATH` or + :attr:`sys.path` directly. Testing a Python package ------------------------ -The CPython source tree contains :source:`a testbed project ` that +The CPython source tree contains :source:`a testbed project ` that is used to run the CPython test suite on the iOS simulator. This testbed can also be used as a testbed project for running your Python library's test suite on iOS. -After building or obtaining an iOS XCFramework (See :source:`iOS/README.rst` -for details), create a clone of the Python iOS testbed project by running: +After building or obtaining an iOS XCFramework (see :source:`Apple/iOS/README.md` +for details), create a clone of the Python iOS testbed project. If you used the +``Apple`` build script to build the XCframework, you can run: .. code-block:: bash - $ python iOS/testbed clone --framework --app --app app-testbed + $ python cross-build/iOS/testbed clone --app --app app-testbed + +Or, if you've sourced your own XCframework, by running: + +.. code-block:: bash -You will need to modify the ``iOS/testbed`` reference to point to that -directory in the CPython source tree; any folders specified with the ``--app`` -flag will be copied into the cloned testbed project. The resulting testbed will -be created in the ``app-testbed`` folder. In this example, the ``module1`` and -``module2`` would be importable modules at runtime. If your project has -additional dependencies, they can be installed into the -``app-testbed/iOSTestbed/app_packages`` folder (using ``pip install --target -app-testbed/iOSTestbed/app_packages`` or similar). + $ python Apple/testbed clone --platform iOS --framework --app --app app-testbed + +Any folders specified with the ``--app`` flag will be copied into the cloned +testbed project. The resulting testbed will be created in the ``app-testbed`` +folder. In this example, the ``module1`` and ``module2`` would be importable +modules at runtime. If your project has additional dependencies, they can be +installed into the ``app-testbed/Testbed/app_packages`` folder (using ``pip +install --target app-testbed/Testbed/app_packages`` or similar). You can then use the ``app-testbed`` folder to run the test suite for your app, For example, if ``module1.tests`` was the entry point to your test suite, you @@ -369,13 +311,29 @@ You can also open the testbed project in Xcode by running: This will allow you to use the full Xcode suite of tools for debugging. +The arguments used to run the test suite are defined as part of the test plan. +To modify the test plan, select the test plan node of the project tree (it +should be the first child of the root node), and select the "Configurations" +tab. Modify the "Arguments Passed On Launch" value to change the testing +arguments. + +The test plan also disables parallel testing, and specifies the use of the +``Testbed.lldbinit`` file for providing configuration of the debugger. The +default debugger configuration disables automatic breakpoints on the +``SIGINT``, ``SIGUSR1``, ``SIGUSR2``, and ``SIGXFSZ`` signals. + App Store Compliance ==================== The only mechanism for distributing apps to third-party iOS devices is to submit the app to the iOS App Store; apps submitted for distribution must pass Apple's app review process. This process includes a set of automated validation -rules that inspect the submitted application bundle for problematic code. +rules that inspect the submitted application bundle for problematic code. There +are some steps that must be taken to ensure that your app will be able to pass +these validation steps. + +Incompatible code in the standard library +----------------------------------------- The Python standard library contains some code that is known to violate these automated rules. While these violations appear to be false positives, Apple's @@ -386,3 +344,18 @@ The Python source tree contains :source:`a patch file ` that will remove all code that is known to cause issues with the App Store review process. This patch is applied automatically when building for iOS. + +Privacy manifests +----------------- + +In April 2025, Apple introduced a requirement for `certain third-party +libraries to provide a Privacy Manifest +`__. +As a result, if you have a binary module that uses one of the affected +libraries, you must provide an ``.xcprivacy`` file for that library. +OpenSSL is one library affected by this requirement, but there are others. + +If you produce a binary module named ``mymodule.so``, and use you the Xcode +build script described in step 7 above, you can place a ``mymodule.xcprivacy`` +file next to ``mymodule.so``, and the privacy manifest will be installed into +the required location when the binary module is converted into a framework. diff --git a/Doc/using/mac.rst b/Doc/using/mac.rst index 4b6c884f3d4f25..2fd6eace2b5dda 100644 --- a/Doc/using/mac.rst +++ b/Doc/using/mac.rst @@ -20,13 +20,6 @@ the Pythons provided by the CPython release team for download from the `python.org website `_. See :ref:`alternative_bundles` for some other options. -.. |usemac_x_dot_y| replace:: 3.13 -.. |usemac_python_x_dot_y_literal| replace:: ``python3.13`` -.. |usemac_python_x_dot_y_t_literal| replace:: ``python3.13t`` -.. |usemac_python_x_dot_y_t_literal_config| replace:: ``python3.13t-config`` -.. |usemac_applications_folder_name| replace:: ``Python 3.13`` -.. |usemac_applications_folder_version| replace:: ``/Applications/Python 3.13/`` - .. _getting-osx: .. _getting-and-installing-macpython: @@ -46,7 +39,7 @@ Current installers provide a `universal2 binary `_ build of Python which runs natively on all Macs (Apple Silicon and Intel) that are supported by a wide range of macOS versions, -currently typically from at least **macOS 10.13 High Sierra** on. +currently typically from at least **macOS 10.15 Catalina** on. The downloaded file is a standard macOS installer package file (``.pkg``). File integrity information (checksum, size, sigstore signature, etc) for each file is included @@ -64,7 +57,7 @@ Clicking on the **Continue** button brings up the **Read Me** for this installer Besides other important information, the **Read Me** documents which Python version is going to be installed and on what versions of macOS it is supported. You may need to scroll through to read the whole file. By default, this **Read Me** will also be -installed in |usemac_applications_folder_version| and available to read anytime. +installed in |applications_python_version_literal| and available to read anytime. .. image:: mac_installer_02_readme.png @@ -83,7 +76,7 @@ display. For most uses, the standard set of installation operations is appropria By pressing the **Customize** button, you can choose to omit or select certain package components of the installer. Click on each package name to see a description of what it installs. -To also install support for the optional experimental free-threaded feature, +To also install support for the optional free-threaded feature, see :ref:`install-freethreaded-macos`. .. image:: mac_installer_05_custom_install.png @@ -97,7 +90,7 @@ When the installation is complete, the **Summary** window will appear. .. image:: mac_installer_06_summary.png Double-click on the :command:`Install Certificates.command` -icon or file in the |usemac_applications_folder_version| window to complete the +icon or file in the |applications_python_version_literal| window to complete the installation. .. image:: mac_installer_07_applications.png @@ -114,7 +107,7 @@ Close this terminal window and the installer window. A default install will include: -* A |usemac_applications_folder_name| folder in your :file:`Applications` folder. In here +* A |python_version_literal| folder in your :file:`Applications` folder. In here you find :program:`IDLE`, the development environment that is a standard part of official Python distributions; and :program:`Python Launcher`, which handles double-clicking Python scripts from the macOS `Finder `_. @@ -141,7 +134,7 @@ How to run a Python script There are two ways to invoke the Python interpreter. If you are familiar with using a Unix shell in a terminal -window, you can invoke |usemac_python_x_dot_y_literal| or ``python3`` optionally +window, you can invoke |python_x_dot_y_literal| or ``python3`` optionally followed by one or more command line options (described in :ref:`using-on-general`). The Python tutorial also has a useful section on :ref:`using Python interactively from a shell `. @@ -160,7 +153,7 @@ for more information. To run a Python script file from the terminal window, you can invoke the interpreter with the name of the script file: - |usemac_python_x_dot_y_literal| ``myscript.py`` + |python_x_dot_y_literal| ``myscript.py`` To run your script from the Finder, you can either: @@ -259,20 +252,20 @@ Advanced Topics Installing Free-threaded Binaries --------------------------------- -.. versionadded:: 3.13 (Experimental) - -.. note:: - - Everything described in this section is considered experimental, - and should be expected to change in future releases. +.. versionadded:: 3.13 The ``python.org`` :ref:`Python for macOS ` installer package can optionally install an additional build of -Python |usemac_x_dot_y| that supports :pep:`703`, the experimental free-threading feature +Python |version| that supports :pep:`703`, the free-threading feature (running with the :term:`global interpreter lock` disabled). Check the release page on ``python.org`` for possible updated information. -Because this feature is still considered experimental, the support for it +The free-threaded mode is working and continues to be improved, but +there is some additional overhead in single-threaded workloads compared +to the regular build. Additionally, third-party packages, in particular ones +with an :term:`extension module`, may not be ready for use in a +free-threaded build, and will re-enable the :term:`GIL`. +Therefore, the support for free-threading is not installed by default. It is packaged as a separate install option, available by clicking the **Customize** button on the **Installation Type** step of the installer as described above. @@ -282,46 +275,54 @@ step of the installer as described above. If the box next to the **Free-threaded Python** package name is checked, a separate :file:`PythonT.framework` will also be installed alongside the normal :file:`Python.framework` in :file:`/Library/Frameworks`. -This configuration allows a free-threaded Python |usemac_x_dot_y| build to co-exist -on your system with a traditional (GIL only) Python |usemac_x_dot_y| build with -minimal risk while installing or testing. This installation layout is itself -experimental and is subject to change in future releases. +This configuration allows a free-threaded Python |version| build to co-exist +on your system with a traditional (GIL only) Python |version| build with +minimal risk while installing or testing. This installation layout may +change in future releases. Known cautions and limitations: - The **UNIX command-line tools** package, which is selected by default, - will install links in :file:`/usr/local/bin` for |usemac_python_x_dot_y_t_literal|, - the free-threaded interpreter, and |usemac_python_x_dot_y_t_literal_config|, + will install links in :file:`/usr/local/bin` for |python_x_dot_y_t_literal|, + the free-threaded interpreter, and |python_x_dot_y_t_literal_config|, a configuration utility which may be useful for package builders. Since :file:`/usr/local/bin` is typically included in your shell ``PATH``, in most cases no changes to your ``PATH`` environment variables should - be needed to use |usemac_python_x_dot_y_t_literal|. + be needed to use |python_x_dot_y_t_literal|. - For this release, the **Shell profile updater** package and the - :file:`Update Shell Profile.command` in |usemac_applications_folder_version| + :file:`Update Shell Profile.command` in |applications_python_version_literal| do not support the free-threaded package. - The free-threaded build and the traditional build have separate search paths and separate :file:`site-packages` directories so, by default, if you need a package available in both builds, it may need to be installed in both. The free-threaded package will install a separate instance of :program:`pip` for use - with |usemac_python_x_dot_y_t_literal|. + with |python_x_dot_y_t_literal|. - To install a package using :command:`pip` without a :command:`venv`: - |usemac_python_x_dot_y_t_literal| ``-m pip install `` + .. parsed-literal:: + + python\ |version|\ t -m pip install - When working with multiple Python environments, it is usually safest and easiest to :ref:`create and use virtual environments `. This can avoid possible command name conflicts and confusion about which Python is in use: - |usemac_python_x_dot_y_t_literal| ``-m venv `` + .. parsed-literal:: + + python\ |version|\ t -m venv + then :command:`activate`. - To run a free-threaded version of IDLE: - |usemac_python_x_dot_y_t_literal| ``-m idlelib`` + .. parsed-literal:: + + python\ |version|\ t -m idlelib + - The interpreters in both builds respond to the same :ref:`PYTHON environment variables ` @@ -337,28 +338,28 @@ Known cautions and limitations: thus it only needs to be run once. - If you cannot depend on the link in ``/usr/local/bin`` pointing to the - ``python.org`` free-threaded |usemac_python_x_dot_y_t_literal| (for example, if you want + ``python.org`` free-threaded |python_x_dot_y_t_literal| (for example, if you want to install your own version there or some other distribution does), you can explicitly set your shell ``PATH`` environment variable to include the ``PythonT`` framework ``bin`` directory: - .. code-block:: sh + .. parsed-literal:: - export PATH="/Library/Frameworks/PythonT.framework/Versions/3.13/bin":"$PATH" + export PATH="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin":"$PATH" The traditional framework installation by default does something similar, except for :file:`Python.framework`. Be aware that having both framework ``bin`` directories in ``PATH`` can lead to confusion if there are duplicate names - like ``python3.13`` in both; which one is actually used depends on the order + like |python_x_dot_y_literal| in both; which one is actually used depends on the order they appear in ``PATH``. The ``which python3.x`` or ``which python3.xt`` commands can show which path is being used. Using virtual environments can help avoid such ambiguities. Another option might be to create a shell :command:`alias` to the desired interpreter, like: - .. code-block:: sh + .. parsed-literal:: - alias py3.13="/Library/Frameworks/Python.framework/Versions/3.13/bin/python3.13" - alias py3.13t="/Library/Frameworks/PythonT.framework/Versions/3.13/bin/python3.13t" + alias py\ |version|\ ="/Library/Frameworks/Python.framework/Versions/\ |version|\ /bin/python\ |version|\ " + alias py\ |version|\ t="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin/python\ |version|\ t" Installing using the command line --------------------------------- @@ -369,22 +370,22 @@ the macOS command line :command:`installer` utility lets you select non-default options, too. If you are not familiar with :command:`installer`, it can be somewhat cryptic (see :command:`man installer` for more information). As an example, the following shell snippet shows one way to do it, -using the ``3.13.0b2`` release and selecting the free-threaded interpreter +using the |x_dot_y_b2_literal| release and selecting the free-threaded interpreter option: -.. code-block:: sh +.. parsed-literal:: - RELEASE="python-3.13.0b2-macos11.pkg" + RELEASE="python-\ |version|\ 0b2-macos11.pkg" # download installer pkg - curl -O https://www.python.org/ftp/python/3.13.0/${RELEASE} + curl -O \https://www.python.org/ftp/python/\ |version|\ .0/${RELEASE} # create installer choicechanges to customize the install: - # enable the PythonTFramework-3.13 package + # enable the PythonTFramework-\ |version|\ package # while accepting the other defaults (install all other packages) cat > ./choicechanges.plist < - + @@ -393,7 +394,7 @@ option: choiceAttribute selected choiceIdentifier - org.python.Python.PythonTFramework-3.13 + org.python.Python.PythonTFramework-\ |version|\ @@ -404,19 +405,19 @@ option: You can then test that both installer builds are now available with something like: -.. code-block:: console +.. parsed-literal:: $ # test that the free-threaded interpreter was installed if the Unix Command Tools package was enabled - $ /usr/local/bin/python3.13t -VV - Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + $ /usr/local/bin/python\ |version|\ t -VV + Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] $ # and the traditional interpreter - $ /usr/local/bin/python3.13 -VV - Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] + $ /usr/local/bin/python\ |version|\ -VV + Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] $ # test that they are also available without the prefix if /usr/local/bin is on $PATH - $ python3.13t -VV - Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] - $ python3.13 -VV - Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] + $ python\ |version|\ t -VV + Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + $ python\ |version|\ -VV + Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] .. note:: diff --git a/Doc/using/mac_installer_07_applications.png b/Doc/using/mac_installer_07_applications.png index 940219cad6f61c..c8b3aa1099adfc 100644 Binary files a/Doc/using/mac_installer_07_applications.png and b/Doc/using/mac_installer_07_applications.png differ diff --git a/Doc/using/unix.rst b/Doc/using/unix.rst index 9ec4e3419321a4..a9950ef7525497 100644 --- a/Doc/using/unix.rst +++ b/Doc/using/unix.rst @@ -84,11 +84,17 @@ On FreeBSD and OpenBSD Building Python =============== +.. seealso:: + + If you want to contribute to CPython, refer to the + `devguide `_, + which includes build instructions and other tips on setting up environment. + If you want to compile CPython yourself, first thing you should do is get the `source `_. You can download either the -latest release's source or just grab a fresh `clone -`_. (If you want -to contribute patches, you will need a clone.) +latest release's source or grab a fresh `clone +`_. +You will also need to install the :ref:`build requirements `. The build process consists of the usual commands:: diff --git a/Doc/using/win_install_freethreaded.png b/Doc/using/win_install_freethreaded.png index 0aa01c1df6e051..12b89c0165d994 100644 Binary files a/Doc/using/win_install_freethreaded.png and b/Doc/using/win_install_freethreaded.png differ diff --git a/Doc/using/win_installer.png b/Doc/using/win_installer.png index 03bf2d7b16c599..fc9605a79cfcc5 100644 Binary files a/Doc/using/win_installer.png and b/Doc/using/win_installer.png differ diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 74d6db5d7d1a98..e15132e9752448 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -4,6 +4,8 @@ .. _Microsoft Store app: https://apps.microsoft.com/detail/9NQ7512CXL7T +.. _legacy launcher: https://www.python.org/ftp/python/3.14.0/win32/launcher.msi + .. _using-on-windows: ************************* @@ -60,7 +62,7 @@ packages. .. _windows-path-mod: .. _launcher: -Python Install Manager +Python install manager ====================== Installation @@ -77,34 +79,33 @@ To install the file downloaded from python.org, either double-click and select "Install", or run ``Add-AppxPackage `` in Windows Powershell. After installation, the ``python``, ``py``, and ``pymanager`` commands should be -available. If they are not, click Start and search for "Manage app execution -aliases". This settings page will let you enable the relevant commands. They -will be labelled "Python (default)", "Python (default windowed)", and "Python -install manager". - -If you have existing installations of Python, or you have modified your -:envvar:`PATH` variable, you may need to remove them or undo the modifications -in order for the commands to work. Old versions of Python can be reinstalled -using the Python install manager. +available. If you have existing installations of Python, or you have modified +your :envvar:`PATH` variable, you may need to remove them or undo the +modifications. See :ref:`pymanager-troubleshoot` for more help with fixing +non-working commands. When you first install a runtime, you will likely be prompted to add a directory to your :envvar:`PATH`. This is optional, if you prefer to use the ``py`` command, but is offered for those who prefer the full range of aliases (such as ``python3.14.exe``) to be available. The directory will be -:file:`%LocalAppData%\Python\bin` by default, but may be customized by an +:file:`%LocalAppData%\\Python\\bin` by default, but may be customized by an administrator. Click Start and search for "Edit environment variables for your account" for the system settings page to add the path. +Each Python runtime you install will have its own directory for scripts. These +also need to be added to :envvar:`PATH` if you want to use them. + The Python install manager will be automatically updated to new releases. This does not affect any installs of Python runtimes. Uninstalling the Python install manager does not uninstall any Python runtimes. If you are not able to install an MSIX in your context, for example, you are -using automated deployment software that does not support it, please see -:ref:`pymanager-advancedinstall` below for more information. +using automated deployment software that does not support it, or are targeting +Windows Server 2019, please see :ref:`pymanager-advancedinstall` below for more +information. -Basic Use +Basic use --------- The recommended command for launching Python is ``python``, which will either @@ -147,6 +148,10 @@ want to be passed to the runtime (such as script files or the module to launch): $> py -m this ... +The default runtime can be overridden with the :envvar:`PYTHON_MANAGER_DEFAULT` +environment variable, or a configuration file. See :ref:`pymanager-config` for +information about configuration settings. + To launch a specific runtime, the ``py`` command accepts a ``-V:`` option. This option must be specified before any others. The tag is part or all of the identifier for the runtime; for those from the CPython team, it looks like the @@ -190,7 +195,7 @@ installed if automatic installation is configured (most likely by setting ``pymanager exec`` forms of the command were used. -Command Help +Command help ------------ The ``py help`` command will display the full list of supported commands, along @@ -215,7 +220,7 @@ override multiple settings at once. See :ref:`pymanager-config` below for more information about these files. -Listing Runtimes +Listing runtimes ---------------- .. code:: @@ -256,7 +261,7 @@ For compatibility with the old launcher, the ``--list``, ``--list-paths``, additional options, and will produce legacy formatted output. -Installing Runtimes +Installing runtimes ------------------- .. code:: @@ -285,8 +290,12 @@ Passing ``--dry-run`` will generate output and logs, but will not modify any installs. In addition to the above options, the ``--target`` option will extract the -runtime to the specified directory instead of doing a normal install. This is -useful for embedding runtimes into larger applications. +runtime to the specified directory instead of doing a normal install. +This is useful for embedding runtimes into larger applications. +Unlike a normal install, ``py`` will not be aware of the extracted runtime, +and no Start menu or other shortcuts will be created. +To launch the runtime, directly execute the main executable (typically +``python.exe``) in the target directory. .. code:: @@ -295,7 +304,7 @@ useful for embedding runtimes into larger applications. .. _pymanager-offline: -Offline Installs +Offline installs ---------------- To perform offline installs of Python, you will need to first create an offline @@ -327,7 +336,7 @@ In this way, Python runtimes can be installed and managed on a machine without access to the internet. -Uninstalling Runtimes +Uninstalling runtimes --------------------- .. code:: @@ -373,59 +382,108 @@ overridden installs may resolve settings differently. A global configuration file may be configured by an administrator, and would be read first. The user configuration file is stored at -:file:`%AppData%\\Python\\pymanager.json` (by default) and is read next, +:file:`%AppData%\\Python\\pymanager.json` +(note that this location is under ``Roaming``, not ``Local``) and is read next, overwriting any settings from earlier files. An additional configuration file may be specified as the ``PYTHON_MANAGER_CONFIG`` environment variable or the ``--config`` command line option (but not both). +These locations may be modified by administrative customization options listed +later. The following settings are those that are considered likely to be modified in normal use. Later sections list those that are intended for administrative customization. -.. csv-table:: Standard configuration options - :header: "Config Key", "Environment Variable", "Description" - :widths: 2, 2, 4 - - ``default_tag``,``PYTHON_MANAGER_DEFAULT``,"The preferred default - version to launch or install. By default, this is interpreted as the most - recent non-prerelease version from the CPython team. - " - ``default_platform``,``PYTHON_MANAGER_DEFAULT_PLATFORM``,"The preferred - default platform to launch or install. This is treated as a suffix to the - specified tag, such that ``py -V:3.14`` would prefer an install for - ``3.14-64`` if it exists (and ``default_platform`` is ``-64``), but will use - ``3.14`` if no tagged install exists. - " - ``logs_dir``,``PYTHON_MANAGER_LOGS``,"The location where log files are - written. By default, :file:`%TEMP%`. - " - ``automatic_install``,``PYTHON_MANAGER_AUTOMATIC_INSTALL``,"True to - allow automatic installs when specifying a particular runtime to launch. - By default, true. - " - ``include_unmanaged``,``PYTHON_MANAGER_INCLUDE_UNMANAGED``,"True to - allow listing and launching runtimes that were not installed by the Python - install manager, or false to exclude them. By default, true. - " - ``shebang_can_run_anything``,"``PYTHON_MANAGER_SHEBANG_CAN_RUN_ANYTHING`` - ","True to allow shebangs in ``.py`` files to launch applications other than - Python runtimes, or false to prevent it. By default, true. - " - ``log_level``,"``PYMANAGER_VERBOSE``, ``PYMANAGER_DEBUG``","Set - the default level of output (0-50) By default, 20. Lower values produce more - output. The environment variables are boolean, and may produce additional - output during startup that is later suppressed by other configuration. - " - ``confirm``,``PYTHON_MANAGER_CONFIRM``,"True to confirm certain actions - before taking them (such as uninstall), or false to skip the confirmation. By - default, true. - " - ``install.source``,``PYTHON_MANAGER_SOURCE_URL``,"Override the index - feed to obtain new installs from. - " - ``list.format``,``PYTHON_MANAGER_LIST_FORMAT``,"Specify the default - format used by the ``py list`` command. By default, ``table``. - " +.. Sphinx bug with text writer; remove widths & caption temporarily +.. :widths: 2, 2, 4 + +.. rubric:: Standard configuration options + +.. list-table:: + :header-rows: 1 + + * - Config Key + - Environment Variable + - Description + + * - ``default_tag`` + - .. envvar:: PYTHON_MANAGER_DEFAULT + - The preferred default version to launch or install. + By default, this is interpreted as the most recent non-prerelease version + from the CPython team. + + * - ``default_platform`` + - ``PYTHON_MANAGER_DEFAULT_PLATFORM`` + - The preferred default platform to launch or install. + This is treated as a suffix to the specified tag, such that ``py -V:3.14`` + would prefer an install for ``3.14-64`` if it exists + (and ``default_platform`` is ``-64``), + but will use ``3.14`` if no tagged install exists. + + * - ``logs_dir`` + - ``PYTHON_MANAGER_LOGS`` + - The location where log files are written. + By default, :file:`%TEMP%`. + + * - ``automatic_install`` + - ``PYTHON_MANAGER_AUTOMATIC_INSTALL`` + - True to allow automatic installs when using ``py exec`` to launch. + Other commands will not automatically install. + By default, true. + + * - ``include_unmanaged`` + - ``PYTHON_MANAGER_INCLUDE_UNMANAGED`` + - True to allow listing and launching runtimes that were not installed + by the Python install manager, or false to exclude them. + By default, true. + + * - ``shebang_can_run_anything`` + - ``PYTHON_MANAGER_SHEBANG_CAN_RUN_ANYTHING`` + - True to allow shebangs in ``.py`` files to launch applications other than + Python runtimes, or false to prevent it. + By default, true. + + * - ``log_level`` + - ``PYMANAGER_VERBOSE``, ``PYMANAGER_DEBUG`` + - Set the default level of output (0-50). + By default, 20. + Lower values produce more output. + The environment variables are boolean, and may produce additional + output during startup that is later suppressed by other configuration. + + * - ``confirm`` + - ``PYTHON_MANAGER_CONFIRM`` + - True to confirm certain actions before taking them (such as uninstall), + or false to skip the confirmation. + By default, true. + + * - ``install.source`` + - ``PYTHON_MANAGER_SOURCE_URL`` + - Override the index feed to obtain new installs from. + + * - ``list.format`` + - ``PYTHON_MANAGER_LIST_FORMAT`` + - Specify the default format used by the ``py list`` command. + By default, ``table``. + + * - ``install_dir`` + - (none) + - Specify the root directory that runtimes will be installed into. + If you change this setting, previously installed runtimes will not be + usable unless you move them to the new location. + + * - ``global_dir`` + - (none) + - Specify the directory where global commands (such as ``python3.14.exe``) + are stored. + This directory should be added to your :envvar:`PATH` to make the + commands available from your terminal. + + * - ``download_dir`` + - (none) + - Specify the directory where downloaded files are stored. + This directory is a temporary cache, and can be cleaned up from time to + time. Dotted names should be nested inside JSON objects, for example, ``list.format`` would be specified as ``{"list": {"format": "table"}}``. @@ -469,6 +527,10 @@ directory (which you may have added to your :envvar:`PATH` environment variable) can be used in a shebang, even if it is not on your :envvar:`PATH`. This allows the use of shebangs like ``/usr/bin/python3.12`` to select a particular runtime. +If no runtimes are installed, or if automatic installation is enabled, the +requested runtime will be installed if necessary. See :ref:`pymanager-config` +for information about configuration settings. + The ``/usr/bin/env`` form of shebang line will also search the :envvar:`PATH` environment variable for unrecognized commands. This corresponds to the behaviour of the Unix ``env`` program, which performs the same search, but @@ -485,21 +547,26 @@ which the path to the script and any additional arguments will be appended. This functionality may be disabled by the ``shebang_can_run_anything`` configuration option. -.. note: +.. note:: The behaviour of shebangs in the Python install manager is subtly different from the previous ``py.exe`` launcher, and the old configuration options no longer apply. If you are specifically reliant on the old behaviour or - configuration, we recommend keeping the legacy launcher. It may be - `downloaded independently `_ - and installed on its own. The legacy launcher's ``py`` command will override - PyManager's one, and you will need to use ``pymanager`` commands for - installing and uninstalling. + configuration, we recommend installing the `legacy launcher`_. The legacy + launcher's ``py`` command will override PyManager's one by default, and you + will need to use ``pymanager`` commands for installing and uninstalling. + +.. _Add-AppxPackage: https://learn.microsoft.com/powershell/module/appx/add-appxpackage +.. _Remove-AppxPackage: https://learn.microsoft.com/powershell/module/appx/remove-appxpackage + +.. _Add-AppxProvisionedPackage: https://learn.microsoft.com/powershell/module/dism/add-appxprovisionedpackage + +.. _PackageManager: https://learn.microsoft.com/uwp/api/windows.management.deployment.packagemanager .. _pymanager-advancedinstall: -Advanced Installation +Advanced installation --------------------- For situations where an MSIX cannot be installed, such as some older @@ -509,6 +576,11 @@ per-machine installs to its default location in Program Files. It will attempt to modify the system :envvar:`PATH` environment variable to include this install location, but be sure to validate this on your configuration. +.. note:: + + Windows Server 2019 is the only version of Windows that CPython supports that + does not support MSIX. For Windows Server 2019, you should use the MSI. + Be aware that the MSI package does not bundle any runtimes, and so is not suitable for installs into offline environments without also creating an offline install index. See :ref:`pymanager-offline` and :ref:`pymanager-admin-config` @@ -529,29 +601,66 @@ depending on whether it was installed from python.org or through the Windows Store. Attempting to run the executable directly from Program Files is not recommended. -To programmatically install or uninstall the MSIX without using your -distribution platform's native support, the `Add-AppxPackage -`_ and -`Remove-AppxPackage `_ -PowerShell cmdlets are simplest to use: +To programmatically install the Python install manager, it is easiest to use +WinGet, which is included with all supported versions of Windows: -.. code:: +.. code-block:: powershell + + $> winget install 9NQ7512CXL7T -e --accept-package-agreements --disable-interactivity + + # Optionally run the configuration checker and accept all changes + $> py install --configure -y + +To download the Python install manager and install on another machine, the +following WinGet command will download the required files from the Store to your +Downloads directory (add ``-d `` to customize the output location). +This also generates a YAML file that appears to be unnecessary, as the +downloaded MSIX can be installed by launching or using the commands below. + +.. code-block:: powershell + + $> winget download 9NQ7512CXL7T -e --skip-license --accept-package-agreements --accept-source-agreements + +To programmatically install or uninstall an MSIX using only PowerShell, the +`Add-AppxPackage`_ and `Remove-AppxPackage`_ PowerShell cmdlets are recommended: + +.. code-block:: powershell $> Add-AppxPackage C:\Downloads\python-manager-25.0.msix ... $> Get-AppxPackage PythonSoftwareFoundation.PythonManager | Remove-AppxPackage -The native APIs for package management may be found on the Windows -`PackageManager `_ -class. The :func:`!AddPackageAsync` method installs for the current user, or use -:func:`!StagePackageAsync` followed by :func:`!ProvisionPackageForAllUsersAsync` -to install the Python install manager for all users from the MSIX package. Users -will still need to install their own copies of Python itself, as there is no way -to trigger those installs without being a logged in user. +The latest release can be downloaded and installed by Windows by passing the +AppInstaller file to the Add-AppxPackage command. This installs using the MSIX +on python.org, and is only recommended for cases where installing via the Store +(interactively or using WinGet) is not possible. + +.. code-block:: powershell + + $> Add-AppxPackage -AppInstallerFile https://www.python.org/ftp/python/pymanager/pymanager.appinstaller + +Other tools and APIs may also be used to provision an MSIX package for all users +on a machine, but Python does not consider this a supported scenario. We suggest +looking into the PowerShell `Add-AppxProvisionedPackage`_ cmdlet, the native +Windows `PackageManager`_ class, or the documentation and support for your +deployment tool. + +Regardless of the install method, users will still need to install their own +copies of Python itself, as there is no way to trigger those installs without +being a logged in user. When using the MSIX, the latest version of Python will +be available for all users to install without network access. + +Note that the MSIX downloadable from the Store and from the Python website are +subtly different and cannot be installed at the same time. Wherever possible, +we suggest using the above WinGet commands to download the package from the +Store to reduce the risk of setting up conflicting installs. There are no +licensing restrictions on the Python install manager that would prevent using +the Store package in this way. + .. _pymanager-admin-config: -Administrative Configuration +Administrative configuration ---------------------------- There are a number of options that may be useful for administrators to override @@ -581,60 +690,73 @@ variable will be used instead. Configuration settings that are paths are interpreted as relative to the directory containing the configuration file that specified them. -.. csv-table:: Administrative configuration options - :header: "Config Key", "Description" - :widths: 1, 4 - - ``base_config``,"The highest priority configuration file to read. Note that - only the built-in configuration file and the registry can modify this - setting. - " - ``user_config``,"The second configuration file to read. - " - ``additional_config``,"The third configuration file to read. - " - ``registry_override_key``,"Registry location to check for overrides. Note - that only the built-in configuration file can modify this setting. - " - ``bundled_dir``,"Read-only directory containing locally cached files. - " - ``install.fallback_source``,"Path or URL to an index to consult when the - main index cannot be accessed. - " - ``install.enable_shortcut_kinds``,"Comma-separated list of shortcut kinds - to allow (e.g. ``""pep514,start""``). Enabled shortcuts may still be disabled - by ``disable_shortcut_kinds``. - " - ``install.disable_shortcut_kinds``,"Comma-separated list of shortcut kinds - to exclude (e.g. ``""pep514,start""``). Disabled shortcuts are not - reactivated by ``enable_shortcut_kinds``. - " - ``pep514_root``,"Registry location to read and write PEP 514 entries into. - By default, :file:`HKEY_CURRENT_USER\\Software\\Python`. - " - ``start_folder``,"Start menu folder to write shortcuts into. By default, - ``Python``. This path is relative to the user's Programs folder. - " - ``virtual_env``,"Path to the active virtual environment. By default, this - is ``%VIRTUAL_ENV%``, but may be set empty to disable venv detection. - " - ``shebang_can_run_anything_silently``,"True to suppress visible warnings - when a shebang launches an application other than a Python runtime. - " +.. Sphinx bug with text writer; remove widths & caption temporarily +.. :widths: 1, 4 -.. _install-freethreaded-windows: +.. rubric:: Administrative configuration options -Installing Free-threaded Binaries ---------------------------------- +.. list-table:: + :header-rows: 1 -.. versionadded:: 3.13 (Experimental) + * - Config Key + - Description -.. note:: + * - ``base_config`` + - The highest priority configuration file to read. + Note that only the built-in configuration file and the registry can + modify this setting. + + * - ``user_config`` + - The second configuration file to read. + + * - ``additional_config`` + - The third configuration file to read. + + * - ``registry_override_key`` + - Registry location to check for overrides. + Note that only the built-in configuration file can modify this setting. + + * - ``bundled_dir`` + - Read-only directory containing locally cached files. + + * - ``install.fallback_source`` + - Path or URL to an index to consult when the main index cannot be accessed. + + * - ``install.enable_shortcut_kinds`` + - Comma-separated list of shortcut kinds to allow (e.g. ``"pep514,start"``). + Enabled shortcuts may still be disabled by ``disable_shortcut_kinds``. + + * - ``install.disable_shortcut_kinds`` + - Comma-separated list of shortcut kinds to exclude + (e.g. ``"pep514,start"``). + Disabled shortcuts are not reactivated by ``enable_shortcut_kinds``. + + * - ``pep514_root`` + - Registry location to read and write PEP 514 entries into. + By default, :file:`HKEY_CURRENT_USER\\Software\\Python`. + + * - ``start_folder`` + - Start menu folder to write shortcuts into. + By default, ``Python``. + This path is relative to the user's Programs folder. + + * - ``virtual_env`` + - Path to the active virtual environment. + By default, this is ``%VIRTUAL_ENV%``, but may be set empty + to disable venv detection. - Everything described in this section is considered experimental, - and should be expected to change in future releases. + * - ``shebang_can_run_anything_silently`` + - True to suppress visible warnings when a shebang launches an application + other than a Python runtime. -Pre-built distributions of the experimental free-threaded build are available +.. _install-freethreaded-windows: + +Installing free-threaded binaries +--------------------------------- + +.. versionadded:: 3.13 + +Pre-built distributions of the free-threaded build are available by installing tags with the ``t`` suffix. .. code:: @@ -659,61 +781,127 @@ issue at `our bug tracker `_, including any relevant log files (written to your :file:`%TEMP%` directory by default). -.. csv-table:: Troubleshooting - :header: "Symptom", "Things to try" - :widths: 1, 1 - - "``python`` gives me a ""command not found"" error or opens the Store app - when I type it in my terminal.", "Did you :ref:`install the Python install - manager `? - " - "", "Click Start, open ""Manage app execution aliases"", and check that the - aliases for ""Python (default)"" are enabled. If they already are, try - disabling and re-enabling to refresh the command. The ""Python (default - windowed)"" and ""Python install manager"" commands may also need refreshing. - " - "", "Check that the ``py`` and ``pymanager`` commands work. - " - "``py`` gives me a ""command not found"" error when I type it in my - terminal.","Did you :ref:`install the Python install manager `? - " - "", "Click Start, open ""Manage app execution aliases"", and check that the - aliases for ""Python install manager"" are enabled. If they already are, try - disabling and re-enabling to refresh the command. The ""Python (default - windowed)"" and ""Python install manager"" commands may also need refreshing. - " - "``py`` gives me a ""can't open file"" error when I type commands in my - terminal.", "This usually means you have the legacy launcher installed and it - has priority over the Python install manager. To remove, click Start, open - ""Installed apps"", search for ""Python launcher"" and uninstall it. - " - "``python`` doesn't launch the same runtime as ``py``", "Click Start, open - ""Installed apps"", look for any existing Python runtimes, and either remove - them or Modify and disable the :envvar:`PATH` options. - " - "", "Click Start, open ""Manage app execution aliases"", and check that your - ``python.exe`` alias is set to ""Python (default)"" - " - "``python`` and ``py`` don't launch the runtime I expect", "Check your - ``PYTHON_MANAGER_DEFAULT`` environment variable or ``default_tag`` - configuration. The ``py list`` command will show your default based on these - settings. - " - "", "Installs that are managed by the Python install manager will be chosen - ahead of unmanaged installs. Use ``py install`` to install the runtime you - expect, or configure your default tag. - " - "", "Prerelease and experimental installs that are not managed by the Python - install manager may be chosen ahead of stable releases. Configure your - default tag or uninstall the prerelease runtime and reinstall using ``py - install``. - " - "``pythonw`` or ``pyw`` don't launch the same runtime as ``python`` or - ``py``","Click Start, open ""Manage app execution aliases"", and check that - your ``pythonw.exe`` and ``pyw.exe`` aliases are consistent with your - others. - " - +.. Sphinx bug with text writer; remove widths & caption temporarily +.. :widths: 1, 1 + +.. rubric:: Troubleshooting + +.. list-table:: + :header-rows: 1 + + * - Symptom + - Things to try + + * - ``python`` gives me a "command not found" error or opens the Store app + when I type it in my terminal. + - Did you :ref:`install the Python install manager `? + + * - + - Click Start, open "Manage app execution aliases", and check that the + aliases for "Python (default)" are enabled. + If they already are, try disabling and re-enabling to refresh the command. + The "Python (default windowed)" and "Python install manager" commands + may also need refreshing. + + * - + - Check that the ``py`` and ``pymanager`` commands work. + + * - + - Ensure your :envvar:`PATH` variable contains the entry for + ``%UserProfile%\AppData\Local\Microsoft\WindowsApps``. + The operating system includes this entry once by default, after other + user paths. If removed, shortcuts will not be found. + + * - ``py`` gives me a "command not found" error when I type it in my terminal. + - Did you :ref:`install the Python install manager `? + + * - + - Click Start, open "Manage app execution aliases", and check that the + aliases for "Python (default)" are enabled. + If they already are, try disabling and re-enabling to refresh the command. + The "Python (default windowed)" and "Python install manager" commands + may also need refreshing. + + * - + - Ensure your :envvar:`PATH` variable contains the entry for + ``%UserProfile%\AppData\Local\Microsoft\WindowsApps``. + The operating system includes this entry once by default, after other + user paths. If removed, shortcuts will not be found. + + * - ``py`` gives me a "can't open file" error when I type commands in my + terminal. + - This usually means you have the legacy launcher installed and + it has priority over the Python install manager. + To remove, click Start, open "Installed apps", + search for "Python launcher" and uninstall it. + + * - ``python`` doesn't launch the same runtime as ``py`` + - Click Start, open "Installed apps", look for any existing Python runtimes, + and either remove them or Modify and disable the :envvar:`PATH` options. + + * - + - Click Start, open "Manage app execution aliases", and check that your + ``python.exe`` alias is set to "Python (default)" + + * - ``python`` and ``py`` don't launch the runtime I expect + - Check your :envvar:`PYTHON_MANAGER_DEFAULT` environment variable + or ``default_tag`` configuration. + The ``py list`` command will show your default based on these settings. + + * - + - Installs that are managed by the Python install manager will be chosen + ahead of unmanaged installs. + Use ``py install`` to install the runtime you expect, + or configure your default tag. + + * - + - Prerelease and experimental installs that are not managed by the Python + install manager may be chosen ahead of stable releases. + Configure your default tag or uninstall the prerelease runtime + and reinstall it using ``py install``. + + * - ``pythonw`` or ``pyw`` don't launch the same runtime as ``python`` or ``py`` + - Click Start, open "Manage app execution aliases", and check that your + ``pythonw.exe`` and ``pyw.exe`` aliases are consistent with your others. + + * - ``pip`` gives me a "command not found" error when I type it in my terminal. + - Have you activated a virtual environment? + Run the ``.venv\Scripts\activate`` script in your terminal to activate. + + * - + - The package may be available but missing the generated executable. + We recommend using the ``python -m pip`` command instead, + or alternatively the ``python -m pip install --force pip`` command + will recreate the executables and show you the path to + add to :envvar:`PATH`. + These scripts are separated for each runtime, and so you may need to + add multiple paths. + + * - Typing ``script-name.py`` in the terminal opens in a new window. + - This is a known limitation of the operating system. Either specify ``py`` + before the script name, create a batch file containing ``@py "%~dpn0.py" %*`` + with the same name as the script, or install the `legacy launcher`_ + and select it as the association for scripts. + + * - Drag-dropping files onto a script doesn't work + - This is a known limitation of the operating system. It is supported with + the `legacy launcher`_, or with the Python install manager when installed + from the MSI. + + * - I have installed the Python install manager multiple times. + - It is possible to install from the Store or WinGet, from the MSIX on + the Python website, and from the MSI, all at once. + They are all compatible and will share configuration and runtimes. + + * - + - See the earlier :ref:`pymanager-advancedinstall` section for ways to + uninstall the install manager other than the typical Installed Apps + (Add and Remove Programs) settings page. + + * - My old ``py.ini`` settings no longer work. + - The new Python install manager no longer supports this configuration file + or its settings, and so it will be ignored. + See :ref:`pymanager-config` for information about configuration settings. .. _windows-embeddable: @@ -731,7 +919,7 @@ To install an embedded distribution, we recommend using ``py install`` with the .. code:: - $> py install 3.14-embed --target=runtime + $> py install 3.14-embed --target= When extracted, the embedded distribution is (almost) fully isolated from the user's system, including environment variables, system registry settings, and @@ -754,7 +942,7 @@ versions before providing updates to users. The two recommended use cases for this distribution are described below. -Python Application +Python application ------------------ An application written in Python does not necessarily require users to be aware @@ -858,12 +1046,7 @@ for the 64-bit version, `www.nuget.org/packages/pythonx86 Free-threaded packages ---------------------- -.. versionadded:: 3.13 (Experimental) - -.. note:: - - Everything described in this section is considered experimental, - and should be expected to change in future releases. +.. versionadded:: 3.13 Packages containing free-threaded binaries are named `python-freethreaded `_ @@ -915,7 +1098,7 @@ please install Python 3.12. .. _max-path: -Removing the MAX_PATH Limitation +Removing the MAX_PATH limitation ================================ Windows historically has limited path lengths to 260 characters. This meant that @@ -1196,7 +1379,7 @@ installation". In this case: * Shortcuts are available for all users -Removing the MAX_PATH Limitation +Removing the MAX_PATH limitation -------------------------------- Windows historically has limited path lengths to 260 characters. This meant that @@ -1219,7 +1402,7 @@ After changing the above option, no further configuration is required. .. _install-quiet-option: -Installing Without UI +Installing without UI --------------------- All of the options available in the installer UI can also be specified from the @@ -1368,7 +1551,7 @@ example file sets the same options as the previous example: .. _install-layout-option: -Installing Without Downloading +Installing without downloading ------------------------------ As some features of Python are not included in the initial installer download, @@ -1409,15 +1592,10 @@ settings and replace any that have been removed or modified. :ref:`launcher`, which has its own entry in Programs and Features. -Installing Free-threaded Binaries +Installing free-threaded binaries --------------------------------- -.. versionadded:: 3.13 (Experimental) - -.. note:: - - Everything described in this section is considered experimental, - and should be expected to change in future releases. +.. versionadded:: 3.13 To install pre-built binaries with free-threading enabled (see :pep:`703`), you should select "Customize installation". The second page of options includes the @@ -1449,7 +1627,7 @@ builds. Free-threaded binaries are also available :ref:`on nuget.org `. -Python Launcher for Windows (Deprecated) +Python launcher for Windows (deprecated) ======================================== .. deprecated:: 3.14 @@ -1601,7 +1779,7 @@ have the script specify the version which should be used. The key benefit of this is that a single launcher can support multiple Python versions at the same time depending on the contents of the first line. -Shebang Lines +Shebang lines ------------- If the first line of a script file starts with ``#!``, it is known as a @@ -1666,7 +1844,7 @@ program, which performs a :envvar:`PATH` search. If an executable matching the first argument after the ``env`` command cannot be found, but the argument starts with ``python``, it will be handled as described for the other virtual commands. -The environment variable :envvar:`PYLAUNCHER_NO_SEARCH_PATH` may be set +The environment variable :envvar:`!PYLAUNCHER_NO_SEARCH_PATH` may be set (to any value) to skip this search of :envvar:`PATH`. Shebang lines that do not match any of these patterns are looked up in the @@ -1733,7 +1911,7 @@ For example, a shebang line of ``#!python`` has no version qualifier, while ``#!python3`` has a version qualifier which specifies only a major version. If no version qualifiers are found in a command, the environment -variable :envvar:`PY_PYTHON` can be set to specify the default version +variable :envvar:`!PY_PYTHON` can be set to specify the default version qualifier. If it is not set, the default is "3". The variable can specify any value that may be passed on the command line, such as "3", "3.7", "3.7-32" or "3.7-64". (Note that the "-64" option is only @@ -1806,17 +1984,17 @@ For example: Diagnostics ----------- -If an environment variable :envvar:`PYLAUNCHER_DEBUG` is set (to any value), the +If an environment variable :envvar:`!PYLAUNCHER_DEBUG` is set (to any value), the launcher will print diagnostic information to stderr (i.e. to the console). While this information manages to be simultaneously verbose *and* terse, it should allow you to see what versions of Python were located, why a particular version was chosen and the exact command-line used to execute the target Python. It is primarily intended for testing and debugging. -Dry Run +Dry run ------- -If an environment variable :envvar:`PYLAUNCHER_DRYRUN` is set (to any value), +If an environment variable :envvar:`!PYLAUNCHER_DRYRUN` is set (to any value), the launcher will output the command it would have run, but will not actually launch Python. This may be useful for tools that want to use the launcher to detect and then launch Python directly. Note that the command written to @@ -1826,14 +2004,14 @@ the console. Install on demand ----------------- -If an environment variable :envvar:`PYLAUNCHER_ALLOW_INSTALL` is set (to any +If an environment variable :envvar:`!PYLAUNCHER_ALLOW_INSTALL` is set (to any value), and the requested Python version is not installed but is available on the Microsoft Store, the launcher will attempt to install it. This may require user interaction to complete, and you may need to run the command again. -An additional :envvar:`PYLAUNCHER_ALWAYS_INSTALL` variable causes the launcher +An additional :envvar:`!PYLAUNCHER_ALWAYS_INSTALL` variable causes the launcher to always try to install Python, even if it is detected. This is mainly intended -for testing (and should be used with :envvar:`PYLAUNCHER_DRYRUN`). +for testing (and should be used with :envvar:`!PYLAUNCHER_DRYRUN`). Return codes ------------ diff --git a/Doc/whatsnew/2.0.rst b/Doc/whatsnew/2.0.rst index 1a949ec4035807..c157c0337a1342 100644 --- a/Doc/whatsnew/2.0.rst +++ b/Doc/whatsnew/2.0.rst @@ -656,7 +656,8 @@ break. The change which will probably break the most code is tightening up the arguments accepted by some methods. Some methods would take multiple arguments and treat them as a tuple, particularly various list methods such as -:meth:`!append` and :meth:`!insert`. In earlier versions of Python, if ``L`` is +:meth:`~list.append` and :meth:`~list.insert`. +In earlier versions of Python, if ``L`` is a list, ``L.append( 1,2 )`` appends the tuple ``(1,2)`` to the list. In Python 2.0 this causes a :exc:`TypeError` exception to be raised, with the message: 'append requires exactly 1 argument; 2 given'. The fix is to simply add an diff --git a/Doc/whatsnew/2.3.rst b/Doc/whatsnew/2.3.rst index b7e4e73f4ce4aa..f43692b3dce9e8 100644 --- a/Doc/whatsnew/2.3.rst +++ b/Doc/whatsnew/2.3.rst @@ -66,7 +66,7 @@ Here's a simple example:: The union and intersection of sets can be computed with the :meth:`~frozenset.union` and :meth:`~frozenset.intersection` methods; an alternative notation uses the bitwise operators ``&`` and ``|``. Mutable sets also have in-place versions of these methods, -:meth:`!union_update` and :meth:`~frozenset.intersection_update`. :: +:meth:`!union_update` and :meth:`~set.intersection_update`. :: >>> S1 = sets.Set([1,2,3]) >>> S2 = sets.Set([4,5,6]) @@ -87,7 +87,7 @@ It's also possible to take the symmetric difference of two sets. This is the set of all elements in the union that aren't in the intersection. Another way of putting it is that the symmetric difference contains all elements that are in exactly one set. Again, there's an alternative notation (``^``), and an -in-place version with the ungainly name :meth:`~frozenset.symmetric_difference_update`. :: +in-place version with the ungainly name :meth:`~set.symmetric_difference_update`. :: >>> S1 = sets.Set([1,2,3,4]) >>> S2 = sets.Set([3,4,5,6]) diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst index 3430ac8668e280..e195d9d462dda9 100644 --- a/Doc/whatsnew/2.5.rst +++ b/Doc/whatsnew/2.5.rst @@ -2169,9 +2169,9 @@ Changes to Python's build process and to the C API include: * Two new macros can be used to indicate C functions that are local to the current file so that a faster calling convention can be used. - ``Py_LOCAL(type)`` declares the function as returning a value of the + :c:macro:`Py_LOCAL` declares the function as returning a value of the specified *type* and uses a fast-calling qualifier. - ``Py_LOCAL_INLINE(type)`` does the same thing and also requests the + :c:macro:`Py_LOCAL_INLINE` does the same thing and also requests the function be inlined. If macro :c:macro:`!PY_LOCAL_AGGRESSIVE` is defined before :file:`python.h` is included, a set of more aggressive optimizations are enabled for the module; you should benchmark the results to find out if these diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index 9dbc07a34e20a1..243ab85d4296b5 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -56,7 +56,7 @@ Python 2.6 incorporates new features and syntax from 3.0 while remaining compatible with existing code by not removing older features or syntax. When it's not possible to do that, Python 2.6 tries to do what it can, adding compatibility functions in a -:mod:`future_builtins` module and a :option:`!-3` switch to warn about +:mod:`!future_builtins` module and a :option:`!-3` switch to warn about usages that will become unsupported in 3.0. Some significant new packages have been added to the standard library, @@ -109,7 +109,7 @@ are: Python 3.0 adds several new built-in functions and changes the semantics of some existing builtins. Functions that are new in 3.0 such as :func:`bin` have simply been added to Python 2.6, but existing -builtins haven't been changed; instead, the :mod:`future_builtins` +builtins haven't been changed; instead, the :mod:`!future_builtins` module has versions with the new 3.0 semantics. Code written to be compatible with 3.0 can do ``from future_builtins import hex, map`` as necessary. @@ -118,7 +118,7 @@ A new command-line switch, :option:`!-3`, enables warnings about features that will be removed in Python 3.0. You can run code with this switch to see how much work will be necessary to port code to 3.0. The value of this switch is available -to Python code as the boolean variable :data:`sys.py3kwarning`, +to Python code as the boolean variable :data:`!sys.py3kwarning`, and to C extension code as :c:data:`!Py_Py3kWarningFlag`. .. seealso:: @@ -307,9 +307,9 @@ The :mod:`threading` module's locks and condition variables also support the The lock is acquired before the block is executed and always released once the block is complete. -The :func:`localcontext` function in the :mod:`decimal` module makes it easy -to save and restore the current decimal context, which encapsulates the desired -precision and rounding characteristics for computations:: +The :func:`~decimal.localcontext` function in the :mod:`decimal` module makes +it easy to save and restore the current decimal context, which encapsulates +the desired precision and rounding characteristics for computations:: from decimal import Decimal, Context, localcontext @@ -337,12 +337,12 @@ underlying implementation and should keep reading. A high-level explanation of the context management protocol is: * The expression is evaluated and should result in an object called a "context - manager". The context manager must have :meth:`~object.__enter__` and :meth:`~object.__exit__` - methods. + manager". The context manager must have :meth:`~object.__enter__` and + :meth:`~object.__exit__` methods. -* The context manager's :meth:`~object.__enter__` method is called. The value returned - is assigned to *VAR*. If no ``as VAR`` clause is present, the value is simply - discarded. +* The context manager's :meth:`~object.__enter__` method is called. The value + returned is assigned to *VAR*. If no ``as VAR`` clause is present, the + value is simply discarded. * The code in *BLOCK* is executed. @@ -378,7 +378,7 @@ be to let the user write code like this:: The transaction should be committed if the code in the block runs flawlessly or rolled back if there's an exception. Here's the basic interface for -:class:`DatabaseConnection` that I'll assume:: +:class:`!DatabaseConnection` that I'll assume:: class DatabaseConnection: # Database interface @@ -431,14 +431,15 @@ The contextlib module The :mod:`contextlib` module provides some functions and a decorator that are useful when writing objects for use with the ':keyword:`with`' statement. -The decorator is called :func:`contextmanager`, and lets you write a single -generator function instead of defining a new class. The generator should yield -exactly one value. The code up to the :keyword:`yield` will be executed as the -:meth:`~object.__enter__` method, and the value yielded will be the method's return -value that will get bound to the variable in the ':keyword:`with`' statement's -:keyword:`!as` clause, if any. The code after the :keyword:`!yield` will be -executed in the :meth:`~object.__exit__` method. Any exception raised in the block will -be raised by the :keyword:`!yield` statement. +The decorator is called :func:`~contextlib.contextmanager`, and lets you write +a single generator function instead of defining a new class. The generator +should yield exactly one value. The code up to the :keyword:`yield` will be +executed as the :meth:`~object.__enter__` method, and the value yielded will +be the method's return value that will get bound to the variable in the +':keyword:`with`' statement's :keyword:`!as` clause, if any. The code after +the :keyword:`!yield` will be executed in the :meth:`~object.__exit__` method. +Any exception raised in the block will be raised by the :keyword:`!yield` +statement. Using this decorator, our database example from the previous section could be written as:: @@ -469,7 +470,7 @@ statement both starts a database transaction and acquires a thread lock:: with nested (db_transaction(db), lock) as (cursor, locked): ... -Finally, the :func:`closing` function returns its argument so that it can be +Finally, the :func:`~contextlib.closing` function returns its argument so that it can be bound to a variable, and calls the argument's ``.close()`` method at the end of the block. :: @@ -538,7 +539,7 @@ If you don't like the default directory, it can be overridden by an environment variable. :envvar:`PYTHONUSERBASE` sets the root directory used for all Python versions supporting this feature. On Windows, the directory for application-specific data can be changed by -setting the :envvar:`APPDATA` environment variable. You can also +setting the :envvar:`!APPDATA` environment variable. You can also modify the :file:`site.py` file for your Python installation. The feature can be disabled entirely by running Python with the @@ -568,11 +569,12 @@ The :mod:`multiprocessing` module started out as an exact emulation of the :mod:`threading` module using processes instead of threads. That goal was discarded along the path to Python 2.6, but the general approach of the module is still similar. The fundamental class -is the :class:`Process`, which is passed a callable object and -a collection of arguments. The :meth:`start` method +is the :class:`~multiprocessing.Process`, which is passed a callable object and +a collection of arguments. The :meth:`~multiprocessing.Process.start` method sets the callable running in a subprocess, after which you can call -the :meth:`is_alive` method to check whether the subprocess is still running -and the :meth:`join` method to wait for the process to exit. +the :meth:`~multiprocessing.Process.is_alive` method to check whether the +subprocess is still running and the :meth:`~multiprocessing.Process.join` +method to wait for the process to exit. Here's a simple example where the subprocess will calculate a factorial. The function doing the calculation is written strangely so @@ -619,13 +621,16 @@ the object to communicate. (If the parent were to change the value of the global variable, the child's value would be unaffected, and vice versa.) -Two other classes, :class:`Pool` and :class:`Manager`, provide -higher-level interfaces. :class:`Pool` will create a fixed number of -worker processes, and requests can then be distributed to the workers -by calling :meth:`apply` or :meth:`apply_async` to add a single request, -and :meth:`map` or :meth:`map_async` to add a number of -requests. The following code uses a :class:`Pool` to spread requests -across 5 worker processes and retrieve a list of results:: +Two other classes, :class:`~multiprocessing.pool.Pool` and +:class:`~multiprocessing.Manager`, provide higher-level interfaces. +:class:`~multiprocessing.pool.Pool` will create a fixed number of worker +processes, and requests can then be distributed to the workers by calling +:meth:`~multiprocessing.pool.Pool.apply` or +:meth:`~multiprocessing.pool.Pool.apply_async` to add a single request, and +:meth:`~multiprocessing.pool.Pool.map` or +:meth:`~multiprocessing.pool.Pool.map_async` to add a number of +requests. The following code uses a :class:`~multiprocessing.pool.Pool` to +spread requests across 5 worker processes and retrieve a list of results:: from multiprocessing import Pool @@ -646,15 +651,18 @@ This produces the following output:: 33452526613163807108170062053440751665152000000000 ... -The other high-level interface, the :class:`Manager` class, creates a -separate server process that can hold master copies of Python data +The other high-level interface, the :class:`~multiprocessing.Manager` class, +creates a separate server process that can hold master copies of Python data structures. Other processes can then access and modify these data structures using proxy objects. The following example creates a shared dictionary by calling the :meth:`dict` method; the worker processes then insert values into the dictionary. (Locking is not done for you automatically, which doesn't matter in this example. -:class:`Manager`'s methods also include :meth:`Lock`, :meth:`RLock`, -and :meth:`Semaphore` to create shared locks.) +:class:`~multiprocessing.Manager`'s methods also include +:meth:`~multiprocessing.managers.SyncManager.Lock`, +:meth:`~multiprocessing.managers.SyncManager.RLock`, +and :meth:`~multiprocessing.managers.SyncManager.Semaphore` to create +shared locks.) :: @@ -824,7 +832,7 @@ documentation for a :ref:`complete list `; here's a sample: format, followed by a percent sign. ===== ======================================================================== -Classes and types can define a :meth:`__format__` method to control how they're +Classes and types can define a :meth:`~object.__format__` method to control how they're formatted. It receives a single argument, the format specifier:: def __format__(self, format_spec): @@ -834,7 +842,7 @@ formatted. It receives a single argument, the format specifier:: return str(self) There's also a :func:`format` builtin that will format a single -value. It calls the type's :meth:`__format__` method with the +value. It calls the type's :meth:`~object.__format__` method with the provided specifier:: >>> format(75.6564, '.2f') @@ -997,9 +1005,10 @@ sequence of bytes:: u'\u31ef \u3244' Byte arrays support most of the methods of string types, such as -:meth:`startswith`/:meth:`endswith`, :meth:`find`/:meth:`rfind`, -and some of the methods of lists, such as :meth:`append`, -:meth:`pop`, and :meth:`reverse`. +:meth:`~bytearray.startswith`/:meth:`~bytearray.endswith`, +:meth:`~bytearray.find`/:meth:`~bytearray.rfind`, +and some of the methods of lists, such as :meth:`~bytearray.append`, +:meth:`~bytearray.pop`, and :meth:`~bytearray.reverse`. :: @@ -1028,56 +1037,58 @@ PEP 3116: New I/O Library Python's built-in file objects support a number of methods, but file-like objects don't necessarily support all of them. Objects that -imitate files usually support :meth:`read` and :meth:`write`, but they -may not support :meth:`readline`, for example. Python 3.0 introduces -a layered I/O library in the :mod:`io` module that separates buffering -and text-handling features from the fundamental read and write -operations. +imitate files usually support :meth:`!read` and +:meth:`!write`, but they may not support :meth:`!readline`, +for example. Python 3.0 introduces a layered I/O library in the :mod:`io` +module that separates buffering and text-handling features from the +fundamental read and write operations. There are three levels of abstract base classes provided by the :mod:`io` module: -* :class:`RawIOBase` defines raw I/O operations: :meth:`read`, - :meth:`readinto`, - :meth:`write`, :meth:`seek`, :meth:`tell`, :meth:`truncate`, - and :meth:`close`. +* :class:`~io.RawIOBase` defines raw I/O operations: :meth:`~io.RawIOBase.read`, + :meth:`~io.RawIOBase.readinto`, :meth:`~io.RawIOBase.write`, + :meth:`~io.IOBase.seek`, :meth:`~io.IOBase.tell`, :meth:`~io.IOBase.truncate`, + and :meth:`~io.IOBase.close`. Most of the methods of this class will often map to a single system call. - There are also :meth:`readable`, :meth:`writable`, and :meth:`seekable` - methods for determining what operations a given object will allow. + There are also :meth:`~io.IOBase.readable`, :meth:`~io.IOBase.writable`, + and :meth:`~io.IOBase.seekable` methods for determining what operations a + given object will allow. Python 3.0 has concrete implementations of this class for files and sockets, but Python 2.6 hasn't restructured its file and socket objects in this way. -* :class:`BufferedIOBase` is an abstract base class that +* :class:`~io.BufferedIOBase` is an abstract base class that buffers data in memory to reduce the number of system calls used, making I/O processing more efficient. - It supports all of the methods of :class:`RawIOBase`, - and adds a :attr:`raw` attribute holding the underlying raw object. + It supports all of the methods of :class:`~io.RawIOBase`, + and adds a :attr:`~io.BufferedIOBase.raw` attribute holding the underlying + raw object. There are five concrete classes implementing this ABC. - :class:`BufferedWriter` and :class:`BufferedReader` are for objects - that support write-only or read-only usage that have a :meth:`seek` - method for random access. :class:`BufferedRandom` objects support + :class:`~io.BufferedWriter` and :class:`~io.BufferedReader` are for objects + that support write-only or read-only usage that have a :meth:`~io.IOBase.seek` + method for random access. :class:`~io.BufferedRandom` objects support read and write access upon the same underlying stream, and - :class:`BufferedRWPair` is for objects such as TTYs that have both + :class:`~io.BufferedRWPair` is for objects such as TTYs that have both read and write operations acting upon unconnected streams of data. - The :class:`BytesIO` class supports reading, writing, and seeking + The :class:`~io.BytesIO` class supports reading, writing, and seeking over an in-memory buffer. .. index:: single: universal newlines; What's new -* :class:`TextIOBase`: Provides functions for reading and writing +* :class:`~io.TextIOBase`: Provides functions for reading and writing strings (remember, strings will be Unicode in Python 3.0), - and supporting :term:`universal newlines`. :class:`TextIOBase` defines + and supporting :term:`universal newlines`. :class:`~io.TextIOBase` defines the :meth:`readline` method and supports iteration upon objects. - There are two concrete implementations. :class:`TextIOWrapper` + There are two concrete implementations. :class:`~io.TextIOWrapper` wraps a buffered I/O object, supporting all of the methods for - text I/O and adding a :attr:`buffer` attribute for access - to the underlying object. :class:`StringIO` simply buffers + text I/O and adding a :attr:`~io.TextIOBase.buffer` attribute for access + to the underlying object. :class:`~io.StringIO` simply buffers everything in memory without ever writing anything to disk. (In Python 2.6, :class:`io.StringIO` is implemented in @@ -1161,7 +1172,7 @@ Some object-oriented languages such as Java support interfaces, declaring that a class has a given set of methods or supports a given access protocol. Abstract Base Classes (or ABCs) are an equivalent feature for Python. The ABC support consists of an :mod:`abc` module -containing a metaclass called :class:`ABCMeta`, special handling of +containing a metaclass called :class:`~abc.ABCMeta`, special handling of this metaclass by the :func:`isinstance` and :func:`issubclass` builtins, and a collection of basic ABCs that the Python developers think will be widely useful. Future versions of Python will probably @@ -1171,17 +1182,17 @@ Let's say you have a particular class and wish to know whether it supports dictionary-style access. The phrase "dictionary-style" is vague, however. It probably means that accessing items with ``obj[1]`` works. Does it imply that setting items with ``obj[2] = value`` works? -Or that the object will have :meth:`keys`, :meth:`values`, and :meth:`items` -methods? What about the iterative variants such as :meth:`iterkeys`? :meth:`copy` -and :meth:`update`? Iterating over the object with :func:`iter`? +Or that the object will have :meth:`!keys`, :meth:`!values`, and :meth:`!items` +methods? What about the iterative variants such as :meth:`!iterkeys`? +:meth:`!copy`and :meth:`!update`? Iterating over the object with :func:`!iter`? The Python 2.6 :mod:`collections` module includes a number of different ABCs that represent these distinctions. :class:`Iterable` -indicates that a class defines :meth:`__iter__`, and -:class:`Container` means the class defines a :meth:`__contains__` +indicates that a class defines :meth:`~object.__iter__`, and +:class:`Container` means the class defines a :meth:`~object.__contains__` method and therefore supports ``x in y`` expressions. The basic dictionary interface of getting items, setting items, and -:meth:`keys`, :meth:`values`, and :meth:`items`, is defined by the +:meth:`!keys`, :meth:`!values`, and :meth:`!items`, is defined by the :class:`MutableMapping` ABC. You can derive your own classes from a particular ABC @@ -1195,7 +1206,7 @@ to indicate they support that ABC's interface:: Alternatively, you could write the class without deriving from the desired ABC and instead register the class by -calling the ABC's :meth:`register` method:: +calling the ABC's :meth:`~abc.ABCMeta.register` method:: import collections @@ -1205,10 +1216,10 @@ calling the ABC's :meth:`register` method:: collections.MutableMapping.register(Storage) For classes that you write, deriving from the ABC is probably clearer. -The :meth:`register` method is useful when you've written a new +The :meth:`~abc.ABCMeta.register` method is useful when you've written a new ABC that can describe an existing type or class, or if you want to declare that some third-party class implements an ABC. -For example, if you defined a :class:`PrintableType` ABC, +For example, if you defined a :class:`!PrintableType` ABC, it's legal to do:: # Register Python's types @@ -1255,16 +1266,16 @@ metaclass in a class definition:: ... -In the :class:`Drawable` ABC above, the :meth:`draw_doubled` method +In the :class:`!Drawable` ABC above, the :meth:`!draw_doubled` method renders the object at twice its size and can be implemented in terms -of other methods described in :class:`Drawable`. Classes implementing +of other methods described in :class:`!Drawable`. Classes implementing this ABC therefore don't need to provide their own implementation -of :meth:`draw_doubled`, though they can do so. An implementation -of :meth:`draw` is necessary, though; the ABC can't provide +of :meth:`!draw_doubled`, though they can do so. An implementation +of :meth:`!draw` is necessary, though; the ABC can't provide a useful generic implementation. -You can apply the ``@abstractmethod`` decorator to methods such as -:meth:`draw` that must be implemented; Python will then raise an +You can apply the :deco:`~abc.abstractmethod` decorator to methods such as +:meth:`!draw` that must be implemented; Python will then raise an exception for classes that don't define the method. Note that the exception is only raised when you actually try to create an instance of a subclass lacking the method:: @@ -1288,7 +1299,7 @@ Abstract data attributes can be declared using the def readonly(self): return self._x -Subclasses must then define a :meth:`readonly` property. +Subclasses must then define a ``readonly`` property. .. seealso:: @@ -1528,8 +1539,8 @@ Some smaller changes made to the core Python language are: the :exc:`StopIteration` exception will be raised. (Backported in :issue:`2719`.) -* Tuples now have :meth:`index` and :meth:`count` methods matching the - list type's :meth:`index` and :meth:`count` methods:: +* Tuples now have :meth:`~tuple.index` and :meth:`~tuple.count` methods + matching the list type's :meth:`~list.index` and :meth:`~list.count` methods:: >>> t = (0,1,2,3,4,0,1,2) >>> t.index(3) @@ -2738,13 +2749,13 @@ numbers. .. ====================================================================== -The :mod:`future_builtins` module +The :mod:`!future_builtins` module -------------------------------------- Python 3.0 makes many changes to the repertoire of built-in functions, and most of the changes can't be introduced in the Python 2.x series because they would break compatibility. -The :mod:`future_builtins` module provides versions +The :mod:`!future_builtins` module provides versions of these built-in functions that can be imported when writing 3.0-compatible code. diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index caed3192be871d..7296296d144803 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -858,8 +858,8 @@ Some smaller changes made to the core Python language are: .. XXX bytearray doesn't seem to be documented -* When using :class:`@classmethod ` and - :class:`@staticmethod ` to wrap +* When using :deco:`classmethod` and + :deco:`staticmethod` to wrap methods as class or static methods, the wrapper object now exposes the wrapped function as their :attr:`~method.__func__` attribute. @@ -1541,7 +1541,7 @@ changes, or look through the Subversion logs for all the details. buffer API, which fixed a test suite failure (fix by Antoine Pitrou; :issue:`7133`) and automatically set OpenSSL's :c:macro:`!SSL_MODE_AUTO_RETRY`, which will prevent an error - code being returned from :meth:`recv` operations that trigger an SSL + code being returned from :meth:`!recv` operations that trigger an SSL renegotiation (fix by Antoine Pitrou; :issue:`8222`). The :func:`~ssl.SSLContext.wrap_socket` constructor function now takes a @@ -2031,7 +2031,7 @@ version 1.3. Some of the new features are: * ElementTree's code for converting trees to a string has been significantly reworked, making it roughly twice as fast in many cases. The :meth:`ElementTree.write() ` - and :meth:`Element.write` methods now have a *method* parameter that can be + and :meth:`!Element.write` methods now have a *method* parameter that can be "xml" (the default), "html", or "text". HTML mode will output empty elements as ```` instead of ````, and text mode will skip over elements and only output the text chunks. If @@ -2044,7 +2044,7 @@ version 1.3. Some of the new features are: Namespace handling has also been improved. All ``xmlns:`` declarations are now output on the root element, not scattered throughout the resulting XML. You can set the default namespace for a tree - by setting the :attr:`default_namespace` attribute and can + by setting the :attr:`!default_namespace` attribute and can register new prefixes with :meth:`~xml.etree.ElementTree.register_namespace`. In XML mode, you can use the true/false *xml_declaration* parameter to suppress the XML declaration. @@ -2196,8 +2196,6 @@ Changes to Python's build process and to the C API include: locale-independent way. (Added by Eric Smith; :issue:`5793`.) - .. XXX these macros don't seem to be described in the c-api docs. - * Removed function: :c:func:`!PyEval_CallObject` is now only available as a macro. A function version was being kept around to preserve ABI linking compatibility, but that was in 1997; it can certainly be @@ -2234,7 +2232,7 @@ Changes to Python's build process and to the C API include: * When using the :c:type:`PyMemberDef` structure to define attributes of a type, Python will no longer let you try to delete or set a - :c:macro:`T_STRING_INPLACE` attribute. + :c:macro:`!T_STRING_INPLACE` attribute. .. rev 79644 @@ -2259,12 +2257,12 @@ Changes to Python's build process and to the C API include: :issue:`6491`.) * The :program:`configure` script now checks for floating-point rounding bugs - on certain 32-bit Intel chips and defines a :c:macro:`X87_DOUBLE_ROUNDING` + on certain 32-bit Intel chips and defines a :c:macro:`!X87_DOUBLE_ROUNDING` preprocessor definition. No code currently uses this definition, but it's available if anyone wishes to use it. (Added by Mark Dickinson; :issue:`2937`.) - :program:`configure` also now sets a :envvar:`LDCXXSHARED` Makefile + :program:`configure` also now sets a :envvar:`!LDCXXSHARED` Makefile variable for supporting C++ linking. (Contributed by Arfrever Frehtes Taifersar Arahesis; :issue:`1222585`.) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 3c815721a92f8c..d90a18076e5488 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -551,11 +551,12 @@ Patterns and classes If you are using classes to structure your data, you can use as a pattern the class name followed by an argument list resembling a constructor. This -pattern has the ability to capture class attributes into variables:: +pattern has the ability to capture instance attributes into variables:: class Point: - x: int - y: int + def __init__(self, x, y): + self.x = x + self.y = y def location(point): match point: @@ -846,8 +847,8 @@ Other Language Changes respectively. (Contributed by Joshua Bronson, Daniel Pope, and Justin Wang in :issue:`31861`.) -* Static methods (:func:`@staticmethod `) and class methods - (:func:`@classmethod `) now inherit the method attributes +* Static methods (:deco:`staticmethod`) and class methods + (:deco:`classmethod`) now inherit the method attributes (``__module__``, ``__name__``, ``__qualname__``, ``__doc__``, ``__annotations__``) and have a new ``__wrapped__`` attribute. Moreover, static methods are now callable as regular functions. @@ -900,7 +901,7 @@ Improved Modules asyncio ------- -Add missing :meth:`~asyncio.events.AbstractEventLoop.connect_accepted_socket` +Add missing :meth:`~asyncio.loop.connect_accepted_socket` method. (Contributed by Alex Grönholm in :issue:`41332`.) @@ -932,7 +933,7 @@ Base32 Encoding with Extended Hex Alphabet. bdb --- -Add :meth:`~bdb.Breakpoint.clearBreakpoints` to reset all set breakpoints. +Add :meth:`!clearBreakpoints` to reset all set breakpoints. (Contributed by Irit Katriel in :issue:`24160`.) bisect @@ -1397,7 +1398,7 @@ A new verify flag :const:`~ssl.VERIFY_X509_PARTIAL_CHAIN` has been added. sqlite3 ------- -Add audit events for :func:`~sqlite3.connect/handle`, +Add audit events for :func:`~sqlite3.connect`, :meth:`~sqlite3.Connection.enable_load_extension`, and :meth:`~sqlite3.Connection.load_extension`. (Contributed by Erlend E. Aasland in :issue:`43762`.) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index a65f59c0a72315..2b6939cd323dcf 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -1191,9 +1191,23 @@ Deprecated replaced by :data:`calendar.JANUARY` and :data:`calendar.FEBRUARY`. (Contributed by Prince Roshan in :gh:`103636`.) -* :mod:`collections.abc`: Deprecated :class:`!collections.abc.ByteString`. - Prefer :class:`Sequence` or :class:`collections.abc.Buffer`. - For use in typing, prefer a union, like ``bytes | bytearray``, or :class:`collections.abc.Buffer`. +* :mod:`collections.abc`: Deprecated :class:`collections.abc.ByteString`. + + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` implements + the :ref:`buffer protocol ` at runtime. For use in type + annotations, either use :class:`~collections.abc.Buffer` or a union + that explicitly specifies the types your code supports (e.g., + ``bytes | bytearray | memoryview``). + + :class:`!ByteString` was originally intended to be an abstract class that + would serve as a supertype of both :class:`bytes` and :class:`bytearray`. + However, since the ABC never had any methods, knowing that an object was an + instance of :class:`!ByteString` never actually told you anything useful + about the object. Other common buffer types such as :class:`memoryview` were + also never understood as subtypes of :class:`!ByteString` (either at + runtime or by static type checkers). + + See :pep:`PEP 688 <688#current-options>` for more details. (Contributed by Shantanu Jain in :gh:`91896`.) * :mod:`datetime`: :class:`datetime.datetime`'s :meth:`~datetime.datetime.utcnow` and @@ -1301,7 +1315,7 @@ Deprecated :class:`collections.abc.Hashable` and :class:`collections.abc.Sized` respectively, are deprecated. (:gh:`94309`.) - * :class:`!typing.ByteString`, deprecated since Python 3.9, now causes a + * :class:`typing.ByteString`, deprecated since Python 3.9, now causes a :exc:`DeprecationWarning` to be emitted when it is used. (Contributed by Alex Waygood in :gh:`91896`.) @@ -2233,6 +2247,8 @@ Deprecated .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-future.rst Removed diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index e20e49325c01d5..092af5aaa1c97a 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -60,7 +60,7 @@ Summary -- Release Highlights .. This section singles out the most important changes in Python 3.13. Brevity is key. -Python 3.13 is the latest stable release of the Python programming +Python 3.13 is a stable release of the Python programming language, with a mix of changes to the language, the implementation and the standard library. The biggest changes include a new `interactive interpreter @@ -730,6 +730,22 @@ asyncio never awaited). (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.) +* The function and methods named ``create_task`` have received a new + ``**kwargs`` argument that is passed through to the task constructor. + This change was accidentally added in 3.13.3, + and broke the API contract for custom task factories. + Several third-party task factories implemented workarounds for this. + In 3.13.4 and later releases the old factory contract is honored + once again (until 3.14). + To keep the workarounds working, the extra ``**kwargs`` argument still + allows passing additional keyword arguments to :class:`~asyncio.Task` + and to custom task factories. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + (Contributed by Thomas Grainger in :gh:`128307`.) base64 ------ @@ -818,7 +834,7 @@ dbm (Contributed by Raymond Hettinger and Erlend E. Aasland in :gh:`100414`.) * Allow removing all items from the database through - the new :meth:`.gdbm.clear` and :meth:`.ndbm.clear` methods. + the new :meth:`!clear` methods of the GDBM and NDBM database objects. (Contributed by Donghee Na in :gh:`107122`.) @@ -1553,8 +1569,6 @@ and are now removed: * :pypi:`bcrypt`: Modern password hashing for your software and your servers. - * :pypi:`passlib`: - Comprehensive password hashing framework supporting over 30 schemes. * :pypi:`argon2-cffi`: The secure Argon2 password hashing algorithm. * :pypi:`legacycrypt`: @@ -1980,7 +1994,7 @@ New Deprecations (Contributed by Alex Waygood in :gh:`105566` and :gh:`105570`.) * Deprecate the :func:`typing.no_type_check_decorator` decorator function, - to be removed in in Python 3.15. + to be removed in Python 3.15. After eight years in the :mod:`typing` module, it has yet to be supported by any major type checker. (Contributed by Alex Waygood in :gh:`106309`.) @@ -2161,7 +2175,7 @@ New Features (Contributed by Sam Gross in :gh:`114329`.) * Add the :c:func:`PyList_Extend` and :c:func:`PyList_Clear` functions, - mirroring the Python :meth:`!list.extend` and :meth:`!list.clear` methods. + mirroring the Python :meth:`list.extend` and :meth:`list.clear` methods. (Contributed by Victor Stinner in :gh:`111138`.) * Add the :c:func:`PyLong_AsInt` function. @@ -2531,6 +2545,8 @@ Deprecated C APIs .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-3.18.rst .. include:: ../deprecations/c-api-pending-removal-in-future.rst @@ -2577,7 +2593,7 @@ Build Changes * The :file:`configure` option :option:`--with-system-libmpdec` now defaults to ``yes``. - The bundled copy of ``libmpdecimal`` will be removed in Python 3.15. + The bundled copy of ``libmpdec`` will be removed in Python 3.16. * Python built with :file:`configure` :option:`--with-trace-refs` (tracing references) is now ABI compatible with the Python release build diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 894f011ec86a30..67159b1599bef5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1,8 +1,9 @@ + **************************** What's new in Python 3.14 **************************** -:Editor: Hugo van Kemenade +:Editors: Adam Turner and Hugo van Kemenade .. Rules for maintenance: @@ -45,404 +46,480 @@ when researching a change. This article explains the new features in Python 3.14, compared to 3.13. - +Python 3.14 was released on 7 October 2025. For full details, see the :ref:`changelog `. .. seealso:: :pep:`745` -- Python 3.14 release schedule -.. note:: - - Prerelease users should be aware that this document is currently in draft - form. It will be updated substantially as Python 3.14 moves towards release, - so it's worth checking back even after reading earlier versions. - -Summary -- release highlights +Summary -- Release highlights ============================= .. This section singles out the most important changes in Python 3.14. Brevity is key. -Python 3.14 beta is the pre-release of the next version of the Python -programming language, with a mix of changes to the language, the -implementation and the standard library. - -The biggest changes to the implementation include template strings (:pep:`750`), -deferred evaluation of annotations (:pep:`649`), -and a new type of interpreter that uses tail calls. - -The library changes include the addition of a new :mod:`!annotationlib` module -for introspecting and wrapping annotations (:pep:`649`), -a new :mod:`!compression.zstd` module for Zstandard support (:pep:`784`), -plus syntax highlighting in the REPL, +Python 3.14 is the latest stable release of the Python programming +language, with a mix of changes to the language, the implementation, +and the standard library. +The biggest changes include :ref:`template string literals +`, +:ref:`deferred evaluation of annotations `, +and support for :ref:`subinterpreters ` in +the standard library. + +The library changes include significantly improved capabilities for +:ref:`introspection in asyncio `, +:ref:`support for Zstandard ` via a new +:mod:`compression.zstd` module, syntax highlighting in the REPL, as well as the usual deprecations and removals, and improvements in user-friendliness and correctness. -.. PEP-sized items next. - -* :ref:`PEP 649: deferred evaluation of annotations ` -* :ref:`PEP 741: Python Configuration C API ` -* :ref:`PEP 750: Template strings ` -* :ref:`PEP 758: Allow except and except* expressions without parentheses ` -* :ref:`PEP 761: Discontinuation of PGP signatures ` -* :ref:`PEP 765: Disallow return/break/continue that exit a finally block ` -* :ref:`PEP 768: Safe external debugger interface for CPython ` -* :ref:`PEP 784: Adding Zstandard to the standard library ` -* :ref:`A new type of interpreter ` -* :ref:`Syntax highlighting in PyREPL `, - and color output in :ref:`unittest `, - :ref:`argparse `, - :ref:`json ` and - :ref:`calendar ` CLIs -* :ref:`Binary releases for the experimental just-in-time compiler ` - - -Incompatible changes -==================== - -On platforms other than macOS and Windows, the default :ref:`start -method ` for :mod:`multiprocessing` -and :class:`~concurrent.futures.ProcessPoolExecutor` switches from -*fork* to *forkserver*. - -See :ref:`(1) ` and -:ref:`(2) ` for details. - -If you encounter :exc:`NameError`\s or pickling errors coming out of -:mod:`multiprocessing` or :mod:`concurrent.futures`, see the -:ref:`forkserver restrictions `. - -The interpreter avoids some reference count modifications internally when -it's safe to do so. This can lead to different values returned from -:func:`sys.getrefcount` and :c:func:`Py_REFCNT` compared to previous versions -of Python. See :ref:`below ` for details. - -New features -============ - -.. _whatsnew314-pep750: - -PEP 750: Template strings -------------------------- - -Template string literals (t-strings) are a generalization of f-strings, -using a ``t`` in place of the ``f`` prefix. Instead of evaluating -to :class:`str`, t-strings evaluate to a new :class:`!string.templatelib.Template` type: +This article doesn't attempt to provide a complete specification +of all new features, but instead gives a convenient overview. +For full details refer to the documentation, +such as the :ref:`Library Reference ` +and :ref:`Language Reference `. +To understand the complete implementation and design rationale for a change, +refer to the PEP for a particular new feature; +but note that PEPs usually are not kept up-to-date +once a feature has been fully implemented. +See `Porting to Python 3.14`_ for guidance on upgrading from +earlier versions of Python. -.. code-block:: python - - from string.templatelib import Template - - name = "World" - template: Template = t"Hello {name}" - -The template can then be combined with functions that operate on the template's -structure to produce a :class:`str` or a string-like result. -For example, sanitizing input: - -.. code-block:: python - - evil = "" - template = t"

{evil}

" - assert html(template) == "

<script>alert('evil')</script>

" - -As another example, generating HTML attributes from data: - -.. code-block:: python - - attributes = {"src": "shrubbery.jpg", "alt": "looks nice"} - template = t"" - assert html(template) == 'looks nice' - -Unlike f-strings, the ``html`` function has access to template attributes -containing the original information: static strings, interpolations, and values -from the original scope. Unlike existing templating approaches, t-strings build -from the well-known f-string syntax and rules. Template systems thus benefit -from Python tooling as they are much closer to the Python language, syntax, -scoping, and more. - -Writing template handlers is straightforward: +-------------- -.. code-block:: python +.. PEP-sized items next. - from string.templatelib import Template, Interpolation +Interpreter improvements: - def lower_upper(template: Template) -> str: - """Render static parts lowercased and interpolations uppercased.""" - parts: list[str] = [] - for item in template: - if isinstance(item, Interpolation): - parts.append(str(item.value).upper()) - else: - parts.append(item.lower()) - return "".join(parts) +* :pep:`649` and :pep:`749`: :ref:`Deferred evaluation of annotations + ` +* :pep:`734`: :ref:`Multiple interpreters in the standard library + ` +* :pep:`750`: :ref:`Template strings ` +* :pep:`758`: :ref:`Allow except and except* expressions without brackets + ` +* :pep:`765`: :ref:`Control flow in finally blocks + ` +* :pep:`768`: :ref:`Safe external debugger interface for CPython + ` +* :ref:`A new type of interpreter ` +* :ref:`Free-threaded mode improvements ` +* :ref:`Improved error messages ` +* :ref:`Incremental garbage collection ` - name = "world" - assert lower_upper(t"HELLO {name}") == "hello WORLD" +Significant improvements in the standard library: -With this in place, developers can write template systems to sanitize SQL, make -safe shell operations, improve logging, tackle modern ideas in web development -(HTML, CSS, and so on), and implement lightweight, custom business DSLs. +* :pep:`784`: :ref:`Zstandard support in the standard library + ` +* :ref:`whatsnew314-asyncio-introspection` +* :ref:`whatsnew314-concurrent-warnings-control` +* :ref:`Syntax highlighting in the default interactive shell + `, and color output in several + standard library CLIs -(Contributed by Jim Baker, Guido van Rossum, Paul Everitt, Koudai Aono, -Lysandros Nikolaou, Dave Peck, Adam Turner, Jelle Zijlstra, Bénédikt Tran, -and Pablo Galindo Salgado in :gh:`132661`.) +C API improvements: -.. seealso:: - :pep:`750`. +* :pep:`741`: :ref:`Python configuration C API ` +Platform support: -.. _whatsnew314-pep768: +* :pep:`776`: Emscripten is now an :ref:`officially supported platform + `, at :pep:`tier 3 <11#tier-3>`. -PEP 768: Safe external debugger interface for CPython ------------------------------------------------------ +Release changes: -:pep:`768` introduces a zero-overhead debugging interface that allows debuggers and profilers -to safely attach to running Python processes. This is a significant enhancement to Python's -debugging capabilities allowing debuggers to forego unsafe alternatives. See -:ref:`below ` for how this feature is leveraged to -implement the new :mod:`pdb` module's remote attaching capabilities. +* :pep:`779`: :ref:`Free-threaded Python is officially supported + ` +* :pep:`761`: :ref:`PGP signatures have been discontinued for official releases + ` +* :ref:`Windows and macOS binary releases now support the experimental + just-in-time compiler ` +* :ref:`Binary releases for Android are now provided ` -The new interface provides safe execution points for attaching debugger code without modifying -the interpreter's normal execution path or adding runtime overhead. This enables tools to -inspect and interact with Python applications in real-time without stopping or restarting -them — a crucial capability for high-availability systems and production environments. -For convenience, CPython implements this interface through the :mod:`sys` module with a -:func:`sys.remote_exec` function:: +New features +============ - sys.remote_exec(pid, script_path) +.. _whatsnew314-deferred-annotations: -This function allows sending Python code to be executed in a target process at the next safe -execution point. However, tool authors can also implement the protocol directly as described -in the PEP, which details the underlying mechanisms used to safely attach to running processes. +:pep:`649` & :pep:`749`: Deferred evaluation of annotations +------------------------------------------------------------ -Here's a simple example that inspects object types in a running Python process: +The :term:`annotations ` on functions, classes, and modules are no +longer evaluated eagerly. Instead, annotations are stored in special-purpose +:term:`annotate functions ` and evaluated only when +necessary (except if ``from __future__ import annotations`` is used). - .. code-block:: python +This change is designed to improve performance and usability of annotations +in Python in most circumstances. The runtime cost for defining annotations is +minimized, but it remains possible to introspect annotations at runtime. +It is no longer necessary to enclose annotations in strings if they +contain forward references. - import os - import sys - import tempfile +The new :mod:`annotationlib` module provides tools for inspecting deferred +annotations. Annotations may be evaluated in the :attr:`~annotationlib.Format.VALUE` +format (which evaluates annotations to runtime values, similar to the behavior in +earlier Python versions), the :attr:`~annotationlib.Format.FORWARDREF` format +(which replaces undefined names with special markers), and the +:attr:`~annotationlib.Format.STRING` format (which returns annotations as strings). - # Create a temporary script - with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: - script_path = f.name - f.write(f"import my_debugger; my_debugger.connect({os.getpid()})") - try: - # Execute in process with PID 1234 - print("Behold! An offering:") - sys.remote_exec(1234, script_path) - finally: - os.unlink(script_path) +This example shows how these formats behave: -The debugging interface has been carefully designed with security in mind and includes several -mechanisms to control access: +.. doctest:: -* A :envvar:`PYTHON_DISABLE_REMOTE_DEBUG` environment variable. -* A :option:`-X disable-remote-debug` command-line option. -* A :option:`--without-remote-debug` configure flag to completely disable the feature at build time. + >>> from annotationlib import get_annotations, Format + >>> def func(arg: Undefined): + ... pass + >>> get_annotations(func, format=Format.VALUE) + Traceback (most recent call last): + ... + NameError: name 'Undefined' is not defined + >>> get_annotations(func, format=Format.FORWARDREF) + {'arg': ForwardRef('Undefined', owner=)} + >>> get_annotations(func, format=Format.STRING) + {'arg': 'Undefined'} -A key implementation detail is that the interface piggybacks on the interpreter's existing evaluation -loop and safe points, ensuring zero overhead during normal execution while providing a reliable way -for external processes to coordinate debugging operations. +The :ref:`porting ` section contains guidance +on changes that may be needed due to these changes, though in the majority of +cases, code will continue working as-is. -(Contributed by Pablo Galindo Salgado, Matt Wozniski, and Ivona Stojanovic in :gh:`131591`.) +(Contributed by Jelle Zijlstra in :pep:`749` and :gh:`119180`; +:pep:`649` was written by Larry Hastings.) .. seealso:: - :pep:`768`. + :pep:`649` + Deferred Evaluation Of Annotations Using Descriptors + :pep:`749` + Implementing PEP 649 + + +.. _whatsnew314-multiple-interpreters: + +:pep:`734`: Multiple interpreters in the standard library +--------------------------------------------------------- + +The CPython runtime supports running multiple copies of Python in the +same process simultaneously and has done so for over 20 years. +Each of these separate copies is called an 'interpreter'. +However, the feature had been available only through +the :ref:`C-API `. + +That limitation is removed in Python 3.14, +with the new :mod:`concurrent.interpreters` module. + +There are at least two notable reasons why using multiple interpreters +has significant benefits: + +* they support a new (to Python), human-friendly concurrency model +* true multi-core parallelism + +For some use cases, concurrency in software improves efficiency and +can simplify design, at a high level. +At the same time, implementing and maintaining all but the simplest concurrency +is often a struggle for the human brain. +That especially applies to plain threads (for example, :mod:`threading`), +where all memory is shared between all threads. + +With multiple isolated interpreters, you can take advantage of a class +of concurrency models, like Communicating Sequential Processes (CSP) +or the actor model, that have found +success in other programming languages, like Smalltalk, Erlang, +Haskell, and Go. Think of multiple interpreters as threads +but with opt-in sharing. + +Regarding multi-core parallelism: as of Python 3.12, interpreters +are now sufficiently isolated from one another to be used in parallel +(see :pep:`684`). This unlocks a variety of CPU-intensive use cases +for Python that were limited by the :term:`GIL`. + +Using multiple interpreters is similar in many ways to +:mod:`multiprocessing`, in that they both provide isolated logical +"processes" that can run in parallel, with no sharing by default. +However, when using multiple interpreters, an application will use +fewer system resources and will operate more efficiently (since it +stays within the same process). Think of multiple interpreters as +having the isolation of processes with the efficiency of threads. + +.. XXX Add an example or two. +.. XXX Link to the not-yet-added HOWTO doc. + +While the feature has been around for decades, multiple interpreters +have not been used widely, due to low awareness and the lack of a +standard library module. Consequently, they currently have several +notable limitations, which are expected to improve significantly now +that the feature is going mainstream. + +Current limitations: + +* starting each interpreter has not been optimized yet +* each interpreter uses more memory than necessary + (work continues on extensive internal sharing between interpreters) +* there aren't many options *yet* for truly sharing objects or other + data between interpreters (other than :type:`memoryview`) +* many third-party extension modules on PyPI are not yet compatible + with multiple interpreters + (all standard library extension modules *are* compatible) +* the approach to writing applications that use multiple isolated + interpreters is mostly unfamiliar to Python users, for now + +The impact of these limitations will depend on future CPython +improvements, how interpreters are used, and what the community solves +through PyPI packages. Depending on the use case, the limitations may +not have much impact, so try it out! + +Furthermore, future CPython releases will reduce or eliminate overhead +and provide utilities that are less appropriate on PyPI. In the +meantime, most of the limitations can also be addressed through +extension modules, meaning PyPI packages can fill any gap for 3.14, and +even back to 3.12 where interpreters were finally properly isolated and +stopped sharing the :term:`GIL`. Likewise, libraries on PyPI are expected +to emerge for high-level abstractions on top of interpreters. + +Regarding extension modules, work is in progress to update some PyPI +projects, as well as tools like Cython, pybind11, nanobind, and PyO3. +The steps for isolating an extension module are found at +:ref:`isolating-extensions-howto`. +Isolating a module has a lot of overlap with what is required to support +:ref:`free-threading `, so the ongoing +work in the community in that area will help accelerate support +for multiple interpreters. + +Also added in 3.14: :ref:`concurrent.futures.InterpreterPoolExecutor +`. + +(Contributed by Eric Snow in :gh:`134939`.) + +.. seealso:: :pep:`734` + + +.. _whatsnew314-template-string-literals: + +:pep:`750`: Template string literals +------------------------------------ + +Template strings are a new mechanism for custom string processing. +They share the familiar syntax of f-strings but, unlike f-strings, +return an object representing the static and interpolated parts of +the string, instead of a simple :class:`str`. + +To write a t-string, use a ``'t'`` prefix instead of an ``'f'``: -.. _whatsnew314-pep784: - -PEP 784: Adding Zstandard to the standard library -------------------------------------------------- - -The new ``compression`` package contains modules :mod:`!compression.lzma`, -:mod:`!compression.bz2`, :mod:`!compression.gzip` and :mod:`!compression.zlib` -which re-export the :mod:`lzma`, :mod:`bz2`, :mod:`gzip` and :mod:`zlib` -modules respectively. The new import names under ``compression`` are the -canonical names for importing these compression modules going forward. However, -the existing modules names have not been deprecated. Any deprecation or removal -of the existing compression modules will occur no sooner than five years after -the release of 3.14. - -The new :mod:`!compression.zstd` module provides compression and decompression -APIs for the Zstandard format via bindings to `Meta's zstd library -`__. Zstandard is a widely adopted, highly -efficient, and fast compression format. In addition to the APIs introduced in -:mod:`!compression.zstd`, support for reading and writing Zstandard compressed -archives has been added to the :mod:`tarfile`, :mod:`zipfile`, and -:mod:`shutil` modules. +.. doctest:: -Here's an example of using the new module to compress some data: + >>> variety = 'Stilton' + >>> template = t'Try some {variety} cheese!' + >>> type(template) + -.. code-block:: python +:class:`~string.templatelib.Template` objects provide access to the static +and interpolated (in curly braces) parts of a string *before* they are combined. +Iterate over :class:`!Template` instances to access their parts in order: - from compression import zstd - import math +.. testsetup:: - data = str(math.pi).encode() * 20 + variety = 'Stilton' + template = t'Try some {variety} cheese!' - compressed = zstd.compress(data) +.. doctest:: - ratio = len(compressed) / len(data) - print(f"Achieved compression ratio of {ratio}") + >>> list(template) + ['Try some ', Interpolation('Stilton', 'variety', None, ''), ' cheese!'] -As can be seen, the API is similar to the APIs of the :mod:`!lzma` and -:mod:`!bz2` modules. +It's easy to write (or call) code to process :class:`!Template` instances. +For example, here's a function that renders static parts lowercase and +:class:`~string.templatelib.Interpolation` instances uppercase: -(Contributed by Emma Harper Smith, Adam Turner, Gregory P. Smith, Tomas Roun, -Victor Stinner, and Rogdham in :gh:`132983`) +.. code-block:: python -.. seealso:: - :pep:`784`. + from string.templatelib import Interpolation + def lower_upper(template): + """Render static parts lowercase and interpolations uppercase.""" + parts = [] + for part in template: + if isinstance(part, Interpolation): + parts.append(str(part.value).upper()) + else: + parts.append(part.lower()) + return ''.join(parts) -.. _whatsnew314-remote-pdb: + name = 'Wenslydale' + template = t'Mister {name}' + assert lower_upper(template) == 'mister WENSLYDALE' -Remote attaching to a running Python process with PDB ------------------------------------------------------ +Because :class:`!Template` instances distinguish between static strings and +interpolations at runtime, they can be useful for sanitising user input. +Writing a :func:`!html` function that escapes user input in HTML is an exercise +left to the reader! +Template processing code can provide improved flexibility. +For instance, a more advanced :func:`!html` function could accept +a :class:`!dict` of HTML attributes directly in the template: -The :mod:`pdb` module now supports remote attaching to a running Python process -using a new ``-p PID`` command-line option: +.. code-block:: python -.. code-block:: sh + attributes = {'src': 'limburger.jpg', 'alt': 'lovely cheese'} + template = t'' + assert html(template) == 'lovely cheese' - python -m pdb -p 1234 +Of course, template processing code does not need to return a string-like result. +An even *more* advanced :func:`!html` could return a custom type representing +a DOM-like structure. -This will connect to the Python process with the given PID and allow you to -debug it interactively. Notice that due to how the Python interpreter works -attaching to a remote process that is blocked in a system call or waiting for -I/O will only work once the next bytecode instruction is executed or when the -process receives a signal. +With t-strings in place, developers can write systems that sanitise SQL, +make safe shell operations, improve logging, tackle modern ideas in web +development (HTML, CSS, and so on), and implement lightweight custom business DSLs. -This feature uses :pep:`768` and the :func:`sys.remote_exec` function -to attach to the remote process and send the PDB commands to it. +(Contributed by Jim Baker, Guido van Rossum, Paul Everitt, Koudai Aono, +Lysandros Nikolaou, Dave Peck, Adam Turner, Jelle Zijlstra, Bénédikt Tran, +and Pablo Galindo Salgado in :gh:`132661`.) +.. seealso:: :pep:`750`. -(Contributed by Matt Wozniski and Pablo Galindo in :gh:`131591`.) -.. seealso:: - :pep:`768`. +.. _whatsnew314-remote-debugging: +:pep:`768`: Safe external debugger interface +-------------------------------------------- -.. _whatsnew314-pep758: +Python 3.14 introduces a zero-overhead debugging interface that allows +debuggers and profilers to safely attach to running Python processes +without stopping or restarting them. +This is a significant enhancement to Python's debugging capabilities, +meaning that unsafe alternatives are no longer required. -PEP 758 – Allow except and except* expressions without parentheses ------------------------------------------------------------------- +The new interface provides safe execution points for attaching debugger code +without modifying the interpreter's normal execution path +or adding any overhead at runtime. +Due to this, tools can now inspect and interact with Python applications +in real-time, which is a crucial capability for high-availability systems +and production environments. -The :keyword:`except` and :keyword:`except* ` expressions now allow -parentheses to be omitted when there are multiple exception types and the ``as`` clause is not used. -For example the following expressions are now valid: +For convenience, this interface is implemented in the :func:`sys.remote_exec` +function. For example: .. code-block:: python - try: - release_new_sleep_token_album() - except AlbumNotFound, SongsTooGoodToBeReleased: - print("Sorry, no new album this year.") - - # The same applies to except* (for exception groups): - try: - release_new_sleep_token_album() - except* AlbumNotFound, SongsTooGoodToBeReleased: - print("Sorry, no new album this year.") - -Check :pep:`758` for more details. + import sys + from tempfile import NamedTemporaryFile -(Contributed by Pablo Galindo and Brett Cannon in :gh:`131831`.) + with NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: + script_path = f.name + f.write(f'import my_debugger; my_debugger.connect({os.getpid()})') -.. seealso:: - :pep:`758`. + # Execute in process with PID 1234 + print('Behold! An offering:') + sys.remote_exec(1234, script_path) -.. _whatsnew314-pep649: +This function allows sending Python code to be executed in a target process +at the next safe execution point. +However, tool authors can also implement the protocol directly as described +in the PEP, which details the underlying mechanisms used to safely attach to +running processes. -PEP 649: deferred evaluation of annotations -------------------------------------------- +The debugging interface has been carefully designed with security in mind +and includes several mechanisms to control access: -The :term:`annotations ` on functions, classes, and modules are no -longer evaluated eagerly. Instead, annotations are stored in special-purpose -:term:`annotate functions ` and evaluated only when -necessary. This is specified in :pep:`649` and :pep:`749`. +* A :envvar:`PYTHON_DISABLE_REMOTE_DEBUG` environment variable. +* A :option:`-X disable-remote-debug` command-line option. +* A :option:`--without-remote-debug` configure flag to completely disable + the feature at build time. -This change is designed to make annotations in Python more performant and more -usable in most circumstances. The runtime cost for defining annotations is -minimized, but it remains possible to introspect annotations at runtime. -It is usually no longer necessary to enclose annotations in strings if they -contain forward references. +(Contributed by Pablo Galindo Salgado, Matt Wozniski, and Ivona Stojanovic +in :gh:`131591`.) -The new :mod:`annotationlib` module provides tools for inspecting deferred -annotations. Annotations may be evaluated in the :attr:`~annotationlib.Format.VALUE` -format (which evaluates annotations to runtime values, similar to the behavior in -earlier Python versions), the :attr:`~annotationlib.Format.FORWARDREF` format -(which replaces undefined names with special markers), and the -:attr:`~annotationlib.Format.STRING` format (which returns annotations as strings). +.. seealso:: :pep:`768`. -This example shows how these formats behave: -.. doctest:: +.. _whatsnew314-tail-call-interpreter: - >>> from annotationlib import get_annotations, Format - >>> def func(arg: Undefined): - ... pass - >>> get_annotations(func, format=Format.VALUE) - Traceback (most recent call last): - ... - NameError: name 'Undefined' is not defined - >>> get_annotations(func, format=Format.FORWARDREF) - {'arg': ForwardRef('Undefined', owner=)} - >>> get_annotations(func, format=Format.STRING) - {'arg': 'Undefined'} +A new type of interpreter +------------------------- -Implications for annotated code -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A new type of interpreter has been added to CPython. +It uses tail calls between small C functions that implement individual +Python opcodes, rather than one large C ``case`` statement. +For certain newer compilers, this interpreter provides +significantly better performance. Preliminary benchmarks suggest a geometric +mean of 3-5% faster on the standard ``pyperformance`` benchmark suite, +depending on platform and architecture. +The baseline is Python 3.14 built with Clang 19, without this new interpreter. -If you define annotations in your code (for example, for use with a static type -checker), then this change probably does not affect you: you can keep -writing annotations the same way you did with previous versions of Python. +This interpreter currently only works with Clang 19 and newer +on x86-64 and AArch64 architectures. +However, a future release of GCC is expected to support this as well. -You will likely be able to remove quoted strings in annotations, which are frequently -used for forward references. Similarly, if you use ``from __future__ import annotations`` -to avoid having to write strings in annotations, you may well be able to -remove that import. However, if you rely on third-party libraries that read annotations, -those libraries may need changes to support unquoted annotations before they -work as expected. +This feature is opt-in for now. Enabling profile-guided optimization is highly +recommendeded when using the new interpreter as it is the only configuration +that has been tested and validated for improved performance. +For further information, see :option:`--with-tail-call-interp`. -Implications for readers of ``__annotations__`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. note:: -If your code reads the ``__annotations__`` attribute on objects, you may want -to make changes in order to support code that relies on deferred evaluation of -annotations. For example, you may want to use :func:`annotationlib.get_annotations` -with the :attr:`~annotationlib.Format.FORWARDREF` format, as the :mod:`dataclasses` -module now does. + This is not to be confused with `tail call optimization`__ of Python + functions, which is currently not implemented in CPython. -Related changes -^^^^^^^^^^^^^^^ + This new interpreter type is an internal implementation detail of the CPython + interpreter. It doesn't change the visible behavior of Python programs at + all. It can improve their performance, but doesn't change anything else. -The changes in Python 3.14 are designed to rework how ``__annotations__`` -works at runtime while minimizing breakage to code that contains -annotations in source code and to code that reads ``__annotations__``. However, -if you rely on undocumented details of the annotation behavior or on private -functions in the standard library, there are many ways in which your code may -not work in Python 3.14. To safeguard your code against future changes, -use only the documented functionality of the :mod:`annotationlib` module. + __ https://en.wikipedia.org/wiki/Tail_call -``from __future__ import annotations`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(Contributed by Ken Jin in :gh:`128563`, with ideas on how to implement this +in CPython by Mark Shannon, Garrett Gu, Haoran Xu, and Josh Haberman.) -In Python 3.7, :pep:`563` introduced the ``from __future__ import annotations`` -directive, which turns all annotations into strings. This directive is now -considered deprecated and it is expected to be removed in a future version of Python. -However, this removal will not happen until after Python 3.13, the last version of -Python without deferred evaluation of annotations, reaches its end of life in 2029. -In Python 3.14, the behavior of code using ``from __future__ import annotations`` -is unchanged. +.. _whatsnew314-free-threaded-cpython: + +Free-threaded mode improvements +------------------------------- + +CPython's free-threaded mode (:pep:`703`), initially added in 3.13, +has been significantly improved in Python 3.14. +The implementation described in PEP 703 has been finished, including C API +changes, and temporary workarounds in the interpreter were replaced with +more permanent solutions. +The specializing adaptive interpreter (:pep:`659`) is now enabled +in free-threaded mode, which along with many other optimizations +greatly improves its performance. +The performance penalty on single-threaded code in free-threaded mode +is now roughly 5-10%, depending on the platform and C compiler used. + +From Python 3.14, when compiling extension modules for the free-threaded build of +CPython on Windows, the preprocessor variable ``Py_GIL_DISABLED`` now needs to +be specified by the build backend, as it will no longer be determined +automatically by the C compiler. For a running interpreter, the setting that +was used at compile time can be found using :func:`sysconfig.get_config_var`. + +The new :option:`-X context_aware_warnings <-X>` flag controls if +:ref:`concurrent safe warnings control ` +is enabled. The flag defaults to true for the free-threaded build +and false for the GIL-enabled build. + +A new :data:`~sys.flags.thread_inherit_context` flag has been added, +which if enabled means that threads created with :class:`threading.Thread` +start with a copy of the :class:`~contextvars.Context()` of the caller of +:meth:`~threading.Thread.start`. Most significantly, this makes the warning +filtering context established by :class:`~warnings.catch_warnings` be +"inherited" by threads (or asyncio tasks) started within that context. It also +affects other modules that use context variables, such as the :mod:`decimal` +context manager. +This flag defaults to true for the free-threaded build and false for +the GIL-enabled build. + +(Contributed by Sam Gross, Matt Page, Neil Schemenauer, Thomas Wouters, +Donghee Na, Kirill Podoprigora, Ken Jin, Itamar Oren, Brett Simmers, +Dino Viehland, Nathan Goldbaum, Ralf Gommers, Lysandros Nikolaou, Kumar Aditya, +Edgar Margffoy, and many others. +Some of these contributors are employed by Meta, which has continued to provide +significant engineering resources to support this project.) + + +.. _whatsnew314-improved-error-messages: Improved error messages ----------------------- @@ -463,47 +540,12 @@ Improved error messages ^^^^^^ SyntaxError: invalid syntax. Did you mean 'while'? - >>> asynch def fetch_data(): - ... pass - Traceback (most recent call last): - File "", line 1 - asynch def fetch_data(): - ^^^^^^ - SyntaxError: invalid syntax. Did you mean 'async'? - - >>> async def foo(): - ... awaid fetch_data() - Traceback (most recent call last): - File "", line 2 - awaid fetch_data() - ^^^^^ - SyntaxError: invalid syntax. Did you mean 'await'? - - >>> raisee ValueError("Error") - Traceback (most recent call last): - File "", line 1 - raisee ValueError("Error") - ^^^^^^ - SyntaxError: invalid syntax. Did you mean 'raise'? - While the feature focuses on the most common cases, some variations of misspellings may still result in regular syntax errors. (Contributed by Pablo Galindo in :gh:`132449`.) -* When unpacking assignment fails due to incorrect number of variables, the - error message prints the received number of values in more cases than before. - (Contributed by Tushar Sadhwani in :gh:`122239`.) - - .. code-block:: pycon - - >>> x, y, z = 1, 2, 3, 4 - Traceback (most recent call last): - File "", line 1, in - x, y, z = 1, 2, 3, 4 - ^^^^^^^ - ValueError: too many values to unpack (expected 3, got 4) - -* :keyword:`elif` statements that follow an :keyword:`else` block now have a specific error message. +* :keyword:`elif` statements that follow an :keyword:`else` block now have + a specific error message. (Contributed by Steele Farnsworth in :gh:`129902`.) .. code-block:: pycon @@ -519,11 +561,9 @@ Improved error messages ^^^^ SyntaxError: 'elif' block follows an 'else' block -* If a statement (:keyword:`pass`, :keyword:`del`, :keyword:`return`, - :keyword:`yield`, :keyword:`raise`, :keyword:`break`, :keyword:`continue`, - :keyword:`assert`, :keyword:`import`, :keyword:`from`) is passed to the - :ref:`if_expr` after :keyword:`else`, or one of :keyword:`pass`, - :keyword:`break`, or :keyword:`continue` is passed before :keyword:`if`, then the +* If a statement is passed to the :ref:`if_expr` after :keyword:`else`, + or one of :keyword:`pass`, :keyword:`break`, or :keyword:`continue` + is passed before :keyword:`if`, then the error message highlights where the :token:`~python-grammar:expression` is required. (Contributed by Sergey Miryanov in :gh:`129515`.) @@ -543,10 +583,9 @@ Improved error messages ^^^^^^^^ SyntaxError: expected expression before 'if', but statement is given - * When incorrectly closed strings are detected, the error message suggests - that the string may be intended to be part of the string. (Contributed by - Pablo Galindo in :gh:`88535`.) + that the string may be intended to be part of the string. + (Contributed by Pablo Galindo in :gh:`88535`.) .. code-block:: pycon @@ -555,8 +594,8 @@ Improved error messages SyntaxError: invalid syntax. Is this intended to be part of the string? * When strings have incompatible prefixes, the error now shows - which prefixes are incompatible. (Contributed by - Nikita Sobolev in :gh:`133197`.) + which prefixes are incompatible. + (Contributed by Nikita Sobolev in :gh:`133197`.) .. code-block:: pycon @@ -573,80 +612,101 @@ Improved error messages - Except handlers: ``except ... as ...`` - Pattern-match cases: ``case ... as ...`` - (Contributed by Nikita Sobolev in :gh:`123539`, - :gh:`123562`, and :gh:`123440`.) + (Contributed by Nikita Sobolev in :gh:`123539`, :gh:`123562`, and :gh:`123440`.) + +* Improved error message when trying to add an instance of an unhashable type to + a :class:`dict` or :class:`set`. + (Contributed by CF Bolz-Tereick and Victor Stinner in :gh:`132828`.) .. code-block:: pycon - >>> import ast as arr[0] - File "", line 1 - import ast as arr[0] - ^^^^^^ - SyntaxError: cannot use subscript as import target + >>> s = set() + >>> s.add({'pages': 12, 'grade': 'A'}) + Traceback (most recent call last): + File "", line 1, in + s.add({'pages': 12, 'grade': 'A'}) + ~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + TypeError: cannot use 'dict' as a set element (unhashable type: 'dict') + >>> d = {} + >>> l = [1, 2, 3] + >>> d[l] = 12 + Traceback (most recent call last): + File "", line 1, in + d[l] = 12 + ~^^^ + TypeError: cannot use 'list' as a dict key (unhashable type: 'list') + +* Improved error message when an object supporting the synchronous + context manager protocol is entered using :keyword:`async with` + instead of :keyword:`with`, + and vice versa for the asynchronous context manager protocol. + (Contributed by Bénédikt Tran in :gh:`128398`.) + -.. seealso:: - :pep:`649`. +.. _whatsnew314-zstandard: +:pep:`784`: Zstandard support in the standard library +----------------------------------------------------- -.. _whatsnew314-pep741: +The new :mod:`!compression` package contains modules :mod:`!compression.lzma`, +:mod:`!compression.bz2`, :mod:`!compression.gzip` and :mod:`!compression.zlib` +which re-export the :mod:`lzma`, :mod:`bz2`, :mod:`gzip` and :mod:`zlib` +modules respectively. The new import names under :mod:`!compression` are the +preferred names for importing these compression modules from Python 3.14. However, +the existing modules names have not been deprecated. Any deprecation or removal +of the existing compression modules will occur no sooner than five years after +the release of 3.14. -PEP 741: Python Configuration C API ------------------------------------ +The new :mod:`!compression.zstd` module provides compression and decompression +APIs for the Zstandard format via bindings to `Meta's zstd library +`__. Zstandard is a widely adopted, highly +efficient, and fast compression format. In addition to the APIs introduced in +:mod:`!compression.zstd`, support for reading and writing Zstandard compressed +archives has been added to the :mod:`tarfile`, :mod:`zipfile`, and +:mod:`shutil` modules. -Add a :ref:`PyInitConfig C API ` to configure the Python -initialization without relying on C structures and the ability to make -ABI-compatible changes in the future. +Here's an example of using the new module to compress some data: -Complete the :pep:`587` :ref:`PyConfig C API ` by adding -:c:func:`PyInitConfig_AddModule` which can be used to add a built-in extension -module; feature previously referred to as the “inittab”. +.. code-block:: python -Add :c:func:`PyConfig_Get` and :c:func:`PyConfig_Set` functions to get and set -the current runtime configuration. + from compression import zstd + import math -PEP 587 “Python Initialization Configuration” unified all the ways to configure -the Python initialization. This PEP unifies also the configuration of the -Python preinitialization and the Python initialization in a single API. -Moreover, this PEP only provides a single choice to embed Python, instead of -having two “Python” and “Isolated” choices (PEP 587), to simplify the API -further. + data = str(math.pi).encode() * 20 + compressed = zstd.compress(data) + ratio = len(compressed) / len(data) + print(f"Achieved compression ratio of {ratio}") -The lower level PEP 587 PyConfig API remains available for use cases with an -intentionally higher level of coupling to CPython implementation details (such -as emulating the full functionality of CPython’s CLI, including its -configuration mechanisms). +As can be seen, the API is similar to the APIs of the :mod:`!lzma` and +:mod:`!bz2` modules. -(Contributed by Victor Stinner in :gh:`107954`.) +(Contributed by Emma Harper Smith, Adam Turner, Gregory P. Smith, Tomas Roun, +Victor Stinner, and Rogdham in :gh:`132983`.) + +.. seealso:: :pep:`784`. -.. seealso:: - :pep:`741`. .. _whatsnew314-asyncio-introspection: Asyncio introspection capabilities ---------------------------------- -Added a new command-line interface to inspect running Python processes using -asynchronous tasks, available via: - -.. code-block:: bash - - python -m asyncio ps PID +Added a new command-line interface to inspect running Python processes +using asynchronous tasks, available via ``python -m asyncio ps PID`` +or ``python -m asyncio pstree PID``. -This tool inspects the given process ID (PID) and displays information about -currently running asyncio tasks. It outputs a task table: a flat -listing of all tasks, their names, their coroutine stacks, and which tasks are -awaiting them. +The ``ps`` subcommand inspects the given process ID (PID) and displays +information about currently running asyncio tasks. +It outputs a task table: a flat listing of all tasks, their names, +their coroutine stacks, and which tasks are awaiting them. -.. code-block:: bash - - python -m asyncio pstree PID -This tool fetches the same information, but renders a visual async call tree, -showing coroutine relationships in a hierarchical format. This command is -particularly useful for debugging long-running or stuck asynchronous programs. -It can help developers quickly identify where a program is blocked, what tasks -are pending, and how coroutines are chained together. +The ``pstree`` subcommand fetches the same information, but instead renders a +visual async call tree, showing coroutine relationships in a hierarchical format. +This command is particularly useful for debugging long-running or stuck +asynchronous programs. +It can help developers quickly identify where a program is blocked, +what tasks are pending, and how coroutines are chained together. For example given this code: @@ -654,23 +714,25 @@ For example given this code: import asyncio - async def play(track): + async def play_track(track): await asyncio.sleep(5) - print(f"🎵 Finished: {track}") + print(f'🎵 Finished: {track}') - async def album(name, tracks): + async def play_album(name, tracks): async with asyncio.TaskGroup() as tg: for track in tracks: - tg.create_task(play(track), name=track) + tg.create_task(play_track(track), name=track) async def main(): async with asyncio.TaskGroup() as tg: tg.create_task( - album("Sundowning", ["TNDNBTG", "Levitate"]), name="Sundowning") + play_album('Sundowning', ['TNDNBTG', 'Levitate']), + name='Sundowning') tg.create_task( - album("TMBTE", ["DYWTYLM", "Aqua Regia"]), name="TMBTE") + play_album('TMBTE', ['DYWTYLM', 'Aqua Regia']), + name='TMBTE') - if __name__ == "__main__": + if __name__ == '__main__': asyncio.run(main()) Executing the new tool on the running process will yield a table like this: @@ -679,277 +741,298 @@ Executing the new tool on the running process will yield a table like this: python -m asyncio ps 12345 - tid task id task name coroutine chain awaiter name awaiter id - --------------------------------------------------------------------------------------------------------------------------------------- - 8138752 0x564bd3d0210 Task-1 0x0 - 8138752 0x564bd3d0410 Sundowning _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 - 8138752 0x564bd3d0610 TMBTE _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 - 8138752 0x564bd3d0810 TNDNBTG _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 - 8138752 0x564bd3d0a10 Levitate _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 - 8138752 0x564bd3e0550 DYWTYLM _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 - 8138752 0x564bd3e0710 Aqua Regia _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 + tid task id task name coroutine stack awaiter chain awaiter name awaiter id + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + 1935500 0x7fc930c18050 Task-1 TaskGroup._aexit -> TaskGroup.__aexit__ -> main 0x0 + 1935500 0x7fc930c18230 Sundowning TaskGroup._aexit -> TaskGroup.__aexit__ -> album TaskGroup._aexit -> TaskGroup.__aexit__ -> main Task-1 0x7fc930c18050 + 1935500 0x7fc93173fa50 TMBTE TaskGroup._aexit -> TaskGroup.__aexit__ -> album TaskGroup._aexit -> TaskGroup.__aexit__ -> main Task-1 0x7fc930c18050 + 1935500 0x7fc93173fdf0 TNDNBTG sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album Sundowning 0x7fc930c18230 + 1935500 0x7fc930d32510 Levitate sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album Sundowning 0x7fc930c18230 + 1935500 0x7fc930d32890 DYWTYLM sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album TMBTE 0x7fc93173fa50 + 1935500 0x7fc93161ec30 Aqua Regia sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album TMBTE 0x7fc93173fa50 - -or: +or a tree like this: .. code-block:: bash python -m asyncio pstree 12345 └── (T) Task-1 - └── main - └── __aexit__ - └── _aexit + └── main example.py:13 + └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 ├── (T) Sundowning - │ └── album - │ └── __aexit__ - │ └── _aexit + │ └── album example.py:8 + │ └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + │ └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 │ ├── (T) TNDNBTG + │ │ └── play example.py:4 + │ │ └── sleep Lib/asyncio/tasks.py:702 │ └── (T) Levitate + │ └── play example.py:4 + │ └── sleep Lib/asyncio/tasks.py:702 └── (T) TMBTE - └── album - └── __aexit__ - └── _aexit + └── album example.py:8 + └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 ├── (T) DYWTYLM + │ └── play example.py:4 + │ └── sleep Lib/asyncio/tasks.py:702 └── (T) Aqua Regia + └── play example.py:4 + └── sleep Lib/asyncio/tasks.py:702 If a cycle is detected in the async await graph (which could indicate a programming issue), the tool raises an error and lists the cycle paths that -prevent tree construction. +prevent tree construction: -(Contributed by Pablo Galindo, Łukasz Langa, Yury Selivanov, and Marta -Gomez Macias in :gh:`91048`.) +.. code-block:: bash -.. _whatsnew314-tail-call: + python -m asyncio pstree 12345 -A new type of interpreter -------------------------- + ERROR: await-graph contains cycles - cannot print a tree! -A new type of interpreter has been added to CPython. -It uses tail calls between small C functions that implement individual -Python opcodes, rather than one large C case statement. -For certain newer compilers, this interpreter provides -significantly better performance. Preliminary numbers on our machines suggest -anywhere up to 30% faster Python code, and a geometric mean of 3-5% -faster on ``pyperformance`` depending on platform and architecture. The -baseline is Python 3.14 built with Clang 19 without this new interpreter. + cycle: Task-2 → Task-3 → Task-2 -This interpreter currently only works with Clang 19 and newer -on x86-64 and AArch64 architectures. However, we expect -that a future release of GCC will support this as well. +(Contributed by Pablo Galindo, Łukasz Langa, Yury Selivanov, and Marta +Gomez Macias in :gh:`91048`.) -This feature is opt-in for now. We highly recommend enabling profile-guided -optimization with the new interpreter as it is the only configuration we have -tested and can validate its improved performance. -For further information on how to build Python, see -:option:`--with-tail-call-interp`. -.. note:: +.. _whatsnew314-concurrent-warnings-control: - This is not to be confused with `tail call optimization`__ of Python - functions, which is currently not implemented in CPython. +Concurrent safe warnings control +-------------------------------- - This new interpreter type is an internal implementation detail of the CPython - interpreter. It doesn't change the visible behavior of Python programs at - all. It can improve their performance, but doesn't change anything else. +The :class:`warnings.catch_warnings` context manager will now optionally +use a context variable for warning filters. This is enabled by setting +the :data:`~sys.flags.context_aware_warnings` flag, either with the ``-X`` +command-line option or an environment variable. This gives predictable +warnings control when using :class:`~warnings.catch_warnings` combined with +multiple threads or asynchronous tasks. The flag defaults to true for the +free-threaded build and false for the GIL-enabled build. - __ https://en.wikipedia.org/wiki/Tail_call +(Contributed by Neil Schemenauer and Kumar Aditya in :gh:`130010`.) -.. attention:: - This section previously reported a 9-15% geometric mean speedup. This number has since been - cautiously revised down to 3-5%. While we expect performance results to be better - than what we report, our estimates are more conservative due to a - `compiler bug `_ found in - Clang/LLVM 19, which causes the normal interpreter to be slower. We were unaware of this bug, - resulting in inaccurate results. We sincerely apologize for - communicating results that were only accurate for LLVM v19.1.x and v20.1.0. In the meantime, - the bug has been fixed in LLVM v20.1.1 and for the upcoming v21.1, but it will remain - unfixed for LLVM v19.1.x and v20.1.0. Thus - any benchmarks with those versions of LLVM may produce inaccurate numbers. - (Thanks to Nelson Elhage for bringing this to light.) +Other language changes +====================== -(Contributed by Ken Jin in :gh:`128563`, with ideas on how to implement this -in CPython by Mark Shannon, Garrett Gu, Haoran Xu, and Josh Haberman.) - - -.. _whatsnew314-pyrepl-highlighting: - -Syntax highlighting in PyREPL ------------------------------ - -The default :term:`interactive` shell now highlights Python syntax as you -type. The feature is enabled by default unless the -:envvar:`PYTHON_BASIC_REPL` environment is set or any color-disabling -environment variables are used. See :ref:`using-on-controlling-color` for -details. - -The default color theme for syntax highlighting strives for good contrast -and uses exclusively the 4-bit VGA standard ANSI color codes for maximum -compatibility. The theme can be customized using an experimental API -``_colorize.set_theme()``. This can be called interactively, as well as -in the :envvar:`PYTHONSTARTUP` script. - -(Contributed by Łukasz Langa in :gh:`131507`.) - - -.. _whatsnew314-jit-compiler: - -Binary releases for the experimental just-in-time compiler ----------------------------------------------------------- - -The official macOS and Windows release binaries now include an *experimental* -just-in-time (JIT) compiler. Although it is **not** recommended for production -use, it can be tested by setting :envvar:`PYTHON_JIT=1 ` as an -environment variable. Downstream source builds and redistributors can use the -:option:`--enable-experimental-jit=yes-off` configuration option for similar -behavior. - -The JIT is at an early stage and still in active development. As such, the -typical performance impact of enabling it can range from 10% slower to 20% -faster, depending on workload. To aid in testing and evaluation, a set of -introspection functions has been provided in the :data:`sys._jit` namespace. -:func:`sys._jit.is_available` can be used to determine if the current executable -supports JIT compilation, while :func:`sys._jit.is_enabled` can be used to tell -if JIT compilation has been enabled for the current process. - -Currently, the most significant missing functionality is that native debuggers -and profilers like ``gdb`` and ``perf`` are unable to unwind through JIT frames -(Python debuggers and profilers, like :mod:`pdb` or :mod:`profile`, continue to -work without modification). Free-threaded builds do not support JIT compilation. - -Please report any bugs or major performance regressions that you encounter! - -.. seealso:: :pep:`744` +* All Windows code pages are now supported as 'cpXXX' codecs on Windows. + (Contributed by Serhiy Storchaka in :gh:`123803`.) +* Implement mixed-mode arithmetic rules combining real and complex numbers + as specified by the C standard since C99. + (Contributed by Sergey B Kirpichev in :gh:`69639`.) -Other language changes -====================== +* More syntax errors are now detected regardless of optimisation and + the :option:`-O` command-line option. + This includes writes to ``__debug__``, incorrect use of :keyword:`await`, + and asynchronous comprehensions outside asynchronous functions. + For example, ``python -O -c 'assert (__debug__ := 1)'`` + or ``python -O -c 'assert await 1'`` now produce :exc:`SyntaxError`\ s. + (Contributed by Irit Katriel and Jelle Zijlstra in :gh:`122245` & :gh:`121637`.) -* The default :term:`interactive` shell now supports import autocompletion. - This means that typing ``import foo`` and pressing ```` will suggest - modules starting with ``foo``. Similarly, typing ``from foo import b`` will - suggest submodules of ``foo`` starting with ``b``. Note that autocompletion - of module attributes is not currently supported. - (Contributed by Tomas Roun in :gh:`69605`.) +* When subclassing a pure C type, the C slots for the new type + are no longer replaced with a wrapped version on class creation + if they are not explicitly overridden in the subclass. + (Contributed by Tomasz Pytel in :gh:`132284`.) -* The :func:`map` built-in now has an optional keyword-only *strict* flag - like :func:`zip` to check that all the iterables are of equal length. - (Contributed by Wannes Boeykens in :gh:`119793`.) -* Incorrect usage of :keyword:`await` and asynchronous comprehensions - is now detected even if the code is optimized away by the :option:`-O` - command-line option. For example, ``python -O -c 'assert await 1'`` - now produces a :exc:`SyntaxError`. (Contributed by Jelle Zijlstra in :gh:`121637`.) +Built-ins +--------- -* Writes to ``__debug__`` are now detected even if the code is optimized - away by the :option:`-O` command-line option. For example, - ``python -O -c 'assert (__debug__ := 1)'`` now produces a - :exc:`SyntaxError`. (Contributed by Irit Katriel in :gh:`122245`.) +* The :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` methods now accept + ASCII :class:`bytes` and :term:`bytes-like objects `. + (Contributed by Daniel Pope in :gh:`129349`.) * Add class methods :meth:`float.from_number` and :meth:`complex.from_number` to convert a number to :class:`float` or :class:`complex` type correspondingly. - They raise an error if the argument is a string. + They raise a :exc:`TypeError` if the argument is not a real number. (Contributed by Serhiy Storchaka in :gh:`84978`.) -* Implement mixed-mode arithmetic rules combining real and complex numbers as - specified by C standards since C99. - (Contributed by Sergey B Kirpichev in :gh:`69639`.) - -* All Windows code pages are now supported as "cpXXX" codecs on Windows. - (Contributed by Serhiy Storchaka in :gh:`123803`.) - -* :class:`super` objects are now :mod:`pickleable ` and - :mod:`copyable `. - (Contributed by Serhiy Storchaka in :gh:`125767`.) - -* The :class:`memoryview` type now supports subscription, - making it a :term:`generic type`. - (Contributed by Brian Schubert in :gh:`126012`.) - * Support underscore and comma as thousands separators in the fractional part for floating-point presentation types of the new-style string formatting (with :func:`format` or :ref:`f-strings`). (Contributed by Sergey B Kirpichev in :gh:`87790`.) -* The :func:`bytes.fromhex` and :func:`bytearray.fromhex` methods now accept - ASCII :class:`bytes` and :term:`bytes-like objects `. - (Contributed by Daniel Pope in :gh:`129349`.) - -* Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions `. - It is interpreted unambiguously in many other regular expression engines, - unlike ``\Z``, which has subtly different behavior. - (Contributed by Serhiy Storchaka in :gh:`133306`.) +* The :func:`int` function no longer delegates to :meth:`~object.__trunc__`. + Classes that want to support conversion to :func:`!int` must implement + either :meth:`~object.__int__` or :meth:`~object.__index__`. + (Contributed by Mark Dickinson in :gh:`119743`.) -* ``\B`` in :mod:`regular expression ` now matches empty input string. - Now it is always the opposite of ``\b``. - (Contributed by Serhiy Storchaka in :gh:`124130`.) +* The :func:`map` function now has an optional keyword-only *strict* flag + like :func:`zip` to check that all the iterables are of equal length. + (Contributed by Wannes Boeykens in :gh:`119793`.) -* iOS and macOS apps can now be configured to redirect ``stdout`` and - ``stderr`` content to the system log. (Contributed by Russell Keith-Magee in - :gh:`127592`.) +* The :class:`memoryview` type now supports subscription, + making it a :term:`generic type`. + (Contributed by Brian Schubert in :gh:`126012`.) -* The iOS testbed is now able to stream test output while the test is running. - The testbed can also be used to run the test suite of projects other than - CPython itself. (Contributed by Russell Keith-Magee in :gh:`127592`.) +* Using :data:`NotImplemented` in a boolean context + will now raise a :exc:`TypeError`. + This has raised a :exc:`DeprecationWarning` since Python 3.9. + (Contributed by Jelle Zijlstra in :gh:`118767`.) -* Three-argument :func:`pow` now tries calling :meth:`~object.__rpow__` if - necessary. Previously it was only called in two-argument :func:`!pow` and the - binary power operator. +* Three-argument :func:`pow` now tries calling :meth:`~object.__rpow__` + if necessary. + Previously it was only called in two-argument :func:`!pow` + and the binary power operator. (Contributed by Serhiy Storchaka in :gh:`130104`.) -* Add a built-in implementation for HMAC (:rfc:`2104`) using formally verified - code from the `HACL* `__ project. - This implementation is used as a fallback when the OpenSSL implementation - of HMAC is not available. - (Contributed by Bénédikt Tran in :gh:`99108`.) +* :class:`super` objects are now :mod:`copyable ` and :mod:`pickleable + `. + (Contributed by Serhiy Storchaka in :gh:`125767`.) + + +Command line and environment +---------------------------- * The import time flag can now track modules that are already loaded ('cached'), via the new :option:`-X importtime=2 <-X>`. When such a module is imported, the ``self`` and ``cumulative`` times are replaced by the string ``cached``. + Values above ``2`` for ``-X importtime`` are now reserved for future use. - (Contributed by Noah Kim and Adam Turner in :gh:`118655`.) -* When subclassing from a pure C type, the C slots for the new type are no - longer replaced with a wrapped version on class creation if they are not - explicitly overridden in the subclass. - (Contributed by Tomasz Pytel in :gh:`132329`.) + (Contributed by Noah Kim and Adam Turner in :gh:`118655`.) * The command-line option :option:`-c` now automatically dedents its code argument before execution. The auto-dedentation behavior mirrors :func:`textwrap.dedent`. (Contributed by Jon Crall and Steven Sun in :gh:`103998`.) -* Improve error message when an object supporting the synchronous - context manager protocol is entered using :keyword:`async - with` instead of :keyword:`with`. - And vice versa with the asynchronous context manager protocol. - (Contributed by Bénédikt Tran in :gh:`128398`.) - * :option:`!-J` is no longer a reserved flag for Jython_, and now has no special meaning. (Contributed by Adam Turner in :gh:`133336`.) .. _Jython: https://www.jython.org/ -.. _whatsnew314-pep765: -PEP 765: Disallow ``return``/``break``/``continue`` that exit a ``finally`` block ---------------------------------------------------------------------------------- +.. _whatsnew314-bracketless-except: + +PEP 758: Allow ``except`` and ``except*`` expressions without brackets +---------------------------------------------------------------------- + +The :keyword:`except` and :keyword:`except* ` expressions +now allow brackets to be omitted when there are multiple exception types +and the ``as`` clause is not used. +For example: + +.. code-block:: python + + try: + connect_to_server() + except TimeoutError, ConnectionRefusedError: + print('The network has ceased to be!') + +(Contributed by Pablo Galindo and Brett Cannon in :pep:`758` and :gh:`131831`.) + -The compiler emits a :exc:`SyntaxWarning` when a :keyword:`return`, :keyword:`break` or -:keyword:`continue` statements appears where it exits a :keyword:`finally` block. +.. _whatsnew314-finally-syntaxwarning: + +PEP 765: Control flow in :keyword:`finally` blocks +-------------------------------------------------- + +The compiler now emits a :exc:`SyntaxWarning` when a :keyword:`return`, +:keyword:`break`, or :keyword:`continue` statement have the effect of +leaving a :keyword:`finally` block. This change is specified in :pep:`765`. +In situations where this change is inconvenient (such as those where the +warnings are redundant due to code linting), the :ref:`warning filter +` can be used to turn off all syntax warnings by adding +``ignore::SyntaxWarning`` as a filter. This can be specified in combination +with a filter that converts other warnings to errors (for example, passing +``-Werror -Wignore::SyntaxWarning`` as CLI options, or setting +``PYTHONWARNINGS=error,ignore::SyntaxWarning``). + +Note that applying such a filter at runtime using the :mod:`warnings` module +will only suppress the warning in code that is compiled *after* the filter is +adjusted. Code that is compiled prior to the filter adjustment (for example, +when a module is imported) will still emit the syntax warning. + +(Contributed by Irit Katriel in :gh:`130080`.) + + +.. _whatsnew314-incremental-gc: + +Incremental garbage collection +------------------------------ + +The cycle garbage collector is now incremental. +This means that maximum pause times are reduced +by an order of magnitude or more for larger heaps. + +There are now only two generations: young and old. +When :func:`gc.collect` is not called directly, the +GC is invoked a little less frequently. When invoked, it +collects the young generation and an increment of the +old generation, instead of collecting one or more generations. + +The behavior of :func:`!gc.collect` changes slightly: + +* ``gc.collect(1)``: Performs an increment of garbage collection, + rather than collecting generation 1. +* Other calls to :func:`!gc.collect` are unchanged. + +(Contributed by Mark Shannon in :gh:`108362`.) + + +Default interactive shell +------------------------- + +.. _whatsnew314-pyrepl-highlighting: + +* The default :term:`interactive` shell now highlights Python syntax. + The feature is enabled by default, save if :envvar:`PYTHON_BASIC_REPL` + or any other environment variable that disables colour is set. + See :ref:`using-on-controlling-color` for details. + + The default color theme for syntax highlighting strives for good contrast + and exclusively uses the 4-bit VGA standard ANSI color codes for maximum + compatibility. The theme can be customized using an experimental API + :func:`!_colorize.set_theme`. + This can be called interactively or in the :envvar:`PYTHONSTARTUP` script. + Note that this function has no stability guarantees, + and may change or be removed. + + (Contributed by Łukasz Langa in :gh:`131507`.) + +* The default :term:`interactive` shell now supports import auto-completion. + This means that typing ``import co`` and pressing :kbd:`` will suggest + modules starting with ``co``. Similarly, typing ``from concurrent import i`` + will suggest submodules of ``concurrent`` starting with ``i``. + Note that autocompletion of module attributes is not currently supported. + (Contributed by Tomas Roun in :gh:`69605`.) + New modules =========== -* :mod:`annotationlib`: For introspecting :term:`annotations `. - See :pep:`749` for more details. +* :mod:`annotationlib`: + For introspecting :term:`annotations `. + See :ref:`PEP 749 ` for more details. (Contributed by Jelle Zijlstra in :gh:`119180`.) +* :mod:`compression` (including :mod:`compression.zstd`): + A package for compression-related modules, + including a new module to support the Zstandard compression format. + See :ref:`PEP 784 ` for more details. + (Contributed by Emma Harper Smith, Adam Turner, Gregory P. Smith, Tomas Roun, + Victor Stinner, and Rogdham in :gh:`132983`.) + +* :mod:`concurrent.interpreters`: + Support for multiple interpreters in the standard library. + See :ref:`PEP 734 ` for more details. + (Contributed by Eric Snow in :gh:`134939`.) + +* :mod:`string.templatelib`: + Support for template string literals (t-strings). + See :ref:`PEP 750 ` for more details. + (Contributed by Jim Baker, Guido van Rossum, Paul Everitt, Koudai Aono, + Lysandros Nikolaou, Dave Peck, Adam Turner, Jelle Zijlstra, Bénédikt Tran, + and Pablo Galindo Salgado in :gh:`132661`.) + Improved modules ================ @@ -967,20 +1050,17 @@ argparse and subparser names if mistyped by the user. (Contributed by Savannah Ostrowski in :gh:`124456`.) - .. _whatsnew314-color-argparse: - -* Introduced the optional *color* parameter to - :class:`argparse.ArgumentParser`, enabling color for help text. - This can be controlled by :ref:`environment variables - `. Color has also been enabled for help in the - :ref:`stdlib CLIs ` which use :mod:`!argparse`. +* Enable color for help text, which can be disabled with the optional *color* + parameter to :class:`argparse.ArgumentParser`. + This can also be controlled by :ref:`environment variables + `. (Contributed by Hugo van Kemenade in :gh:`130645`.) ast --- -* Add :func:`ast.compare` for comparing two ASTs. +* Add :func:`~ast.compare`, a function for comparing two ASTs. (Contributed by Batuhan Taskaya and Jeremy Hylton in :gh:`60191`.) * Add support for :func:`copy.replace` for AST nodes. @@ -989,30 +1069,52 @@ ast * Docstrings are now removed from an optimized AST in optimization level 2. (Contributed by Irit Katriel in :gh:`123958`.) -* The ``repr()`` output for AST nodes now includes more information. +* The :func:`repr` output for AST nodes now includes more information. (Contributed by Tomas Roun in :gh:`116022`.) -* :func:`ast.parse`, when called with an AST as input, now always verifies - that the root node type is appropriate. +* When called with an AST as input, the :func:`~ast.parse` function + now always verifies that the root node type is appropriate. (Contributed by Irit Katriel in :gh:`130139`.) -* Add new ``--feature-version``, ``--optimize``, ``--show-empty`` options to - command-line interface. +* Add new options to the command-line interface: + :option:`--feature-version `, + :option:`--optimize `, and + :option:`--show-empty `. (Contributed by Semyon Moroz in :gh:`133367`.) -bdb ---- - -* The :mod:`bdb` module now supports the :mod:`sys.monitoring` backend. - (Contributed by Tian Gao in :gh:`124533`.) +asyncio +------- +* The function and methods named :func:`!create_task` now take an arbitrary + list of keyword arguments. All keyword arguments are passed to the + :class:`~asyncio.Task` constructor or the custom task factory. + (See :meth:`~asyncio.loop.set_task_factory` for details.) + The ``name`` and ``context`` keyword arguments are no longer special; + the name should now be set using the ``name`` keyword argument of the factory, + and ``context`` may be ``None``. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + + (Contributed by Thomas Grainger in :gh:`128307`.) + +* There are two new utility functions for + introspecting and printing a program's call graph: + :func:`~asyncio.capture_call_graph` and :func:`~asyncio.print_call_graph`. + See :ref:`Asyncio introspection capabilities + ` for more details. + (Contributed by Yury Selivanov, Pablo Galindo Salgado, and Łukasz Langa + in :gh:`91048`.) - .. _whatsnew314-color-calendar: calendar -------- +.. _whatsnew314-color-calendar: + * By default, today's date is highlighted in color in :mod:`calendar`'s :ref:`command-line ` text output. This can be controlled by :ref:`environment variables @@ -1023,19 +1125,27 @@ calendar concurrent.futures ------------------ -* Add :class:`~concurrent.futures.InterpreterPoolExecutor`, - which exposes "subinterpreters" (multiple Python interpreters in the - same process) to Python code. This is separate from the proposed API - in :pep:`734`. +.. _whatsnew314-concurrent-futures-interp-pool: + +* Add a new executor class, :class:`~concurrent.futures.InterpreterPoolExecutor`, + which exposes multiple Python interpreters in the same process + ('subinterpreters') to Python code. + This uses a pool of independent Python interpreters to execute calls + asynchronously. + + This is separate from the new :mod:`~concurrent.interpreters` module + introduced by :ref:`PEP 734 `. (Contributed by Eric Snow in :gh:`124548`.) .. _whatsnew314-concurrent-futures-start-method: -* The default :class:`~concurrent.futures.ProcessPoolExecutor` - :ref:`start method ` changed - from :ref:`fork ` to :ref:`forkserver - ` on platforms other than macOS and - Windows where it was already :ref:`spawn `. +* On Unix platforms other than macOS, :ref:`'forkserver' + ` is now the default :ref:`start + method ` for + :class:`~concurrent.futures.ProcessPoolExecutor` + (replacing :ref:`'fork' `). + This change does not affect Windows or macOS, where :ref:`'spawn' + ` remains the default start method. If the threading incompatible *fork* method is required, you must explicitly request it by supplying a multiprocessing context *mp_context* to @@ -1048,23 +1158,36 @@ concurrent.futures (Contributed by Gregory P. Smith in :gh:`84559`.) -* Add :meth:`concurrent.futures.ProcessPoolExecutor.terminate_workers` and - :meth:`concurrent.futures.ProcessPoolExecutor.kill_workers` as - ways to terminate or kill all living worker processes in the given pool. +* Add two new methods to :class:`~concurrent.futures.ProcessPoolExecutor`, + :meth:`~concurrent.futures.ProcessPoolExecutor.terminate_workers` + and :meth:`~concurrent.futures.ProcessPoolExecutor.kill_workers`, + as ways to terminate or kill all living worker processes in the given pool. (Contributed by Charles Machalow in :gh:`130849`.) -* Add the optional ``buffersize`` parameter to - :meth:`concurrent.futures.Executor.map` to limit the number of submitted +* Add the optional *buffersize* parameter to :meth:`Executor.map + ` to limit the number of submitted tasks whose results have not yet been yielded. If the buffer is full, iteration over the *iterables* pauses until a result is yielded from the buffer. (Contributed by Enzo Bonnal and Josh Rosenberg in :gh:`74028`.) +configparser +------------ + +* :mod:`!configparser` will no longer write config files it cannot read, + to improve security. + Attempting to :meth:`~configparser.ConfigParser.write` keys containing + delimiters or beginning with the section header pattern will raise an + :class:`~configparser.InvalidWriteError`. + (Contributed by Jacob Lincoln in :gh:`129270`.) + + contextvars ----------- -* Support context manager protocol by :class:`contextvars.Token`. +* Support the :term:`context manager` protocol + for :class:`~contextvars.Token` objects. (Contributed by Andrew Svetlov in :gh:`129889`.) @@ -1072,8 +1195,8 @@ ctypes ------ * The layout of :ref:`bit fields ` - in :class:`~ctypes.Structure` and :class:`~ctypes.Union` - now matches platform defaults (GCC/Clang or MSVC) more closely. + in :class:`~ctypes.Structure` and :class:`~ctypes.Union` objects + is now a closer match to platform defaults (GCC/Clang or MSVC). In particular, fields no longer overlap. (Contributed by Matthias Görgens in :gh:`97702`.) @@ -1092,7 +1215,7 @@ ctypes * On Windows, the :func:`~ctypes.CopyComPointer` function is now public. (Contributed by Jun Komoda in :gh:`127275`.) -* :func:`ctypes.memoryview_at` now exists to create a +* Add :func:`~ctypes.memoryview_at`, a function to create a :class:`memoryview` object that refers to the supplied pointer and length. This works like :func:`ctypes.string_at` except it avoids a buffer copy, and is typically useful when implementing pure Python @@ -1100,7 +1223,7 @@ ctypes (Contributed by Rian Hunter in :gh:`112018`.) * Complex types, :class:`~ctypes.c_float_complex`, - :class:`~ctypes.c_double_complex` and :class:`~ctypes.c_longdouble_complex`, + :class:`~ctypes.c_double_complex`, and :class:`~ctypes.c_longdouble_complex`, are now available if both the compiler and the ``libffi`` library support complex C types. (Contributed by Sergey B Kirpichev in :gh:`61103`.) @@ -1110,47 +1233,57 @@ ctypes (Contributed by Brian Ward in :gh:`119349`.) * Move :func:`ctypes.POINTER` types cache from a global internal cache - (``_pointer_type_cache``) to the :attr:`ctypes._CData.__pointer_type__` - attribute of the corresponding :mod:`ctypes` types. + (``_pointer_type_cache``) to the :attr:`_CData.__pointer_type__ + ` attribute of the corresponding + :mod:`!ctypes` types. This will stop the cache from growing without limits in some situations. (Contributed by Sergey Miryanov in :gh:`100926`.) -* The :class:`ctypes.py_object` type now supports subscription, +* The :class:`~ctypes.py_object` type now supports subscription, making it a :term:`generic type`. (Contributed by Brian Schubert in :gh:`132168`.) +* :mod:`!ctypes` now supports :term:`free-threading builds `. + (Contributed by Kumar Aditya and Peter Bierma in :gh:`127945`.) + + curses ------ * Add the :func:`~curses.assume_default_colors` function, a refinement of the :func:`~curses.use_default_colors` function which - allows to change the color pair ``0``. + allows changing the color pair ``0``. (Contributed by Serhiy Storchaka in :gh:`133139`.) + datetime -------- -* Add :meth:`datetime.time.strptime` and :meth:`datetime.date.strptime`. +* Add the :meth:`~datetime.date.strptime` method to the + :class:`datetime.date` and :class:`datetime.time` classes. (Contributed by Wannes Boeykens in :gh:`41431`.) + decimal ------- -* Add alternative :class:`~decimal.Decimal` constructor - :meth:`Decimal.from_number() `. +* Add :meth:`.Decimal.from_number` as an alternative constructor for + :class:`~decimal.Decimal`. (Contributed by Serhiy Storchaka in :gh:`121798`.) -* Expose :func:`decimal.IEEEContext` to support creation of contexts +* Expose :func:`~decimal.IEEEContext` to support creation of contexts corresponding to the IEEE 754 (2008) decimal interchange formats. (Contributed by Sergey B Kirpichev in :gh:`53032`.) + difflib ------- * Comparison pages with highlighted changes generated by the - :class:`difflib.HtmlDiff` class now support dark mode. + :class:`~difflib.HtmlDiff` class now support 'dark mode'. (Contributed by Jiahao Li in :gh:`129939`.) + dis --- @@ -1175,7 +1308,7 @@ dis errno ----- -* Add :data:`errno.EHWPOISON` error code. +* Add the :data:`~errno.EHWPOISON` error code constant. (Contributed by James Roy in :gh:`126585`.) @@ -1183,39 +1316,43 @@ faulthandler ------------ * Add support for printing the C stack trace on systems that - :ref:`support it ` via :func:`faulthandler.dump_c_stack` - or via the *c_stack* argument in :func:`faulthandler.enable`. + :ref:`support it ` via the new + :func:`~faulthandler.dump_c_stack` function or via the *c_stack* argument + in :func:`faulthandler.enable`. (Contributed by Peter Bierma in :gh:`127604`.) fnmatch ------- -* Added :func:`fnmatch.filterfalse` for excluding names matching a pattern. +* Add :func:`~fnmatch.filterfalse`, a function to reject names + matching a given pattern. (Contributed by Bénédikt Tran in :gh:`74598`.) fractions --------- -* Add support for converting any objects that have the - :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`. +* A :class:`~fractions.Fraction` object may now be constructed from any + object with the :meth:`!as_integer_ratio` method. (Contributed by Serhiy Storchaka in :gh:`82017`.) -* Add alternative :class:`~fractions.Fraction` constructor - :meth:`Fraction.from_number() `. +* Add :meth:`.Fraction.from_number` as an alternative constructor for + :class:`~fractions.Fraction`. (Contributed by Serhiy Storchaka in :gh:`121797`.) functools --------- -* Add support to :func:`functools.partial` and - :func:`functools.partialmethod` for :data:`functools.Placeholder` sentinels - to reserve a place for positional arguments. +* Add the :data:`~functools.Placeholder` sentinel. + This may be used with the :func:`~functools.partial` + or :func:`~functools.partialmethod` functions to reserve a place + for positional arguments in the returned :ref:`partial object + `. (Contributed by Dominykas Grigonis in :gh:`119127`.) -* Allow the *initial* parameter of :func:`functools.reduce` to be passed +* Allow the *initial* parameter of :func:`~functools.reduce` to be passed as a keyword argument. (Contributed by Sayandip Dutta in :gh:`125916`.) @@ -1233,16 +1370,17 @@ getopt getpass ------- -* Support keyboard feedback by :func:`getpass.getpass` via the keyword-only - optional argument ``echo_char``. Placeholder characters are rendered whenever - a character is entered, and removed when a character is deleted. +* Support keyboard feedback in the :func:`~getpass.getpass` function via + the keyword-only optional argument *echo_char*. + Placeholder characters are rendered whenever a character is entered, + and removed when a character is deleted. (Contributed by Semyon Moroz in :gh:`77065`.) graphlib -------- -* Allow :meth:`graphlib.TopologicalSorter.prepare` to be called more than once +* Allow :meth:`.TopologicalSorter.prepare` to be called more than once as long as sorting has not started. (Contributed by Daniel Pope in :gh:`130914`.) @@ -1250,13 +1388,14 @@ graphlib heapq ----- -* Add functions for working with max-heaps: +* The :mod:`!heapq` module has improved support for working with max-heaps, + via the following new functions: - * :func:`heapq.heapify_max`, - * :func:`heapq.heappush_max`, - * :func:`heapq.heappop_max`, - * :func:`heapq.heapreplace_max` - * :func:`heapq.heappushpop_max` + * :func:`~heapq.heapify_max` + * :func:`~heapq.heappush_max` + * :func:`~heapq.heappop_max` + * :func:`~heapq.heapreplace_max` + * :func:`~heapq.heappushpop_max` hmac @@ -1264,6 +1403,8 @@ hmac * Add a built-in implementation for HMAC (:rfc:`2104`) using formally verified code from the `HACL* `__ project. + This implementation is used as a fallback when the OpenSSL implementation + of HMAC is not available. (Contributed by Bénédikt Tran in :gh:`99108`.) @@ -1279,9 +1420,12 @@ http the command-line interface (``python -m http.server``) through the following options: - * ``--tls-cert ``: Path to the TLS certificate file. - * ``--tls-key ``: Optional path to the private key file. - * ``--tls-password-file ``: Optional path to the password file for the private key. + * :option:`--tls-cert \ `: + Path to the TLS certificate file. + * :option:`--tls-key \ `: + Optional path to the private key file. + * :option:`--tls-password-file \ `: + Optional path to the password file for the private key. (Contributed by Semyon Moroz in :gh:`85162`.) @@ -1289,7 +1433,7 @@ http imaplib ------- -* Add :meth:`IMAP4.idle() `, implementing the IMAP4 +* Add :meth:`.IMAP4.idle`, implementing the IMAP4 ``IDLE`` command as defined in :rfc:`2177`. (Contributed by Forest in :gh:`55454`.) @@ -1297,15 +1441,16 @@ imaplib inspect ------- -* :func:`inspect.signature` takes a new argument *annotation_format* to control +* :func:`~inspect.signature` takes a new argument *annotation_format* to control the :class:`annotationlib.Format` used for representing annotations. (Contributed by Jelle Zijlstra in :gh:`101552`.) -* :meth:`inspect.Signature.format` takes a new argument *unquote_annotations*. - If true, string :term:`annotations ` are displayed without surrounding quotes. +* :meth:`.Signature.format` takes a new argument *unquote_annotations*. + If true, string :term:`annotations ` are displayed without + surrounding quotes. (Contributed by Jelle Zijlstra in :gh:`101552`.) -* Add function :func:`inspect.ispackage` to determine whether an object is a +* Add function :func:`~inspect.ispackage` to determine whether an object is a :term:`package` or not. (Contributed by Zhikang Yan in :gh:`125634`.) @@ -1317,7 +1462,7 @@ io :exc:`BlockingIOError` if the operation cannot immediately return bytes. (Contributed by Giovanni Siragusa in :gh:`109523`.) -* Add protocols :class:`io.Reader` and :class:`io.Writer` as a simpler +* Add the :class:`~io.Reader` and :class:`~io.Writer` protocols as simpler alternatives to the pseudo-protocols :class:`typing.IO`, :class:`typing.TextIO`, and :class:`typing.BinaryIO`. (Contributed by Sebastian Rittau in :gh:`127648`.) @@ -1326,35 +1471,36 @@ io json ---- -* Add notes for JSON serialization errors that allow to identify the source - of the error. +* Add exception notes for JSON serialization errors that allow + identifying the source of the error. (Contributed by Serhiy Storchaka in :gh:`122163`.) -* Enable the :mod:`json` module to work as a script using the :option:`-m` - switch: :program:`python -m json`. +* Allow using the :mod:`json` module as a script using the :option:`-m` switch: + :program:`python -m json`. + This is now preferred to :program:`python -m json.tool`, + which is :term:`soft deprecated`. See the :ref:`JSON command-line interface ` documentation. (Contributed by Trey Hunner in :gh:`122873`.) - .. _whatsnew314-color-json: - * By default, the output of the :ref:`JSON command-line interface ` is highlighted in color. This can be controlled by :ref:`environment variables `. (Contributed by Tomas Roun in :gh:`131952`.) + linecache --------- -* :func:`linecache.getline` can retrieve source code for frozen modules. +* :func:`~linecache.getline` can now retrieve source code for frozen modules. (Contributed by Tian Gao in :gh:`131638`.) logging.handlers ---------------- -* :class:`logging.handlers.QueueListener` now implements the context - manager protocol, allowing it to be used in a :keyword:`with` statement. +* :class:`~logging.handlers.QueueListener` objects now support the + :term:`context manager` protocol. (Contributed by Charles Machalow in :gh:`132106`.) * :meth:`QueueListener.start ` now @@ -1366,20 +1512,19 @@ math ---- * Added more detailed error messages for domain errors in the module. - (Contributed by by Charlie Zhao and Sergey B Kirpichev in :gh:`101410`.) + (Contributed by Charlie Zhao and Sergey B Kirpichev in :gh:`101410`.) mimetypes --------- -* Document the command-line for :mod:`mimetypes`. - It now exits with ``1`` on failure instead of ``0`` - and ``2`` on incorrect command-line parameters instead of ``1``. - Also, errors are printed to stderr instead of stdout and their text is made - tighter. +* Add a public :ref:`command-line ` for the module, + invoked via :program:`python -m mimetypes`. (Contributed by Oleg Iarygin and Hugo van Kemenade in :gh:`93096`.) -* Add MS and :rfc:`8081` MIME types for fonts: +* Add several new MIME types based on RFCs and common usage: + + .. rubric:: Microsoft and :rfc:`8081` MIME types for fonts * Embedded OpenType: ``application/vnd.ms-fontobject`` * OpenType Layout (OTF) ``font/otf`` @@ -1387,18 +1532,14 @@ mimetypes * WOFF 1.0 ``font/woff`` * WOFF 2.0 ``font/woff2`` - (Contributed by Sahil Prajapati and Hugo van Kemenade in :gh:`84852`.) - -* Add :rfc:`9559` MIME types for Matroska audiovisual data container - structures, containing: + .. rubric:: :rfc:`9559` MIME types for Matroska audiovisual + data container structures * audio with no video: ``audio/matroska`` (``.mka``) * video: ``video/matroska`` (``.mkv``) * stereoscopic video: ``video/matroska-3d`` (``.mk3d``) - (Contributed by Hugo van Kemenade in :gh:`89416`.) - -* Add MIME types for images with RFCs: + .. rubric:: Images with RFCs * :rfc:`1494`: CCITT Group 3 (``.g3``) * :rfc:`3362`: Real-time Facsimile, T.38 (``.t38``) @@ -1407,9 +1548,7 @@ mimetypes * :rfc:`4047`: Flexible Image Transport System (``.fits``) * :rfc:`7903`: Enhanced Metafile (``.emf``) and Windows Metafile (``.wmf``) - (Contributed by Hugo van Kemenade in :gh:`85957`.) - -* More MIME type changes: + .. rubric:: Other MIME type additions and changes * :rfc:`2361`: Change type for ``.avi`` to ``video/vnd.avi`` and for ``.wav`` to ``audio/vnd.wave`` @@ -1417,6 +1556,8 @@ mimetypes * :rfc:`5334`: Add Ogg media (``.oga``, ``.ogg`` and ``.ogx``) * :rfc:`6713`: Add gzip ``application/gzip`` (``.gz``) * :rfc:`9639`: Add FLAC ``audio/flac`` (``.flac``) + * :rfc:`9512` ``application/yaml`` MIME type for YAML files (``.yaml`` + and ``.yml``) * Add 7z ``application/x-7z-compressed`` (``.7z``) * Add Android Package ``application/vnd.android.package-archive`` (``.apk``) when not strict @@ -1439,11 +1580,9 @@ mimetypes * `W3C `__: Add EPUB ``application/epub+zip`` (``.epub``) - (Contributed by Hugo van Kemenade in :gh:`129965`.) - -* Add :rfc:`9512` ``application/yaml`` MIME type for YAML files (``.yaml`` - and ``.yml``). (Contributed by Sasha "Nelie" Chernykh and Hugo van Kemenade - in :gh:`132056`.) + (Contributed by Sahil Prajapati and Hugo van Kemenade in :gh:`84852`, + by Sasha "Nelie" Chernykh and Hugo van Kemenade in :gh:`132056`, + and by Hugo van Kemenade in :gh:`89416`, :gh:`85957`, and :gh:`129965`.) multiprocessing @@ -1451,14 +1590,16 @@ multiprocessing .. _whatsnew314-multiprocessing-start-method: -* The default :ref:`start method ` changed - from :ref:`fork ` to :ref:`forkserver - ` on platforms other than macOS and - Windows where it was already :ref:`spawn `. +* On Unix platforms other than macOS, :ref:`'forkserver' + ` is now the default :ref:`start + method ` + (replacing :ref:`'fork' `). + This change does not affect Windows or macOS, where :ref:`'spawn' + ` remains the default start method. If the threading incompatible *fork* method is required, you must explicitly - request it via a context from :func:`multiprocessing.get_context` (preferred) - or change the default via :func:`multiprocessing.set_start_method`. + request it via a context from :func:`~multiprocessing.get_context` (preferred) + or change the default via :func:`~multiprocessing.set_start_method`. See :ref:`forkserver restrictions ` for information and differences with the *fork* method and how this change @@ -1467,7 +1608,7 @@ multiprocessing (Contributed by Gregory P. Smith in :gh:`84559`.) -* :mod:`multiprocessing`'s ``"forkserver"`` start method now authenticates +* :mod:`multiprocessing`'s ``'forkserver'`` start method now authenticates its control socket to avoid solely relying on filesystem permissions to restrict what other processes could cause the forkserver to spawn workers and run code. @@ -1483,20 +1624,22 @@ multiprocessing (Contributed by Roy Hyunjin Han for :gh:`103134`.) * Add support for shared :class:`set` objects via - :meth:`SyncManager.set() `. - The :func:`set` in :func:`multiprocessing.Manager` method is now available. + :meth:`.SyncManager.set`. + The :func:`set` in :func:`~multiprocessing.Manager` method is now available. (Contributed by Mingyu Park in :gh:`129949`.) -* Add :func:`multiprocessing.Process.interrupt` which terminates the child +* Add the :meth:`~multiprocessing.Process.interrupt` + to :class:`multiprocessing.Process` objects, which terminates the child process by sending :py:const:`~signal.SIGINT`. This enables :keyword:`finally` clauses to print a stack trace for the terminated process. (Contributed by Artem Pulkin in :gh:`131913`.) + operator -------- -* Two new functions :func:`operator.is_none` and :func:`operator.is_not_none` - have been added, such that ``operator.is_none(obj)`` is equivalent +* Add :func:`~operator.is_none` and :func:`~operator.is_not_none` as a pair + of functions, such that ``operator.is_none(obj)`` is equivalent to ``obj is None`` and ``operator.is_not_none(obj)`` is equivalent to ``obj is not None``. (Contributed by Raymond Hettinger and Nico Mexis in :gh:`115808`.) @@ -1505,21 +1648,31 @@ operator os -- -* Add the :func:`os.reload_environ` function to update :data:`os.environ` and +* Add the :func:`~os.reload_environ` function to update :data:`os.environ` and :data:`os.environb` with changes to the environment made by :func:`os.putenv`, by :func:`os.unsetenv`, or made outside Python in the same process. (Contributed by Victor Stinner in :gh:`120057`.) * Add the :data:`~os.SCHED_DEADLINE` and :data:`~os.SCHED_NORMAL` constants - to the :mod:`os` module. + to the :mod:`!os` module. (Contributed by James Roy in :gh:`127688`.) -* Add the :func:`os.readinto` function to read into a +* Add the :func:`~os.readinto` function to read into a :ref:`buffer object ` from a file descriptor. (Contributed by Cody Maloney in :gh:`129205`.) +os.path +------- + +* The *strict* parameter to :func:`~os.path.realpath` accepts a new value, + :data:`~os.path.ALLOW_MISSING`. + If used, errors other than :exc:`FileNotFoundError` will be re-raised; + the resulting path can be missing but it will be free of symlinks. + (Contributed by Petr Viktorin for :cve:`2025-4517`.) + + pathlib ------- @@ -1533,8 +1686,8 @@ pathlib (Contributed by Barney Gale in :gh:`73991`.) -* Add :attr:`pathlib.Path.info` attribute, which stores an object - implementing the :class:`pathlib.types.PathInfo` protocol (also new). The +* Add the :attr:`~pathlib.Path.info` attribute, which stores an object + implementing the new :class:`pathlib.types.PathInfo` protocol. The object supports querying the file type and internally caching :func:`~os.stat` results. Path objects generated by :meth:`~pathlib.Path.iterdir` are initialized with file type information @@ -1545,7 +1698,26 @@ pathlib pdb --- -* Hardcoded breakpoints (:func:`breakpoint` and :func:`pdb.set_trace`) now +* The :mod:`pdb` module now supports remote attaching to a running Python process + using a new :option:`-p PID ` command-line option: + + .. code-block:: sh + + python -m pdb -p 1234 + + This will connect to the Python process with the given PID and allow you to + debug it interactively. Notice that due to how the Python interpreter works + attaching to a remote process that is blocked in a system call or waiting for + I/O will only work once the next bytecode instruction is executed or when the + process receives a signal. + + This feature uses :ref:`PEP 768 ` + and the new :func:`sys.remote_exec` function to attach to the remote process + and send the PDB commands to it. + + (Contributed by Matt Wozniski and Pablo Galindo in :gh:`131591`.) + +* Hardcoded breakpoints (:func:`breakpoint` and :func:`~pdb.set_trace`) now reuse the most recent :class:`~pdb.Pdb` instance that calls :meth:`~pdb.Pdb.set_trace`, instead of creating a new one each time. As a result, all the instance specific data like :pdbcmd:`display` and @@ -1578,21 +1750,14 @@ pdb * ``$_asynctask`` is added to access the current asyncio task if applicable. (Contributed by Tian Gao in :gh:`124367`.) -* :mod:`pdb` now supports two backends: :func:`sys.settrace` and - :mod:`sys.monitoring`. Using :mod:`pdb` CLI or :func:`breakpoint` will - always use the :mod:`sys.monitoring` backend. Explicitly instantiating - :class:`pdb.Pdb` and its derived classes will use the :func:`sys.settrace` - backend by default, which is configurable. - (Contributed by Tian Gao in :gh:`124533`.) - * :func:`pdb.set_trace_async` is added to support debugging asyncio coroutines. :keyword:`await` statements are supported with this function. (Contributed by Tian Gao in :gh:`132576`.) * Source code displayed in :mod:`pdb` will be syntax-highlighted. This feature - can be controlled using the same methods as PyREPL, in addition to the newly - added ``colorize`` argument of :class:`pdb.Pdb`. + can be controlled using the same methods as the default :term:`interactive` + shell, in addition to the newly added ``colorize`` argument of :class:`pdb.Pdb`. (Contributed by Tian Gao and Łukasz Langa in :gh:`133355`.) @@ -1602,15 +1767,16 @@ pickle * Set the default protocol version on the :mod:`pickle` module to 5. For more details, see :ref:`pickle protocols `. -* Add notes for pickle serialization errors that allow to identify the source - of the error. +* Add exception notes for pickle serialization errors that allow + identifying the source of the error. (Contributed by Serhiy Storchaka in :gh:`122213`.) platform -------- -* Add :func:`platform.invalidate_caches` to invalidate the cached results. +* Add :func:`~platform.invalidate_caches`, a function to invalidate + cached results in the :mod:`!platform` module. (Contributed by Bénédikt Tran in :gh:`122549`.) @@ -1622,6 +1788,19 @@ pydoc (Contributed by Jelle Zijlstra in :gh:`101552`.) +re +-- + +* Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions `. + It is interpreted unambiguously in many other regular expression engines, + unlike ``\Z``, which has subtly different behavior. + (Contributed by Serhiy Storchaka in :gh:`133306`.) + +* ``\B`` in :mod:`regular expression ` now matches the empty input string, + meaning that it is now always the opposite of ``\b``. + (Contributed by Serhiy Storchaka in :gh:`124130`.) + + socket ------ @@ -1648,11 +1827,12 @@ socket * Add many new constants. (Contributed by Serhiy Storchaka in :gh:`132734`.) + ssl --- -* Indicate through :data:`ssl.HAS_PHA` whether the :mod:`ssl` module supports - TLSv1.3 post-handshake client authentication (PHA). +* Indicate through the :data:`~ssl.HAS_PHA` Boolean whether the :mod:`!ssl` + module supports TLSv1.3 post-handshake client authentication (PHA). (Contributed by Will Childs-Klein in :gh:`128036`.) @@ -1668,7 +1848,7 @@ struct symtable -------- -* Expose the following :class:`symtable.Symbol` methods: +* Expose the following :class:`~symtable.Symbol` methods: * :meth:`~symtable.Symbol.is_comp_cell` * :meth:`~symtable.Symbol.is_comp_iter` @@ -1683,31 +1863,69 @@ sys * The previously undocumented special function :func:`sys.getobjects`, which only exists in specialized builds of Python, may now return objects from other interpreters than the one it's called in. + (Contributed by Eric Snow in :gh:`125286`.) * Add :func:`sys._is_immortal` for determining if an object is :term:`immortal`. (Contributed by Peter Bierma in :gh:`128509`.) -* On FreeBSD, :data:`sys.platform` doesn't contain the major version anymore. +* On FreeBSD, :data:`sys.platform` no longer contains the major version number. It is always ``'freebsd'``, instead of ``'freebsd13'`` or ``'freebsd14'``. + (Contributed by Michael Osipov in :gh:`129393`.) * Raise :exc:`DeprecationWarning` for :func:`sys._clear_type_cache`. This function was deprecated in Python 3.13 but it didn't raise a runtime warning. +* Add :func:`sys.remote_exec` to implement the new external debugger interface. + See :ref:`PEP 768 ` for details. + (Contributed by Pablo Galindo Salgado, Matt Wozniski, and Ivona Stojanovic + in :gh:`131591`.) + +* Add the :data:`sys._jit` namespace, containing utilities for introspecting + just-in-time compilation. + (Contributed by Brandt Bucher in :gh:`133231`.) + sys.monitoring -------------- -* Two new events are added: :monitoring-event:`BRANCH_LEFT` and - :monitoring-event:`BRANCH_RIGHT`. The ``BRANCH`` event is deprecated. +* Add two new monitoring events, :monitoring-event:`BRANCH_LEFT` and + :monitoring-event:`BRANCH_RIGHT`. + These replace and deprecate the :monitoring-event:`!BRANCH` event. + (Contributed by Mark Shannon in :gh:`122548`.) sysconfig --------- -* Add ``ABIFLAGS`` key to :func:`sysconfig.get_config_vars` on Windows. +* Add ``ABIFLAGS`` key to :func:`~sysconfig.get_config_vars` on Windows. (Contributed by Xuehai Pan in :gh:`131799`.) +tarfile +------- + +* :func:`~tarfile.data_filter` now normalizes symbolic link targets in order to + avoid path traversal attacks. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2025-4138`.) + +* :func:`~tarfile.TarFile.extractall` now skips fixing up directory attributes + when a directory was removed or replaced by another kind of file. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2024-12718`.) + +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + now (re-)apply the extraction filter when substituting a link (hard or + symbolic) with a copy of another archive member, and when fixing up + directory attributes. + The former raises a new exception, :exc:`~tarfile.LinkFallbackError`. + (Contributed by Petr Viktorin for :cve:`2025-4330` and :cve:`2024-12718`.) + +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + no longer extract rejected members when + :func:`~tarfile.TarFile.errorlevel` is zero. + (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` + and :cve:`2025-4435`.) + + threading --------- @@ -1720,17 +1938,18 @@ tkinter ------- * Make :mod:`tkinter` widget methods :meth:`!after` and :meth:`!after_idle` - accept arguments passed by keyword. + accept keyword arguments. (Contributed by Zhikang Yan in :gh:`126899`.) -* Add ability to specify name for :class:`!tkinter.OptionMenu` and +* Add ability to specify a name for :class:`!tkinter.OptionMenu` and :class:`!tkinter.ttk.OptionMenu`. (Contributed by Zhikang Yan in :gh:`130482`.) + turtle ------ -* Add context managers for :func:`turtle.fill`, :func:`turtle.poly` +* Add context managers for :func:`turtle.fill`, :func:`turtle.poly`, and :func:`turtle.no_animation`. (Contributed by Marie Roald and Yngve Mardal Moe in :gh:`126350`.) @@ -1748,43 +1967,60 @@ typing .. _whatsnew314-typing-union: -* :class:`types.UnionType` and :class:`typing.Union` are now aliases for each other, - meaning that both old-style unions (created with ``Union[int, str]``) and new-style - unions (``int | str``) now create instances of the same runtime type. This unifies - the behavior between the two syntaxes, but leads to some differences in behavior that +* The :class:`types.UnionType` and :class:`typing.Union` types are now + aliases for each other, meaning that both old-style unions + (created with ``Union[int, str]``) and new-style unions (``int | str``) + now create instances of the same runtime type. This unifies the behavior + between the two syntaxes, but leads to some differences in behavior that may affect users who introspect types at runtime: - - Both syntaxes for creating a union now produce the same string representation in - ``repr()``. For example, ``repr(Union[int, str])`` - is now ``"int | str"`` instead of ``"typing.Union[int, str]"``. - - Unions created using the old syntax are no longer cached. Previously, running - ``Union[int, str]`` multiple times would return the same object - (``Union[int, str] is Union[int, str]`` would be ``True``), but now it will - return two different objects. Users should use ``==`` to compare unions for equality, not - ``is``. New-style unions have never been cached this way. - This change could increase memory usage for some programs that use a large number of - unions created by subscripting ``typing.Union``. However, several factors offset this cost: - unions used in annotations are no longer evaluated by default in Python 3.14 - because of :pep:`649`; an instance of :class:`types.UnionType` is - itself much smaller than the object returned by ``Union[]`` was on prior Python - versions; and removing the cache also saves some space. It is therefore - unlikely that this change will cause a significant increase in memory usage for most - users. + - Both syntaxes for creating a union now produce the same string + representation in :func:`repr`. + For example, ``repr(Union[int, str])`` is now ``"int | str"`` instead of + ``"typing.Union[int, str]"``. + + - Unions created using the old syntax are no longer cached. + Previously, running ``Union[int, str]`` multiple times would return + the same object (``Union[int, str] is Union[int, str]`` would be ``True``), + but now it will return two different objects. + Use ``==`` to compare unions for equality, not ``is``. + New-style unions have never been cached this way. + This change could increase memory usage for some programs that use + a large number of unions created by subscripting ``typing.Union``. + However, several factors offset this cost: + unions used in annotations are no longer evaluated by default in Python + 3.14 because of :pep:`649`; an instance of :class:`types.UnionType` is + itself much smaller than the object returned by ``Union[]`` was on prior + Python versions; and removing the cache also saves some space. + It is therefore unlikely that this change will cause a significant increase + in memory usage for most users. + - Previously, old-style unions were implemented using the private class - ``typing._UnionGenericAlias``. This class is no longer needed for the implementation, - but it has been retained for backward compatibility, with removal scheduled for Python - 3.17. Users should use documented introspection helpers like :func:`typing.get_origin` - and :func:`typing.get_args` instead of relying on private implementation details. - - It is now possible to use :class:`typing.Union` itself in :func:`isinstance` checks. - For example, ``isinstance(int | str, typing.Union)`` will return ``True``; previously - this raised :exc:`TypeError`. - - The ``__args__`` attribute of :class:`typing.Union` objects is no longer writable. - - It is no longer possible to set any attributes on :class:`typing.Union` objects. + ``typing._UnionGenericAlias``. + This class is no longer needed for the implementation, + but it has been retained for backward compatibility, + with removal scheduled for Python 3.17. + Users should use documented introspection helpers like + :func:`~typing.get_origin` and :func:`typing.get_args` instead of + relying on private implementation details. + + - It is now possible to use :class:`typing.Union` itself in + :func:`isinstance` checks. + For example, ``isinstance(int | str, typing.Union)`` will return ``True``; + previously this raised :exc:`TypeError`. + + - The :attr:`!__args__` attribute of :class:`typing.Union` objects is + no longer writable. + + - It is no longer possible to set any attributes on :class:`~typing.Union` + objects. This only ever worked for dunder attributes on previous versions, was never documented to work, and was subtly broken in many cases. (Contributed by Jelle Zijlstra in :gh:`105499`.) +* :class:`~typing.TypeAliasType` now supports star unpacking. + unicodedata ----------- @@ -1792,11 +2028,11 @@ unicodedata * The Unicode database has been updated to Unicode 16.0.0. -.. _whatsnew314-color-unittest: - unittest -------- +.. _whatsnew314-color-unittest: + * :mod:`unittest` output is now colored by default. This can be controlled by :ref:`environment variables `. @@ -1819,7 +2055,7 @@ unittest :meth:`~unittest.TestCase.assertNotStartsWith`, :meth:`~unittest.TestCase.assertEndsWith` and :meth:`~unittest.TestCase.assertNotEndsWith` check whether the Unicode - or byte string starts or ends with particular string(s). + or byte string starts or ends with particular strings. (Contributed by Serhiy Storchaka in :gh:`71339`.) @@ -1834,17 +2070,18 @@ urllib * Improve ergonomics and standards compliance when parsing and emitting ``file:`` URLs. - In :func:`urllib.request.url2pathname`: + In :func:`~urllib.request.url2pathname`: - Accept a complete URL when the new *require_scheme* argument is set to true. - Discard URL authority if it matches the local hostname. - Discard URL authority if it resolves to a local IP address when the new *resolve_host* argument is set to true. + - Discard URL query and fragment components. - Raise :exc:`~urllib.error.URLError` if a URL authority isn't local, except on Windows where we return a UNC path as before. - In :func:`urllib.request.pathname2url`: + In :func:`~urllib.request.pathname2url`: - Return a complete URL when the new *add_scheme* argument is set to true. - Include an empty URL authority when a path begins with a slash. For @@ -1860,16 +2097,17 @@ urllib uuid ---- -* Add support for UUID versions 6, 7, and 8 via :func:`uuid.uuid6`, - :func:`uuid.uuid7`, and :func:`uuid.uuid8` respectively, as specified +* Add support for UUID versions 6, 7, and 8 via :func:`~uuid.uuid6`, + :func:`~uuid.uuid7`, and :func:`~uuid.uuid8` respectively, as specified in :rfc:`9562`. (Contributed by Bénédikt Tran in :gh:`89083`.) -* :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the +* :const:`~uuid.NIL` and :const:`~uuid.MAX` are now available to represent the Nil and Max UUID formats as defined by :rfc:`9562`. (Contributed by Nick Pope in :gh:`128427`.) -* Allow to generate multiple UUIDs at once via :option:`python -m uuid --count `. +* Allow generating multiple UUIDs simultaneously on the command-line via + :option:`python -m uuid --count `. (Contributed by Simon Legner in :gh:`131236`.) @@ -1884,233 +2122,177 @@ webbrowser supported browsers on macOS. -zipinfo +zipfile ------- -* Added :func:`ZipInfo._for_archive ` +* Added :meth:`ZipInfo._for_archive `, a method to resolve suitable defaults for a :class:`~zipfile.ZipInfo` object as used by :func:`ZipFile.writestr `. (Contributed by Bénédikt Tran in :gh:`123424`.) -* :meth:`zipfile.ZipFile.writestr` now respect ``SOURCE_DATE_EPOCH`` that - distributions can set centrally and have build tools consume this in order - to produce reproducible output. +* :meth:`.ZipFile.writestr` now respects the :envvar:`SOURCE_DATE_EPOCH` + environment variable in order to better support reproducible builds. (Contributed by Jiahao Li in :gh:`91279`.) .. Add improved modules above alphabetically, not here at the end. + Optimizations ============= * The import time for several standard library modules has been improved, - including :mod:`ast`, :mod:`asyncio`, :mod:`base64`, :mod:`cmd`, :mod:`csv`, - :mod:`gettext`, :mod:`importlib.util`, :mod:`locale`, :mod:`mimetypes`, - :mod:`optparse`, :mod:`pickle`, :mod:`pprint`, :mod:`pstats`, :mod:`socket`, - :mod:`subprocess`, :mod:`threading`, :mod:`tomllib`, and :mod:`zipfile`. + including :mod:`annotationlib`, :mod:`ast`, :mod:`asyncio`, :mod:`base64`, + :mod:`cmd`, :mod:`csv`, :mod:`gettext`, :mod:`importlib.util`, :mod:`locale`, + :mod:`mimetypes`, :mod:`optparse`, :mod:`pickle`, :mod:`pprint`, + :mod:`pstats`, :mod:`shlex`, :mod:`socket`, :mod:`string`, :mod:`subprocess`, + :mod:`threading`, :mod:`tomllib`, :mod:`types`, and :mod:`zipfile`. (Contributed by Adam Turner, Bénédikt Tran, Chris Markiewicz, Eli Schwartz, Hugo van Kemenade, Jelle Zijlstra, and others in :gh:`118761`.) +* The interpreter now avoids some reference count modifications internally + when it's safe to do so. + This can lead to different values being returned from :func:`sys.getrefcount` + and :c:func:`Py_REFCNT` compared to previous versions of Python. + See :ref:`below ` for details. + asyncio ------- -* :mod:`asyncio` now uses double linked list implementation for native tasks - which speeds up execution by 10% on standard pyperformance benchmarks and - reduces memory usage. +* Standard benchmark results have improved by 10-20% following the + implementation of a new per-thread doubly linked list + for :class:`native tasks `, + also reducing memory usage. + This enables external introspection tools such as + :ref:`python -m asyncio pstree ` + to introspect the call graph of asyncio tasks running in all threads. (Contributed by Kumar Aditya in :gh:`107803`.) -* :mod:`asyncio` has new utility functions for introspecting and printing - the program's call graph: :func:`asyncio.capture_call_graph` and - :func:`asyncio.print_call_graph`. - (Contributed by Yury Selivanov, Pablo Galindo Salgado, and Łukasz Langa - in :gh:`91048`.) +* The module now has first class support for + :term:`free-threading builds `. + This enables parallel execution of multiple event loops across + different threads, scaling linearly with the number of threads. + (Contributed by Kumar Aditya in :gh:`128002`.) + base64 ------ -* Improve the performance of :func:`base64.b16decode` by up to ten times, - and reduce the import time of :mod:`base64` by up to six times. - (Contributed by Bénédikt Tran, Chris Markiewicz, and Adam Turner in :gh:`118761`.) +* :func:`~base64.b16decode` is now up to six times faster. + (Contributed by Bénédikt Tran, Chris Markiewicz, and Adam Turner + in :gh:`118761`.) -io +bdb --- -* :mod:`io` which provides the built-in :func:`open` makes less system calls - when opening regular files as well as reading whole files. Reading a small - operating system cached file in full is up to 15% faster. - :func:`pathlib.Path.read_bytes` has the most optimizations for reading a - file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in - :gh:`120754` and :gh:`90102`.) +* The basic debugger now has a :mod:`sys.monitoring`-based backend, + which can be selected via the passing ``'monitoring'`` + to the :class:`~bdb.Bdb` class's new *backend* parameter. + (Contributed by Tian Gao in :gh:`124533`.) -uuid ----- -* Improve generation of :class:`~uuid.UUID` objects via their dedicated - functions: +difflib +------- - * :func:`~uuid.uuid3` and :func:`~uuid.uuid5` are both roughly 40% faster - for 16-byte names and 20% faster for 1024-byte names. Performance for - longer names remains unchanged. - * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 40% faster - respectively. +* The :func:`~difflib.IS_LINE_JUNK` function is now up to twice as fast. + (Contributed by Adam Turner and Semyon Moroz in :gh:`130167`.) - (Contributed by Bénédikt Tran in :gh:`128150`.) +gc +-- -zlib ----- +* The new :ref:`incremental garbage collector ` + means that maximum pause times are reduced + by an order of magnitude or more for larger heaps. -* On Windows, ``zlib-ng`` is now used as the implementation of the - :mod:`zlib` module. This should produce compatible and comparable - results with better performance, though it is worth noting that - ``zlib.Z_BEST_SPEED`` (1) may result in significantly less - compression than the previous implementation (while also significantly - reducing the time taken to compress). - (Contributed by Steve Dower in :gh:`91349`.) + Because of this optimization, the meaning of the results of + :meth:`~gc.get_threshold` and :meth:`~gc.set_threshold` have changed, + along with :meth:`~gc.get_count` and :meth:`~gc.get_stats`. + - For backwards compatibility, :meth:`~gc.get_threshold` continues to return + a three-item tuple. + The first value is the threshold for young collections, as before; + the second value determines the rate at which the old collection is scanned + (the default is 10, and higher values mean that the old collection + is scanned more slowly). + The third value is now meaningless and is always zero. -Deprecated -========== + - :meth:`~gc.set_threshold` now ignores any items after the second. -* :mod:`argparse`: + - :meth:`~gc.get_count` and :meth:`~gc.get_stats` continue to return + the same format of results. + The only difference is that instead of the results referring to + the young, aging and old generations, + the results refer to the young generation + and the aging and collecting spaces of the old generation. - * Passing the undocumented keyword argument *prefix_chars* to - :meth:`~argparse.ArgumentParser.add_argument_group` is now - deprecated. - (Contributed by Savannah Ostrowski in :gh:`125563`.) - * Deprecated the :class:`argparse.FileType` type converter. - Anything with resource management should be done downstream after the - arguments are parsed. - (Contributed by Serhiy Storchaka in :gh:`58032`.) + In summary, code that attempted to manipulate the behavior of the cycle GC + may not work exactly as intended, but it is very unlikely to be harmful. + All other code will work just fine. -* :mod:`asyncio`: + (Contributed by Mark Shannon in :gh:`108362`.) - * :func:`!asyncio.iscoroutinefunction` is deprecated - and will be removed in Python 3.16; - use :func:`inspect.iscoroutinefunction` instead. - (Contributed by Jiahao Li and Kumar Aditya in :gh:`122875`.) - * :mod:`asyncio` policy system is deprecated and will be removed in Python 3.16. - In particular, the following classes and functions are deprecated: +io +--- - * :class:`asyncio.AbstractEventLoopPolicy` - * :class:`asyncio.DefaultEventLoopPolicy` - * :class:`asyncio.WindowsSelectorEventLoopPolicy` - * :class:`asyncio.WindowsProactorEventLoopPolicy` - * :func:`asyncio.get_event_loop_policy` - * :func:`asyncio.set_event_loop_policy` - * :func:`asyncio.set_event_loop` +* Opening and reading files now executes fewer system calls. + Reading a small operating system cached file in full is up to 15% faster. + (Contributed by Cody Maloney and Victor Stinner + in :gh:`120754` and :gh:`90102`.) - Users should use :func:`asyncio.run` or :class:`asyncio.Runner` with - *loop_factory* to use the desired event loop implementation. - For example, to use :class:`asyncio.SelectorEventLoop` on Windows:: +pathlib +------- - import asyncio +* :func:`Path.read_bytes ` now uses unbuffered mode + to open files, which is between 9% and 17% faster to read in full. + (Contributed by Cody Maloney in :gh:`120754`.) - async def main(): - ... - asyncio.run(main(), loop_factory=asyncio.SelectorEventLoop) +pdb +--- - (Contributed by Kumar Aditya in :gh:`127949`.) +* :mod:`pdb` now supports two backends, based on either + :func:`sys.settrace` or :mod:`sys.monitoring`. + Using the :ref:`pdb CLI ` or :func:`breakpoint` + will always use the :mod:`sys.monitoring` backend. + Explicitly instantiating :class:`pdb.Pdb` and its derived classes + will use the :func:`sys.settrace` backend by default, which is configurable. + (Contributed by Tian Gao in :gh:`124533`.) -* :mod:`builtins`: - Passing a complex number as the *real* or *imag* argument in the - :func:`complex` constructor is now deprecated; it should only be passed - as a single positional argument. - (Contributed by Serhiy Storchaka in :gh:`109218`.) -* :mod:`codecs`: - :func:`codecs.open` is now deprecated. Use :func:`open` instead. - (Contributed by Inada Naoki in :gh:`133036`.) +uuid +---- -* :mod:`ctypes`: +* :func:`~uuid.uuid3` and :func:`~uuid.uuid5` are now both roughly 40% faster + for 16-byte names and 20% faster for 1024-byte names. + Performance for longer names remains unchanged. + (Contributed by Bénédikt Tran in :gh:`128150`.) - * On non-Windows platforms, setting :attr:`.Structure._pack_` to use a - MSVC-compatible default memory layout is deprecated in favor of setting - :attr:`.Structure._layout_` to ``'ms'``. - (Contributed by Petr Viktorin in :gh:`131747`.) +* :func:`~uuid.uuid4` is now c. 30% faster. + (Contributed by Bénédikt Tran in :gh:`128150`.) - * Calling :func:`ctypes.POINTER` on a string is deprecated. - Use :ref:`ctypes-incomplete-types` for self-referential structures. - Also, the internal ``ctypes._pointer_type_cache`` is deprecated. - See :func:`ctypes.POINTER` for updated implementation details. - (Contributed by Sergey Myrianov in :gh:`100926`.) -* :mod:`functools`: - Calling the Python implementation of :func:`functools.reduce` with *function* - or *sequence* as keyword arguments is now deprecated. - (Contributed by Kirill Podoprigora in :gh:`121676`.) +zlib +---- -* :mod:`logging`: - Support for custom logging handlers with the *strm* argument is deprecated - and scheduled for removal in Python 3.16. Define handlers with the *stream* - argument instead. (Contributed by Mariusz Felisiak in :gh:`115032`.) +* On Windows, `zlib-ng `__ + is now used as the implementation of the :mod:`zlib` module + in the default binaries. + There are no known incompatibilities between ``zlib-ng`` + and the previously-used ``zlib`` implementation. + This should result in better performance at all compression levels. -* :mod:`mimetypes`: - Valid extensions start with a '.' or are empty for - :meth:`mimetypes.MimeTypes.add_type`. - Undotted extensions are deprecated and will - raise a :exc:`ValueError` in Python 3.16. - (Contributed by Hugo van Kemenade in :gh:`75223`.) - -* :mod:`!nturl2path`: This module is now deprecated. Call - :func:`urllib.request.url2pathname` and :func:`~urllib.request.pathname2url` - instead. - (Contributed by Barney Gale in :gh:`125866`.) - -* :mod:`os`: - :term:`Soft deprecate ` :func:`os.popen` and - :func:`os.spawn* ` functions. They should no longer be used to - write new code. The :mod:`subprocess` module is recommended instead. - (Contributed by Victor Stinner in :gh:`120743`.) - -* :mod:`pathlib`: - :meth:`!pathlib.PurePath.as_uri` is deprecated and will be removed in Python - 3.19. Use :meth:`pathlib.Path.as_uri` instead. - (Contributed by Barney Gale in :gh:`123599`.) - -* :mod:`pdb`: - The undocumented ``pdb.Pdb.curframe_locals`` attribute is now a deprecated - read-only property. The low overhead dynamic frame locals access added in - Python 3.13 by PEP 667 means the frame locals cache reference previously - stored in this attribute is no longer needed. Derived debuggers should access - ``pdb.Pdb.curframe.f_locals`` directly in Python 3.13 and later versions. - (Contributed by Tian Gao in :gh:`124369` and :gh:`125951`.) - -* :mod:`symtable`: - Deprecate :meth:`symtable.Class.get_methods` due to the lack of interest. - (Contributed by Bénédikt Tran in :gh:`119698`.) - -* :mod:`tkinter`: - The :class:`!tkinter.Variable` methods :meth:`!trace_variable`, - :meth:`!trace_vdelete` and :meth:`!trace_vinfo` are now deprecated. - Use :meth:`!trace_add`, :meth:`!trace_remove` and :meth:`!trace_info` - instead. - (Contributed by Serhiy Storchaka in :gh:`120220`.) - -* :mod:`urllib.parse`: - Accepting objects with false values (like ``0`` and ``[]``) except empty - strings, byte-like objects and ``None`` in :mod:`urllib.parse` functions - :func:`~urllib.parse.parse_qsl` and :func:`~urllib.parse.parse_qs` is now - deprecated. - (Contributed by Serhiy Storchaka in :gh:`116897`.) - -.. Add deprecations above alphabetically, not here at the end. - -.. include:: ../deprecations/pending-removal-in-3.15.rst - -.. include:: ../deprecations/pending-removal-in-3.16.rst - -.. include:: ../deprecations/pending-removal-in-3.17.rst + It is worth noting that ``zlib.Z_BEST_SPEED`` (``1``) may result in + significantly less compression than the previous implementation, + whilst also significantly reducing the time taken to compress. -.. include:: ../deprecations/pending-removal-in-3.19.rst + (Contributed by Steve Dower in :gh:`91349`.) -.. include:: ../deprecations/pending-removal-in-future.rst Removed ======= @@ -2119,86 +2301,102 @@ argparse -------- * Remove the *type*, *choices*, and *metavar* parameters - of :class:`!argparse.BooleanOptionalAction`. - They were deprecated since 3.12. - -* Calling :meth:`~argparse.ArgumentParser.add_argument_group` on an argument - group, and calling :meth:`~argparse.ArgumentParser.add_argument_group` or - :meth:`~argparse.ArgumentParser.add_mutually_exclusive_group` on a mutually - exclusive group now raise exceptions. This nesting was never supported, - often failed to work correctly, and was unintentionally exposed through - inheritance. This functionality has been deprecated since Python 3.11. + of :class:`!BooleanOptionalAction`. + These have been deprecated since Python 3.12. + (Contributed by Nikita Sobolev in :gh:`118805`.) + +* Calling :meth:`~argparse.ArgumentParser.add_argument_group` + on an argument group now raises a :exc:`ValueError`. + Similarly, :meth:`~argparse.ArgumentParser.add_argument_group` + or :meth:`~argparse.ArgumentParser.add_mutually_exclusive_group` + on a mutually exclusive group now both raise :exc:`ValueError`\ s. + This 'nesting' was never supported, often failed to work correctly, + and was unintentionally exposed through inheritance. + This functionality has been deprecated since Python 3.11. (Contributed by Savannah Ostrowski in :gh:`127186`.) + ast --- -* Remove the following classes. They were all deprecated since Python 3.8, - and have emitted deprecation warnings since Python 3.12: +* Remove the following classes, which have been deprecated aliases of + :class:`~ast.Constant` since Python 3.8 and have emitted + deprecation warnings since Python 3.12: + + * :class:`!Bytes` + * :class:`!Ellipsis` + * :class:`!NameConstant` + * :class:`!Num` + * :class:`!Str` - * :class:`!ast.Bytes` - * :class:`!ast.Ellipsis` - * :class:`!ast.NameConstant` - * :class:`!ast.Num` - * :class:`!ast.Str` + As a consequence of these removals, user-defined ``visit_Num``, ``visit_Str``, + ``visit_Bytes``, ``visit_NameConstant`` and ``visit_Ellipsis`` methods + on custom :class:`~ast.NodeVisitor` subclasses will no longer be called + when the :class:`!NodeVisitor` subclass is visiting an AST. + Define a ``visit_Constant`` method instead. - Use :class:`ast.Constant` instead. As a consequence of these removals, - user-defined ``visit_Num``, ``visit_Str``, ``visit_Bytes``, - ``visit_NameConstant`` and ``visit_Ellipsis`` methods on custom - :class:`ast.NodeVisitor` subclasses will no longer be called when the - :class:`!NodeVisitor` subclass is visiting an AST. Define a ``visit_Constant`` - method instead. + (Contributed by Alex Waygood in :gh:`119562`.) - Also, remove the following deprecated properties on :class:`ast.Constant`, +* Remove the following deprecated properties on :class:`ast.Constant`, which were present for compatibility with the now-removed AST classes: - * :attr:`!ast.Constant.n` - * :attr:`!ast.Constant.s` + * :attr:`!Constant.n` + * :attr:`!Constant.s` - Use :attr:`!ast.Constant.value` instead. + Use :attr:`!Constant.value` instead. (Contributed by Alex Waygood in :gh:`119562`.) + asyncio ------- -* Remove the following classes and functions. They were all deprecated and - emitted deprecation warnings since Python 3.12: +* Remove the following classes, methods, and functions, + which have been deprecated since Python 3.12: - * :func:`!asyncio.get_child_watcher` - * :func:`!asyncio.set_child_watcher` - * :meth:`!asyncio.AbstractEventLoopPolicy.get_child_watcher` - * :meth:`!asyncio.AbstractEventLoopPolicy.set_child_watcher` - * :class:`!asyncio.AbstractChildWatcher` - * :class:`!asyncio.FastChildWatcher` - * :class:`!asyncio.MultiLoopChildWatcher` - * :class:`!asyncio.PidfdChildWatcher` - * :class:`!asyncio.SafeChildWatcher` - * :class:`!asyncio.ThreadedChildWatcher` + * :class:`!AbstractChildWatcher` + * :class:`!FastChildWatcher` + * :class:`!MultiLoopChildWatcher` + * :class:`!PidfdChildWatcher` + * :class:`!SafeChildWatcher` + * :class:`!ThreadedChildWatcher` + * :meth:`!AbstractEventLoopPolicy.get_child_watcher` + * :meth:`!AbstractEventLoopPolicy.set_child_watcher` + * :func:`!get_child_watcher` + * :func:`!set_child_watcher` (Contributed by Kumar Aditya in :gh:`120804`.) -* Removed implicit creation of event loop by :func:`asyncio.get_event_loop`. - It now raises a :exc:`RuntimeError` if there is no current event loop. +* :func:`asyncio.get_event_loop` now raises a :exc:`RuntimeError` + if there is no current event loop, + and no longer implicitly creates an event loop. + (Contributed by Kumar Aditya in :gh:`126353`.) + .. TODO: move these patterns to the asyncio docs? + quite long for What's New + There's a few patterns that use :func:`asyncio.get_event_loop`, most of them can be replaced with :func:`asyncio.run`. If you're running an async function, simply use :func:`asyncio.run`. - Before:: + Before: - async def main(): - ... + .. code:: python + async def main(): + ... - loop = asyncio.get_event_loop() - try: - loop.run_until_complete(main()) - finally: - loop.close() - After:: + loop = asyncio.get_event_loop() + try: + loop.run_until_complete(main()) + finally: + loop.close() + + After: + + .. code:: python async def main(): ... @@ -2209,410 +2407,984 @@ asyncio and then run forever, use :func:`asyncio.run` and an :class:`asyncio.Event`. - Before:: - - def start_server(loop): - ... - - loop = asyncio.get_event_loop() - try: - start_server(loop) - loop.run_forever() - finally: - loop.close() - - After:: + Before: - def start_server(loop): - ... + .. code:: python - async def main(): - start_server(asyncio.get_running_loop()) - await asyncio.Event().wait() + def start_server(loop): ... - asyncio.run(main()) + loop = asyncio.get_event_loop() + try: + start_server(loop) + loop.run_forever() + finally: + loop.close() - If you need to run something in an event loop, then run some blocking - code around it, use :class:`asyncio.Runner`. + After: - Before:: + .. code:: python - async def operation_one(): - ... + def start_server(loop): ... - def blocking_code(): - ... + async def main(): + start_server(asyncio.get_running_loop()) + await asyncio.Event().wait() - async def operation_two(): - ... + asyncio.run(main()) - loop = asyncio.get_event_loop() - try: - loop.run_until_complete(operation_one()) - blocking_code() - loop.run_until_complete(operation_two()) - finally: - loop.close() + If you need to run something in an event loop, then run some blocking + code around it, use :class:`asyncio.Runner`. - After:: + Before: - async def operation_one(): - ... + .. code:: python - def blocking_code(): - ... + async def operation_one(): ... + def blocking_code(): ... + async def operation_two(): ... - async def operation_two(): - ... + loop = asyncio.get_event_loop() + try: + loop.run_until_complete(operation_one()) + blocking_code() + loop.run_until_complete(operation_two()) + finally: + loop.close() - with asyncio.Runner() as runner: - runner.run(operation_one()) - blocking_code() - runner.run(operation_two()) + After: + .. code:: python + async def operation_one(): ... + def blocking_code(): ... + async def operation_two(): ... -collections.abc ---------------- + with asyncio.Runner() as runner: + runner.run(operation_one()) + blocking_code() + runner.run(operation_two()) -* Remove :class:`!collections.abc.ByteString`. It had previously raised a - :exc:`DeprecationWarning` since Python 3.12. email ----- -* Remove the *isdst* parameter from :func:`email.utils.localtime`. +* Remove :func:`email.utils.localtime`'s *isdst* parameter, + which was deprecated in and has been ignored since Python 3.12. (Contributed by Hugo van Kemenade in :gh:`118798`.) -importlib ---------- - -* Remove deprecated :mod:`importlib.abc` classes: - * :class:`!importlib.abc.ResourceReader` - * :class:`!importlib.abc.Traversable` - * :class:`!importlib.abc.TraversableResources` +importlib.abc +------------- - Use :mod:`importlib.resources.abc` classes instead: +* Remove deprecated :mod:`importlib.abc` classes: - * :class:`importlib.resources.abc.Traversable` - * :class:`importlib.resources.abc.TraversableResources` + * :class:`!ResourceReader` + (use :class:`~importlib.resources.abc.TraversableResources`) + * :class:`!Traversable` + (use :class:`~importlib.resources.abc.Traversable`) + * :class:`!TraversableResources` + (use :class:`~importlib.resources.abc.TraversableResources`) (Contributed by Jason R. Coombs and Hugo van Kemenade in :gh:`93963`.) + itertools --------- -* Remove :mod:`itertools` support for copy, deepcopy, and pickle operations. - These had previously raised a :exc:`DeprecationWarning` since Python 3.12. +* Remove support for copy, deepcopy, and pickle operations + from :mod:`itertools` iterators. + These have emitted a :exc:`DeprecationWarning` since Python 3.12. (Contributed by Raymond Hettinger in :gh:`101588`.) + pathlib ------- -* Remove support for passing additional keyword arguments to - :class:`pathlib.Path`. In previous versions, any such arguments are ignored. +* Remove support for passing additional keyword arguments + to :class:`~pathlib.Path`. + In previous versions, any such arguments are ignored. + (Contributed by Barney Gale in :gh:`74033`.) + * Remove support for passing additional positional arguments to - :meth:`pathlib.PurePath.relative_to` and - :meth:`~pathlib.PurePath.is_relative_to`. In previous versions, any such - arguments are joined onto *other*. + :meth:`.PurePath.relative_to` and :meth:`~pathlib.PurePath.is_relative_to`. + In previous versions, any such arguments are joined onto *other*. + (Contributed by Barney Gale in :gh:`78707`.) + pkgutil ------- -* Remove deprecated :func:`!pkgutil.get_loader` and :func:`!pkgutil.find_loader`. - These had previously raised a :exc:`DeprecationWarning` since Python 3.12. +* Remove the :func:`!get_loader` and :func:`!find_loader` functions, + which have been deprecated since Python 3.12. (Contributed by Bénédikt Tran in :gh:`97850`.) + pty --- -* Remove deprecated :func:`!pty.master_open` and :func:`!pty.slave_open`. - They had previously raised a :exc:`DeprecationWarning` since Python 3.12. +* Remove the :func:`!master_open` and :func:`!slave_open` functions, + which have been deprecated since Python 3.12. Use :func:`pty.openpty` instead. (Contributed by Nikita Sobolev in :gh:`118824`.) + sqlite3 ------- -* Remove :data:`!version` and :data:`!version_info` from :mod:`sqlite3`. +* Remove :data:`!version` and :data:`!version_info` from + the :mod:`sqlite3` module; + use :data:`~sqlite3.sqlite_version` and :data:`~sqlite3.sqlite_version_info` + for the actual version number of the runtime SQLite library. (Contributed by Hugo van Kemenade in :gh:`118924`.) -* Disallow using a sequence of parameters with named placeholders. - This had previously raised a :exc:`DeprecationWarning` since Python 3.12; - it will now raise a :exc:`sqlite3.ProgrammingError`. +* Using a sequence of parameters with named placeholders now + raises a :exc:`~sqlite3.ProgrammingError`, + having been deprecated since Python 3.12. (Contributed by Erlend E. Aasland in :gh:`118928` and :gh:`101693`.) -typing ------- - -* Remove :class:`!typing.ByteString`. It had previously raised a - :exc:`DeprecationWarning` since Python 3.12. - -* :class:`typing.TypeAliasType` now supports star unpacking. urllib ------ -* Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`. - It had previously raised a :exc:`DeprecationWarning` since Python 3.11. +* Remove the :class:`!Quoter` class from :mod:`urllib.parse`, + which has been deprecated since Python 3.11. (Contributed by Nikita Sobolev in :gh:`118827`.) -* Remove deprecated :class:`!URLopener` and :class:`!FancyURLopener` classes - from :mod:`urllib.request`. They had previously raised a - :exc:`DeprecationWarning` since Python 3.3. - - ``myopener.open()`` can be replaced with :func:`~urllib.request.urlopen`, - and ``myopener.retrieve()`` can be replaced with - :func:`~urllib.request.urlretrieve`. Customizations to the opener - classes can be replaced by passing customized handlers to - :func:`~urllib.request.build_opener`. + +* Remove the :class:`!URLopener` and :class:`!FancyURLopener` classes + from :mod:`urllib.request`, + which have been deprecated since Python 3.3. + + ``myopener.open()`` can be replaced with :func:`~urllib.request.urlopen`. + ``myopener.retrieve()`` can be replaced with + :func:`~urllib.request.urlretrieve`. + Customisations to the opener classes can be replaced by passing + customized handlers to :func:`~urllib.request.build_opener`. (Contributed by Barney Gale in :gh:`84850`.) -Others ------- -* Using :data:`NotImplemented` in a boolean context will now raise a :exc:`TypeError`. - It had previously raised a :exc:`DeprecationWarning` since Python 3.9. (Contributed - by Jelle Zijlstra in :gh:`118767`.) +Deprecated +========== -* The :func:`int` built-in no longer delegates to - :meth:`~object.__trunc__`. Classes that want to support conversion to - integer must implement either :meth:`~object.__int__` or - :meth:`~object.__index__`. (Contributed by Mark Dickinson in :gh:`119743`.) +New deprecations +---------------- +* Passing a complex number as the *real* or *imag* argument in the + :func:`complex` constructor is now deprecated; + complex numbers should only be passed as a single positional argument. + (Contributed by Serhiy Storchaka in :gh:`109218`.) -CPython bytecode changes -======================== +* :mod:`argparse`: -* Replaced the opcode ``BINARY_SUBSCR`` by :opcode:`BINARY_OP` with oparg ``NB_SUBSCR``. - (Contributed by Irit Katriel in :gh:`100239`.) + * Passing the undocumented keyword argument *prefix_chars* to the + :meth:`~argparse.ArgumentParser.add_argument_group` method is now deprecated. + (Contributed by Savannah Ostrowski in :gh:`125563`.) -Porting to Python 3.14 -====================== + * Deprecated the :class:`argparse.FileType` type converter. + Anything relating to resource management should be handled + downstream, after the arguments have been parsed. + (Contributed by Serhiy Storchaka in :gh:`58032`.) -This section lists previously described changes and other bugfixes -that may require changes to your code. +* :mod:`asyncio`: -Changes in the Python API -------------------------- + * The :func:`!asyncio.iscoroutinefunction` is now deprecated + and will be removed in Python 3.16; + use :func:`inspect.iscoroutinefunction` instead. + (Contributed by Jiahao Li and Kumar Aditya in :gh:`122875`.) -* :class:`functools.partial` is now a method descriptor. - Wrap it in :func:`staticmethod` if you want to preserve the old behavior. - (Contributed by Serhiy Storchaka and Dominykas Grigonis in :gh:`121027`.) + * The :mod:`asyncio` policy system is deprecated + and will be removed in Python 3.16. + In particular, the following classes and functions are deprecated: -* The :func:`locale.nl_langinfo` function now sets temporarily the ``LC_CTYPE`` - locale in some cases. - This temporary change affects other threads. - (Contributed by Serhiy Storchaka in :gh:`69998`.) + * :class:`asyncio.AbstractEventLoopPolicy` + * :class:`asyncio.DefaultEventLoopPolicy` + * :class:`asyncio.WindowsSelectorEventLoopPolicy` + * :class:`asyncio.WindowsProactorEventLoopPolicy` + * :func:`asyncio.get_event_loop_policy` + * :func:`asyncio.set_event_loop_policy` -* :class:`types.UnionType` is now an alias for :class:`typing.Union`, - causing changes in some behaviors. - See :ref:`above ` for more details. - (Contributed by Jelle Zijlstra in :gh:`105499`.) + Users should use :func:`asyncio.run` or :class:`asyncio.Runner` with + the *loop_factory* argument to use the desired event loop implementation. + For example, to use :class:`asyncio.SelectorEventLoop` on Windows: -Build changes -============= + .. code-block:: python -* GNU Autoconf 2.72 is now required to generate :file:`configure`. - (Contributed by Erlend Aasland in :gh:`115765`.) + import asyncio -* ``#pragma``-based linking with ``python3*.lib`` can now be switched off - with :c:expr:`Py_NO_LINK_LIB`. (Contributed by Jean-Christophe - Fillion-Robin in :gh:`82909`.) + async def main(): + ... -.. _whatsnew314-pep761: + asyncio.run(main(), loop_factory=asyncio.SelectorEventLoop) -PEP 761: Discontinuation of PGP signatures ------------------------------------------- + (Contributed by Kumar Aditya in :gh:`127949`.) -PGP signatures will not be available for CPython 3.14 and onwards. -Users verifying artifacts must use `Sigstore verification materials`_ for -verifying CPython artifacts. This change in release process is specified -in :pep:`761`. +* :mod:`codecs`: + The :func:`codecs.open` function is now deprecated, + and will be removed in a future version of Python. + Use :func:`open` instead. + (Contributed by Inada Naoki in :gh:`133036`.) -.. _Sigstore verification materials: https://www.python.org/downloads/metadata/sigstore/ +* :mod:`ctypes`: + * On non-Windows platforms, setting :attr:`.Structure._pack_` to use a + MSVC-compatible default memory layout is now deprecated in favor of setting + :attr:`.Structure._layout_` to ``'ms'``, and will be removed in Python 3.19. + (Contributed by Petr Viktorin in :gh:`131747`.) -C API changes -============= + * Calling :func:`ctypes.POINTER` on a string is now deprecated. + Use :ref:`incomplete types ` + for self-referential structures. + Also, the internal ``ctypes._pointer_type_cache`` is deprecated. + See :func:`ctypes.POINTER` for updated implementation details. + (Contributed by Sergey Myrianov in :gh:`100926`.) -New features ------------- +* :mod:`functools`: + Calling the Python implementation of :func:`functools.reduce` with *function* + or *sequence* as keyword arguments is now deprecated; + the parameters will be made positional-only in Python 3.16. + (Contributed by Kirill Podoprigora in :gh:`121676`.) -* Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects. - (Contributed by Sergey B Kirpichev in :gh:`116560`.) +* :mod:`logging`: + Support for custom logging handlers with the *strm* argument + is now deprecated and scheduled for removal in Python 3.16. + Define handlers with the *stream* argument instead. + (Contributed by Mariusz Felisiak in :gh:`115032`.) -* Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str` - object: +* :mod:`mimetypes`: + Valid extensions are either empty or must start with '.' for + :meth:`mimetypes.MimeTypes.add_type`. + Undotted extensions are deprecated and will + raise a :exc:`ValueError` in Python 3.16. + (Contributed by Hugo van Kemenade in :gh:`75223`.) - * :c:func:`PyUnicodeWriter_Create` - * :c:func:`PyUnicodeWriter_DecodeUTF8Stateful` - * :c:func:`PyUnicodeWriter_Discard` - * :c:func:`PyUnicodeWriter_Finish` - * :c:func:`PyUnicodeWriter_Format` - * :c:func:`PyUnicodeWriter_WriteChar` - * :c:func:`PyUnicodeWriter_WriteRepr` - * :c:func:`PyUnicodeWriter_WriteStr` - * :c:func:`PyUnicodeWriter_WriteSubstring` - * :c:func:`PyUnicodeWriter_WriteUCS4` - * :c:func:`PyUnicodeWriter_WriteUTF8` - * :c:func:`PyUnicodeWriter_WriteWideChar` +* :mod:`!nturl2path`: + This module is now deprecated. Call :func:`urllib.request.url2pathname` + and :func:`~urllib.request.pathname2url` instead. + (Contributed by Barney Gale in :gh:`125866`.) - (Contributed by Victor Stinner in :gh:`119182`.) +* :mod:`os`: + The :func:`os.popen` and :func:`os.spawn* ` functions + are now :term:`soft deprecated`. + They should no longer be used to write new code. + The :mod:`subprocess` module is recommended instead. + (Contributed by Victor Stinner in :gh:`120743`.) -* Add :c:func:`PyIter_NextItem` to replace :c:func:`PyIter_Next`, - which has an ambiguous return value. - (Contributed by Irit Katriel and Erlend Aasland in :gh:`105201`.) +* :mod:`pathlib`: + :meth:`!pathlib.PurePath.as_uri` is now deprecated + and scheduled for removal in Python 3.19. + Use :meth:`pathlib.Path.as_uri` instead. + (Contributed by Barney Gale in :gh:`123599`.) -* Add :c:func:`PyLong_IsPositive`, :c:func:`PyLong_IsNegative` - and :c:func:`PyLong_IsZero` for checking if :c:type:`PyLongObject` - is positive, negative, or zero, respectively. - (Contributed by James Roy and Sergey B Kirpichev in :gh:`126061`.) +* :mod:`pdb`: + The undocumented ``pdb.Pdb.curframe_locals`` attribute is now a deprecated + read-only property, which will be removed in a future version of Python. + The low overhead dynamic frame locals access added in Python 3.13 by :pep:`667` + means the frame locals cache reference previously stored in this attribute + is no longer needed. Derived debuggers should access + ``pdb.Pdb.curframe.f_locals`` directly in Python 3.13 and later versions. + (Contributed by Tian Gao in :gh:`124369` and :gh:`125951`.) -* Add new functions to convert C ```` numbers from/to Python - :class:`int`: +* :mod:`symtable`: + Deprecate :meth:`symtable.Class.get_methods` due to the lack of interest, + scheduled for removal in Python 3.16. + (Contributed by Bénédikt Tran in :gh:`119698`.) - * :c:func:`PyLong_AsInt32` - * :c:func:`PyLong_AsInt64` - * :c:func:`PyLong_AsUInt32` - * :c:func:`PyLong_AsUInt64` - * :c:func:`PyLong_FromInt32` - * :c:func:`PyLong_FromInt64` - * :c:func:`PyLong_FromUInt32` - * :c:func:`PyLong_FromUInt64` +* :mod:`tkinter`: + The :class:`!tkinter.Variable` methods :meth:`!trace_variable`, + :meth:`!trace_vdelete` and :meth:`!trace_vinfo` are now deprecated. + Use :meth:`!trace_add`, :meth:`!trace_remove` and :meth:`!trace_info` instead. + (Contributed by Serhiy Storchaka in :gh:`120220`.) - (Contributed by Victor Stinner in :gh:`120389`.) +* :mod:`urllib.parse`: + Accepting objects with false values (like ``0`` and ``[]``) except empty + strings, bytes-like objects and ``None`` in :func:`~urllib.parse.parse_qsl` + and :func:`~urllib.parse.parse_qs` is now deprecated. + (Contributed by Serhiy Storchaka in :gh:`116897`.) -* Add :c:func:`PyBytes_Join(sep, iterable) ` function, - similar to ``sep.join(iterable)`` in Python. - (Contributed by Victor Stinner in :gh:`121645`.) +.. Add deprecations above alphabetically, not here at the end. -* Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer. - (Contributed by Antoine Pitrou and Victor Stinner in :gh:`122854`.) +.. include:: ../deprecations/pending-removal-in-3.15.rst -* Add functions to get and set the current runtime Python configuration - (:pep:`741`): +.. include:: ../deprecations/pending-removal-in-3.16.rst - * :c:func:`PyConfig_Get` - * :c:func:`PyConfig_GetInt` - * :c:func:`PyConfig_Set` - * :c:func:`PyConfig_Names` +.. include:: ../deprecations/pending-removal-in-3.17.rst - (Contributed by Victor Stinner in :gh:`107954`.) +.. include:: ../deprecations/pending-removal-in-3.19.rst -* Add functions to configure the Python initialization (:pep:`741`): +.. include:: ../deprecations/pending-removal-in-future.rst - * :c:func:`Py_InitializeFromInitConfig` - * :c:func:`PyInitConfig_AddModule` - * :c:func:`PyInitConfig_Create` - * :c:func:`PyInitConfig_Free` - * :c:func:`PyInitConfig_FreeStrList` - * :c:func:`PyInitConfig_GetError` - * :c:func:`PyInitConfig_GetExitCode` - * :c:func:`PyInitConfig_GetInt` - * :c:func:`PyInitConfig_GetStr` - * :c:func:`PyInitConfig_GetStrList` - * :c:func:`PyInitConfig_HasOption` - * :c:func:`PyInitConfig_SetInt` - * :c:func:`PyInitConfig_SetStr` - * :c:func:`PyInitConfig_SetStrList` - (Contributed by Victor Stinner in :gh:`107954`.) +CPython bytecode changes +======================== -* Add a new import and export API for Python :class:`int` objects (:pep:`757`): +* Replaced the opcode :opcode:`!BINARY_SUBSCR` by the :opcode:`BINARY_OP` + opcode with the ``NB_SUBSCR`` oparg. + (Contributed by Irit Katriel in :gh:`100239`.) - * :c:func:`PyLong_GetNativeLayout` - * :c:func:`PyLong_Export` - * :c:func:`PyLong_FreeExport` - * :c:func:`PyLongWriter_Create` - * :c:func:`PyLongWriter_Finish` - * :c:func:`PyLongWriter_Discard` +* Add the :opcode:`BUILD_INTERPOLATION` and :opcode:`BUILD_TEMPLATE` + opcodes to construct new :class:`~string.templatelib.Interpolation` + and :class:`~string.templatelib.Template` instances, respectively. + (Contributed by Lysandros Nikolaou and others in :gh:`132661`; + see also :ref:`PEP 750: Template strings `). - (Contributed by Sergey B Kirpichev and Victor Stinner in :gh:`102471`.) +* Remove the :opcode:`!BUILD_CONST_KEY_MAP` opcode. + Use :opcode:`BUILD_MAP` instead. + (Contributed by Mark Shannon in :gh:`122160`.) -* Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot for easier - superclass identification, which attempts to resolve the `type checking issue - `__ mentioned in :pep:`630` - (:gh:`124153`). +* Replace the :opcode:`!LOAD_ASSERTION_ERROR` opcode with + :opcode:`LOAD_COMMON_CONSTANT` and add support for loading + :exc:`NotImplementedError`. -* Add :c:func:`PyUnicode_Equal` function to the limited C API: - test if two strings are equal. - (Contributed by Victor Stinner in :gh:`124502`.) +* Add the :opcode:`LOAD_FAST_BORROW` and :opcode:`LOAD_FAST_BORROW_LOAD_FAST_BORROW` + opcodes to reduce reference counting overhead when the interpreter can prove + that the reference in the frame outlives the reference loaded onto the stack. + (Contributed by Matt Page in :gh:`130704`.) -* Add :c:func:`PyType_Freeze` function to make a type immutable. - (Contributed by Victor Stinner in :gh:`121654`.) +* Add the :opcode:`LOAD_SMALL_INT` opcode, which pushes a small integer + equal to the ``oparg`` to the stack. + The :opcode:`!RETURN_CONST` opcode is removed as it is no longer used. + (Contributed by Mark Shannon in :gh:`125837`.) -* Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling - deferred reference counting, as outlined in :pep:`703`. +* Add the new :opcode:`LOAD_SPECIAL` instruction. + Generate code for :keyword:`with` and :keyword:`async with` statements + using the new instruction. + Removed the :opcode:`!BEFORE_WITH` and :opcode:`!BEFORE_ASYNC_WITH` instructions. + (Contributed by Mark Shannon in :gh:`120507`.) -* Add :c:func:`PyMonitoring_FireBranchLeftEvent` and - :c:func:`PyMonitoring_FireBranchRightEvent` for generating - :monitoring-event:`BRANCH_LEFT` and :monitoring-event:`BRANCH_RIGHT` - events, respectively. +* Add the :opcode:`POP_ITER` opcode to support 'virtual' iterators. + (Contributed by Mark Shannon in :gh:`132554`.) -* Add :c:func:`Py_fopen` function to open a file. Similar to the - :c:func:`!fopen` function, but the *path* parameter is a Python object and an - exception is set on error. Add also :c:func:`Py_fclose` function to close a - file. - (Contributed by Victor Stinner in :gh:`127350`.) -* Add support of nullable arguments in :c:func:`PyArg_ParseTuple` and - similar functions. - Adding ``?`` after any format unit makes ``None`` be accepted as a value. - (Contributed by Serhiy Storchaka in :gh:`112068`.) +Pseudo-instructions +------------------- -* The ``k`` and ``K`` formats in :c:func:`PyArg_ParseTuple` and - similar functions now use :meth:`~object.__index__` if available, - like all other integer formats. - (Contributed by Serhiy Storchaka in :gh:`112068`.) +* Add the :opcode:`!ANNOTATIONS_PLACEHOLDER` pseudo instruction + to support partially executed module-level annotations with + :ref:`deferred evaluation of annotations `. + (Contributed by Jelle Zijlstra in :gh:`130907`.) + +* Add the :opcode:`!BINARY_OP_EXTEND` pseudo instruction, + which executes a pair of functions (guard and specialization functions) + accessed from the inline cache. + (Contributed by Irit Katriel in :gh:`100239`.) + +* Add three specializations for :opcode:`CALL_KW`; + :opcode:`!CALL_KW_PY` for calls to Python functions, + :opcode:`!CALL_KW_BOUND_METHOD` for calls to bound methods, and + :opcode:`!CALL_KW_NON_PY` for all other calls. + (Contributed by Mark Shannon in :gh:`118093`.) + +* Add the :opcode:`JUMP_IF_TRUE` and :opcode:`JUMP_IF_FALSE` pseudo instructions, + conditional jumps which do not impact the stack. + Replaced by the sequence ``COPY 1``, ``TO_BOOL``, ``POP_JUMP_IF_TRUE/FALSE``. + (Contributed by Irit Katriel in :gh:`124285`.) + +* Add the :opcode:`!LOAD_CONST_MORTAL` pseudo instruction. + (Contributed by Mark Shannon in :gh:`128685`.) + +* Add the :opcode:`LOAD_CONST_IMMORTAL` pseudo instruction, + which does the same as :opcode:`!LOAD_CONST`, but is more efficient + for immortal objects. + (Contributed by Mark Shannon in :gh:`125837`.) + +* Add the :opcode:`NOT_TAKEN` pseudo instruction, used by :mod:`sys.monitoring` + to record branch events (such as :monitoring-event:`BRANCH_LEFT`). + (Contributed by Mark Shannon in :gh:`122548`.) + + +C API changes +============= + +.. _whatsnew314-capi-config: + +Python configuration C API +-------------------------- + +Add a :ref:`PyInitConfig C API ` to configure the Python +initialization without relying on C structures and the ability to make +ABI-compatible changes in the future. + +Complete the :pep:`587` :ref:`PyConfig C API ` by adding +:c:func:`PyInitConfig_AddModule` which can be used to add a built-in extension +module; a feature previously referred to as the "inittab". + +Add :c:func:`PyConfig_Get` and :c:func:`PyConfig_Set` functions to get and set +the current runtime configuration. + +:pep:`587` 'Python Initialization Configuration' unified all the ways +to configure Python's initialization. This PEP also unifies the configuration +of Python's preinitialization and initialization in a single API. +Moreover, this PEP only provides a single choice to embed Python, +instead of having two 'Python' and 'Isolated' choices (PEP 587), +to further simplify the API. + +The lower level PEP 587 PyConfig API remains available for use cases +with an intentionally higher level of coupling to CPython implementation details +(such as emulating the full functionality of CPython's CLI, including its +configuration mechanisms). + +(Contributed by Victor Stinner in :gh:`107954`.) -* Add macros :c:func:`Py_PACK_VERSION` and :c:func:`Py_PACK_FULL_VERSION` for - bit-packing Python version numbers. +.. seealso:: :pep:`741` and :pep:`587` + + +New features in the C API +------------------------- + +* Add :c:func:`Py_PACK_VERSION` and :c:func:`Py_PACK_FULL_VERSION`, + two new macros for bit-packing Python version numbers. + This is useful for comparisons with :c:var:`Py_Version` + or :c:macro:`PY_VERSION_HEX`. (Contributed by Petr Viktorin in :gh:`128629`.) -* Add :c:func:`PyUnstable_IsImmortal` for determining whether an object is :term:`immortal`, - for debugging purposes. +* Add :c:func:`PyBytes_Join(sep, iterable) ` function, + similar to ``sep.join(iterable)`` in Python. + (Contributed by Victor Stinner in :gh:`121645`.) + +* Add functions to manipulate the configuration of the current + runtime Python interpreter + (:ref:`PEP 741: Python configuration C API `): + + * :c:func:`PyConfig_Get` + * :c:func:`PyConfig_GetInt` + * :c:func:`PyConfig_Set` + * :c:func:`PyConfig_Names` + + (Contributed by Victor Stinner in :gh:`107954`.) + +* Add functions to configure Python initialization + (:ref:`PEP 741: Python configuration C API `): + + * :c:func:`Py_InitializeFromInitConfig` + * :c:func:`PyInitConfig_AddModule` + * :c:func:`PyInitConfig_Create` + * :c:func:`PyInitConfig_Free` + * :c:func:`PyInitConfig_FreeStrList` + * :c:func:`PyInitConfig_GetError` + * :c:func:`PyInitConfig_GetExitCode` + * :c:func:`PyInitConfig_GetInt` + * :c:func:`PyInitConfig_GetStr` + * :c:func:`PyInitConfig_GetStrList` + * :c:func:`PyInitConfig_HasOption` + * :c:func:`PyInitConfig_SetInt` + * :c:func:`PyInitConfig_SetStr` + * :c:func:`PyInitConfig_SetStrList` + + (Contributed by Victor Stinner in :gh:`107954`.) + +* Add :c:func:`Py_fopen` function to open a file. + This works similarly to the standard C :c:func:`!fopen` function, + instead accepting a Python object for the *path* parameter + and setting an exception on error. + The corresponding new :c:func:`Py_fclose` function should be used + to close a file. + (Contributed by Victor Stinner in :gh:`127350`.) + +* Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer. + (Contributed by Antoine Pitrou and Victor Stinner in :gh:`122854`.) * Add :c:func:`PyImport_ImportModuleAttr` and :c:func:`PyImport_ImportModuleAttrString` helper functions to import a module and get an attribute of the module. (Contributed by Victor Stinner in :gh:`128911`.) -* Add support for a new ``p`` format unit in :c:func:`Py_BuildValue` that allows to - take a C integer and produce a Python :class:`bool` object. (Contributed by - Pablo Galindo in :issue:`45325`.) +* Add :c:func:`PyIter_NextItem` to replace :c:func:`PyIter_Next`, + which has an ambiguous return value. + (Contributed by Irit Katriel and Erlend Aasland in :gh:`105201`.) + +* Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects. + (Contributed by Sergey B Kirpichev in :gh:`116560`.) + +* Add :c:func:`PyLong_IsPositive`, :c:func:`PyLong_IsNegative` + and :c:func:`PyLong_IsZero` for checking if :c:type:`PyLongObject` + is positive, negative, or zero, respectively. + (Contributed by James Roy and Sergey B Kirpichev in :gh:`126061`.) + +* Add new functions to convert C ```` numbers to/from + Python :class:`int` objects: + + * :c:func:`PyLong_AsInt32` + * :c:func:`PyLong_AsInt64` + * :c:func:`PyLong_AsUInt32` + * :c:func:`PyLong_AsUInt64` + * :c:func:`PyLong_FromInt32` + * :c:func:`PyLong_FromInt64` + * :c:func:`PyLong_FromUInt32` + * :c:func:`PyLong_FromUInt64` + + (Contributed by Victor Stinner in :gh:`120389`.) + +* Add a new import and export API for Python :class:`int` objects + (:pep:`757`): + + * :c:func:`PyLong_GetNativeLayout` + * :c:func:`PyLong_Export` + * :c:func:`PyLong_FreeExport` + * :c:func:`PyLongWriter_Create` + * :c:func:`PyLongWriter_Finish` + * :c:func:`PyLongWriter_Discard` + + (Contributed by Sergey B Kirpichev and Victor Stinner in :gh:`102471`.) + +* Add :c:func:`PyMonitoring_FireBranchLeftEvent` and + :c:func:`PyMonitoring_FireBranchRightEvent` for generating + :monitoring-event:`BRANCH_LEFT` and :monitoring-event:`BRANCH_RIGHT` + events, respectively. + (Contributed by Mark Shannon in :gh:`122548`.) + +* Add :c:func:`PyType_Freeze` function to make a type immutable. + (Contributed by Victor Stinner in :gh:`121654`.) + +* Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot + for easier superclass identification, which attempts to resolve the + type checking issue mentioned in :pep:`PEP 630 <630#type-checking>`. + (Contributed in :gh:`124153`.) + +* Add a new :c:func:`PyUnicode_Equal` function to test if two + strings are equal. + The function is also added to the Limited C API. + (Contributed by Victor Stinner in :gh:`124502`.) -* Add :c:func:`PyUnstable_Object_IsUniqueReferencedTemporary` to determine if an object - is a unique temporary object on the interpreter's operand stack. This can - be used in some cases as a replacement for checking if :c:func:`Py_REFCNT` - is ``1`` for Python objects passed as arguments to C API functions. +* Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str` + object, with the following functions: -* Add :c:func:`PyUnstable_Object_IsUniquelyReferenced` as a replacement for - ``Py_REFCNT(op) == 1`` on :term:`free threaded ` builds. + * :c:func:`PyUnicodeWriter_Create` + * :c:func:`PyUnicodeWriter_DecodeUTF8Stateful` + * :c:func:`PyUnicodeWriter_Discard` + * :c:func:`PyUnicodeWriter_Finish` + * :c:func:`PyUnicodeWriter_Format` + * :c:func:`PyUnicodeWriter_WriteASCII` + * :c:func:`PyUnicodeWriter_WriteChar` + * :c:func:`PyUnicodeWriter_WriteRepr` + * :c:func:`PyUnicodeWriter_WriteStr` + * :c:func:`PyUnicodeWriter_WriteSubstring` + * :c:func:`PyUnicodeWriter_WriteUCS4` + * :c:func:`PyUnicodeWriter_WriteUTF8` + * :c:func:`PyUnicodeWriter_WriteWideChar` + + (Contributed by Victor Stinner in :gh:`119182`.) + +* The ``k`` and ``K`` formats in :c:func:`PyArg_ParseTuple` and + similar functions now use :meth:`~object.__index__` if available, + like all other integer formats. + (Contributed by Serhiy Storchaka in :gh:`112068`.) + +* Add support for a new ``p`` format unit in :c:func:`Py_BuildValue` + that produces a Python :class:`bool` object from a C integer. + (Contributed by Pablo Galindo in :issue:`45325`.) + +* Add :c:func:`PyUnstable_IsImmortal` for determining if + an object is :term:`immortal`, for debugging purposes. + (Contributed by Peter Bierma in :gh:`128509`.) + +* Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling + deferred reference counting, as outlined in :pep:`703`. + +* Add :c:func:`PyUnstable_Object_IsUniquelyReferenced` as + a replacement for ``Py_REFCNT(op) == 1`` on :term:`free threaded + ` builds. (Contributed by Peter Bierma in :gh:`133140`.) +* Add :c:func:`PyUnstable_Object_IsUniqueReferencedTemporary` to + determine if an object is a unique temporary object on the + interpreter's operand stack. + This can be used in some cases as a replacement for checking + if :c:func:`Py_REFCNT` is ``1`` for Python objects passed + as arguments to C API functions. + (Contributed by Sam Gross in :gh:`133164`.) + Limited C API changes --------------------- -* In the limited C API 3.14 and newer, :c:func:`Py_TYPE` and - :c:func:`Py_REFCNT` are now implemented as an opaque function call to hide - implementation details. +* In the limited C API version 3.14 and newer, :c:func:`Py_TYPE` and + :c:func:`Py_REFCNT` are now implemented as an opaque function call + to hide implementation details. (Contributed by Victor Stinner in :gh:`120600` and :gh:`124127`.) * Remove the :c:macro:`PySequence_Fast_GET_SIZE`, - :c:macro:`PySequence_Fast_GET_ITEM` and :c:macro:`PySequence_Fast_ITEMS` - macros from the limited C API, since these macros never worked in the limited - C API. Keep :c:func:`PySequence_Fast` in the limited C API. + :c:macro:`PySequence_Fast_GET_ITEM`, + and :c:macro:`PySequence_Fast_ITEMS` + macros from the limited C API, since they have always been broken + in the limited C API. (Contributed by Victor Stinner in :gh:`91417`.) +.. _whatsnew314-c-api-removed: + +Removed C APIs +-------------- + +* Creating :c:data:`immutable types ` with + mutable bases was deprecated in Python 3.12, + and now raises a :exc:`TypeError`. + (Contributed by Nikita Sobolev in :gh:`119775`.) + +* Remove ``PyDictObject.ma_version_tag`` member, which was deprecated + in Python 3.12. + Use the :c:func:`PyDict_AddWatcher` API instead. + (Contributed by Sam Gross in :gh:`124296`.) + +* Remove the private ``_Py_InitializeMain()`` function. + It was a :term:`provisional API` added to Python 3.8 by :pep:`587`. + (Contributed by Victor Stinner in :gh:`129033`.) + +* Remove the undocumented APIs :c:macro:`!Py_C_RECURSION_LIMIT` + and :c:member:`!PyThreadState.c_recursion_remaining`. + These were added in 3.13 and have been removed without deprecation. + Use :c:func:`Py_EnterRecursiveCall` to guard against runaway + recursion in C code. + (Removed by Petr Viktorin in :gh:`133079`, see also :gh:`130396`.) + + +.. _whatsnew314-c-api-deprecated: + +Deprecated C APIs +----------------- + +* The :c:macro:`!Py_HUGE_VAL` macro is now :term:`soft deprecated`. + Use :c:macro:`!Py_INFINITY` instead. + (Contributed by Sergey B Kirpichev in :gh:`120026`.) + +* The :c:macro:`!Py_IS_NAN`, :c:macro:`!Py_IS_INFINITY`, + and :c:macro:`!Py_IS_FINITE` macros are now :term:`soft deprecated`. + Use :c:macro:`!isnan`, :c:macro:`!isinf` and :c:macro:`!isfinite` + instead, available from :file:`math.h` since C99. + (Contributed by Sergey B Kirpichev in :gh:`119613`.) + +* Non-tuple sequences are now deprecated as argument for the ``(items)`` + format unit in :c:func:`PyArg_ParseTuple` and other :ref:`argument + parsing ` functions if *items* contains format units + which store a :ref:`borrowed buffer ` or a + :term:`borrowed reference`. + (Contributed by Serhiy Storchaka in :gh:`50333`.) + +* The ``_PyMonitoring_FireBranchEvent`` function is now deprecated + and should be replaced with calls to + :c:func:`PyMonitoring_FireBranchLeftEvent` and + :c:func:`PyMonitoring_FireBranchRightEvent`. + +* The previously undocumented function :c:func:`PySequence_In` is + now :term:`soft deprecated`. + Use :c:func:`PySequence_Contains` instead. + (Contributed by Yuki Kobayashi in :gh:`127896`.) + +.. Add C API deprecations above alphabetically, not here at the end. + +.. include:: ../deprecations/c-api-pending-removal-in-3.15.rst + +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + +.. include:: ../deprecations/c-api-pending-removal-in-3.18.rst + +.. include:: ../deprecations/c-api-pending-removal-in-future.rst + + +.. _whatsnew314-build-changes: + +Build changes +============= + +* :pep:`776`: Emscripten is now an officially supported platform at + :pep:`tier 3 <11#tier-3>`. As a part of this effort, more than 25 bugs in + `Emscripten libc`__ were fixed. Emscripten now includes support + for :mod:`ctypes`, :mod:`termios`, and :mod:`fcntl`, as well as + experimental support for the new :ref:`default interactive shell + `. + (Contributed by R. Hood Chatham in :gh:`127146`, :gh:`127683`, and :gh:`136931`.) + + __ https://emscripten.org/docs/porting/emscripten-runtime-environment.html + +* Official Android binary releases are now provided on python.org__. + + __ https://www.python.org/downloads/android/ + +* GNU Autoconf 2.72 is now required to generate :file:`configure`. + (Contributed by Erlend Aasland in :gh:`115765`.) + +* ``wasm32-unknown-emscripten`` is now a :pep:`11` tier 3 platform. + (Contributed by R. Hood Chatham in :gh:`127146`, :gh:`127683`, and :gh:`136931`.) + +* ``#pragma``-based linking with ``python3*.lib`` can now be switched off + with :c:expr:`Py_NO_LINK_LIB`. + (Contributed by Jean-Christophe Fillion-Robin in :gh:`82909`.) + +* CPython now enables a set of recommended compiler options by default + for improved security. + Use the :option:`--disable-safety` :file:`configure` option to disable them, + or the :option:`--enable-slower-safety` option for a larger set + of compiler options, albeit with a performance cost. + +* The ``WITH_FREELISTS`` macro and ``--without-freelists`` :file:`configure` + option have been removed. + +* The new :file:`configure` option :option:`--with-tail-call-interp` + may be used to enable the experimental tail call interpreter. + See :ref:`whatsnew314-tail-call-interpreter` for further details. + +* To disable the new remote debugging support, use the + :option:`--without-remote-debug` :file:`configure` option. + This may be useful for security reasons. + +* iOS and macOS apps can now be configured to redirect ``stdout`` and + ``stderr`` content to the system log. + (Contributed by Russell Keith-Magee in :gh:`127592`.) + +* The iOS testbed is now able to stream test output while the test is running. + The testbed can also be used to run the test suite of projects other than + CPython itself. + (Contributed by Russell Keith-Magee in :gh:`127592`.) + + +.. _whatsnew314-build_details: + +:file:`build-details.json` +-------------------------- + +Installations of Python now contain a new file, :file:`build-details.json`. +This is a static JSON document containing build details for CPython, +to allow for introspection without needing to run code. +This is helpful for use-cases such as Python launchers, cross-compilation, +and so on. + +:file:`build-details.json` must be installed in the platform-independent +standard library directory. This corresponds to the :ref:`'stdlib' +` :mod:`sysconfig` installation path, +which can be found by running ``sysconfig.get_path('stdlib')``. + +.. seealso:: + :pep:`739` -- ``build-details.json`` 1.0 -- a static description file + for Python build details + + +.. _whatsnew314-no-more-pgp: + +Discontinuation of PGP signatures +--------------------------------- + +PGP (Pretty Good Privacy) signatures will not be provided +for releases of Python 3.14 or future versions. +To verify CPython artifacts, users must use `Sigstore verification materials +`__. +Releases have been signed using Sigstore_ since Python 3.11. + +This change in release process was specified in :pep:`761`. + +.. _Sigstore: https://www.sigstore.dev/ + + +.. _whatsnew314-free-threaded-now-supported: + +Free-threaded Python is officially supported +-------------------------------------------- + +The free-threaded build of Python is now supported and no longer experimental. +This is the start of `phase II `__ where +free-threaded Python is officially supported but still optional. + +The free-threading team are confident that the project is on the right path, +and appreciate the continued dedication from everyone working to make +free-threading ready for broader adoption across the Python community. + +With these recommendations and the acceptance of this PEP, the Python developer +community should broadly advertise that free-threading is a supported +Python build option now and into the future, and that it will not be removed +without a proper deprecation schedule. + +Any decision to transition to `phase III `__, +with free-threading as the default or sole build of Python is still undecided, +and dependent on many factors both within CPython itself and the community. +This decision is for the future. + +.. seealso:: + + :pep:`779` + + `PEP 779's acceptance `__ + + +.. _whatsnew314-jit-compiler: + +Binary releases for the experimental just-in-time compiler +---------------------------------------------------------- + +The official macOS and Windows release binaries now include an *experimental* +just-in-time (JIT) compiler. Although it is **not** recommended for production +use, it can be tested by setting :envvar:`PYTHON_JIT=1 ` as an +environment variable. Downstream source builds and redistributors can use the +:option:`--enable-experimental-jit=yes-off` configuration option for similar +behavior. + +The JIT is at an early stage and still in active development. As such, the +typical performance impact of enabling it can range from 10% slower to 20% +faster, depending on workload. To aid in testing and evaluation, a set of +introspection functions has been provided in the :data:`sys._jit` namespace. +:func:`sys._jit.is_available` can be used to determine if the current executable +supports JIT compilation, while :func:`sys._jit.is_enabled` can be used to tell +if JIT compilation has been enabled for the current process. + +Currently, the most significant missing functionality is that native debuggers +and profilers like ``gdb`` and ``perf`` are unable to unwind through JIT frames +(Python debuggers and profilers, like :mod:`pdb` or :mod:`profile`, continue to +work without modification). Free-threaded builds do not support JIT compilation. + +Please report any bugs or major performance regressions that you encounter! + +.. seealso:: :pep:`744` + + Porting to Python 3.14 ----------------------- +====================== + +This section lists previously described changes and other bugfixes +that may require changes to your code. + + +Changes in the Python API +------------------------- + +* On Unix platforms other than macOS, *forkserver* is now the default + :ref:`start method ` for :mod:`multiprocessing` + and :class:`~concurrent.futures.ProcessPoolExecutor`, instead of *fork*. + + See :ref:`(1) ` and + :ref:`(2) ` for details. + + If you encounter :exc:`NameError`\s or pickling errors coming out of + :mod:`multiprocessing` or :mod:`concurrent.futures`, see the + :ref:`forkserver restrictions `. + + This change does not affect Windows or macOS, where :ref:`'spawn' + ` remains the default start method. + +* :class:`functools.partial` is now a method descriptor. + Wrap it in :func:`staticmethod` if you want to preserve the old behavior. + (Contributed by Serhiy Storchaka and Dominykas Grigonis in :gh:`121027`.) + +* The :ref:`garbage collector is now incremental `, + which means that the behavior of :func:`gc.collect` changes slightly: + + * ``gc.collect(1)``: Performs an increment of garbage collection, + rather than collecting generation 1. + * Other calls to :func:`!gc.collect` are unchanged. + +* The :func:`locale.nl_langinfo` function now temporarily sets the ``LC_CTYPE`` + locale in some cases. + This temporary change affects other threads. + (Contributed by Serhiy Storchaka in :gh:`69998`.) + +* :class:`types.UnionType` is now an alias for :class:`typing.Union`, + causing changes in some behaviors. + See :ref:`above ` for more details. + (Contributed by Jelle Zijlstra in :gh:`105499`.) + +* The runtime behavior of annotations has changed in various ways; see + :ref:`above ` for details. While most code that interacts + with annotations should continue to work, some undocumented details may behave + differently. + +* As part of making the :mod:`mimetypes` CLI public, + it now exits with ``1`` on failure instead of ``0`` + and ``2`` on incorrect command-line parameters instead of ``1``. + Error messages are now printed to stderr. + +* The ``\B`` pattern in regular expression now matches the empty string + when given as the entire pattern, which may cause behavioural changes. + +* On FreeBSD, :data:`sys.platform` no longer contains the major version number. + + +.. _whatsnew314-porting-annotations: + +Changes in annotations (:pep:`649` and :pep:`749`) +-------------------------------------------------- + +This section contains guidance on changes that may be needed to annotations +or Python code that interacts with or introspects annotations, +due to the changes related to :ref:`deferred evaluation of annotations +`. + +In the majority of cases, working code from older versions of Python +will not require any changes. + + +Implications for annotated code +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you define annotations in your code (for example, for use with a static type +checker), then this change probably does not affect you: you can keep +writing annotations the same way you did with previous versions of Python. + +You will likely be able to remove quoted strings in annotations, which are frequently +used for forward references. Similarly, if you use ``from __future__ import annotations`` +to avoid having to write strings in annotations, you may well be able to +remove that import once you support only Python 3.14 and newer. +However, if you rely on third-party libraries that read annotations, +those libraries may need changes to support unquoted annotations before they +work as expected. + + +Implications for readers of ``__annotations__`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If your code reads the :attr:`~object.__annotations__` attribute on objects, +you may want to make changes in order to support code that relies on +deferred evaluation of annotations. +For example, you may want to use :func:`annotationlib.get_annotations` with +the :attr:`~annotationlib.Format.FORWARDREF` format, +as the :mod:`dataclasses` module now does. + +The external :pypi:`typing_extensions` package provides partial backports +of some of the functionality of the :mod:`annotationlib` module, +such as the :class:`~annotationlib.Format` enum and +the :func:`~annotationlib.get_annotations` function. +These can be used to write cross-version code that takes advantage of +the new behavior in Python 3.14. + + +Related changes +^^^^^^^^^^^^^^^ + +The changes in Python 3.14 are designed to rework how :attr:`!__annotations__` +works at runtime while minimizing breakage to code that contains +annotations in source code and to code that reads :attr:`!__annotations__`. +However, if you rely on undocumented details of the annotation behavior +or on private functions in the standard library, there are many ways in which +your code may not work in Python 3.14. +To safeguard your code against future changes, only use the documented +functionality of the :mod:`annotationlib` module. + +In particular, do not read annotations directly from the namespace dictionary +attribute of type objects. +Use :func:`annotationlib.get_annotate_from_class_namespace` during class +construction and :func:`annotationlib.get_annotations` afterwards. + +In previous releases, it was sometimes possible to access class annotations +from an instance of an annotated class. This behavior was undocumented +and accidental, and will no longer work in Python 3.14. + + +``from __future__ import annotations`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In Python 3.7, :pep:`563` introduced the ``from __future__ import annotations`` +:ref:`future statement `, which turns all annotations into strings. + +However, this statement is now deprecated and it is expected to be removed +in a future version of Python. +This removal will not happen until after Python 3.13 reaches its end of life +in 2029, being the last version of Python without support for deferred +evaluation of annotations. + +In Python 3.14, the behavior of code using ``from __future__ import annotations`` +is unchanged. + + +Changes in the C API +-------------------- * :c:func:`Py_Finalize` now deletes all interned strings. This - is backwards incompatible to any C-Extension that holds onto an interned + is backwards incompatible to any C extension that holds onto an interned string after a call to :c:func:`Py_Finalize` and is then reused after a call to :c:func:`Py_Initialize`. Any issues arising from this behavior will normally result in crashes during the execution of the subsequent call to @@ -2657,6 +3429,7 @@ Porting to Python 3.14 * ``_Py_GetConfig()``: :c:func:`PyConfig_Get` and :c:func:`PyConfig_GetInt` * ``_Py_HashBytes()``: :c:func:`Py_HashBuffer` * ``_Py_fopen_obj()``: :c:func:`Py_fopen` + * ``PyMutex_IsLocked()`` : :c:func:`PyMutex_IsLocked` The `pythoncapi-compat project`_ can be used to get most of these new functions on Python 3.13 and older. @@ -2664,106 +3437,11 @@ Porting to Python 3.14 .. _pythoncapi-compat project: https://github.com/python/pythoncapi-compat/ -.. _whatsnew314-c-api-deprecated: - -Deprecated ----------- - -* The :c:macro:`!Py_HUGE_VAL` macro is :term:`soft deprecated`, - use :c:macro:`!Py_INFINITY` instead. - (Contributed by Sergey B Kirpichev in :gh:`120026`.) - -* Macros :c:macro:`!Py_IS_NAN`, :c:macro:`!Py_IS_INFINITY` - and :c:macro:`!Py_IS_FINITE` are :term:`soft deprecated`, - use instead :c:macro:`!isnan`, :c:macro:`!isinf` and - :c:macro:`!isfinite` available from :file:`math.h` - since C99. (Contributed by Sergey B Kirpichev in :gh:`119613`.) - -* Non-tuple sequences are deprecated as argument for the ``(items)`` - format unit in :c:func:`PyArg_ParseTuple` and other - :ref:`argument parsing ` functions if *items* contains - format units which store a :ref:`borrowed buffer ` - or a :term:`borrowed reference`. - (Contributed by Serhiy Storchaka in :gh:`50333`.) - -* The previously undocumented function :c:func:`PySequence_In` is :term:`soft deprecated`. - Use :c:func:`PySequence_Contains` instead. - (Contributed by Yuki Kobayashi in :gh:`127896`.) - -.. Add C API deprecations above alphabetically, not here at the end. - -* The ``PyMonitoring_FireBranchEvent`` function is deprecated and should - be replaced with calls to :c:func:`PyMonitoring_FireBranchLeftEvent` - and :c:func:`PyMonitoring_FireBranchRightEvent`. - -* The following private functions are deprecated and planned for removal in - Python 3.18: - - * :c:func:`!_PyBytes_Join`: use :c:func:`PyBytes_Join`. - * :c:func:`!_PyDict_GetItemStringWithError`: use :c:func:`PyDict_GetItemStringRef`. - * :c:func:`!_PyDict_Pop()`: use :c:func:`PyDict_Pop`. - * :c:func:`!_PyLong_Sign()`: use :c:func:`PyLong_GetSign`. - * :c:func:`!_PyLong_FromDigits` and :c:func:`!_PyLong_New`: - use :c:func:`PyLongWriter_Create`. - * :c:func:`!_PyThreadState_UncheckedGet`: use :c:func:`PyThreadState_GetUnchecked`. - * :c:func:`!_PyUnicode_AsString`: use :c:func:`PyUnicode_AsUTF8`. - * :c:func:`!_PyUnicodeWriter_Init`: - replace ``_PyUnicodeWriter_Init(&writer)`` with - :c:func:`writer = PyUnicodeWriter_Create(0) `. - * :c:func:`!_PyUnicodeWriter_Finish`: - replace ``_PyUnicodeWriter_Finish(&writer)`` with - :c:func:`PyUnicodeWriter_Finish(writer) `. - * :c:func:`!_PyUnicodeWriter_Dealloc`: - replace ``_PyUnicodeWriter_Dealloc(&writer)`` with - :c:func:`PyUnicodeWriter_Discard(writer) `. - * :c:func:`!_PyUnicodeWriter_WriteChar`: - replace ``_PyUnicodeWriter_WriteChar(&writer, ch)`` with - :c:func:`PyUnicodeWriter_WriteChar(writer, ch) `. - * :c:func:`!_PyUnicodeWriter_WriteStr`: - replace ``_PyUnicodeWriter_WriteStr(&writer, str)`` with - :c:func:`PyUnicodeWriter_WriteStr(writer, str) `. - * :c:func:`!_PyUnicodeWriter_WriteSubstring`: - replace ``_PyUnicodeWriter_WriteSubstring(&writer, str, start, end)`` with - :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) `. - * :c:func:`!_PyUnicodeWriter_WriteASCIIString`: - replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with - :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. - * :c:func:`!_PyUnicodeWriter_WriteLatin1String`: - replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with - :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. - * :c:func:`!_Py_HashPointer`: use :c:func:`Py_HashPointer`. - * :c:func:`!_Py_fopen_obj`: use :c:func:`Py_fopen`. - - The `pythoncapi-compat project`_ can be used to get these new public - functions on Python 3.13 and older. - (Contributed by Victor Stinner in :gh:`128863`.) - -.. include:: ../deprecations/c-api-pending-removal-in-3.15.rst - -.. include:: ../deprecations/c-api-pending-removal-in-3.18.rst - -.. include:: ../deprecations/c-api-pending-removal-in-future.rst - - -.. _whatsnew314-c-api-removed: - -Removed -------- - -* Creating :c:data:`immutable types ` with mutable - bases was deprecated since 3.12 and now raises a :exc:`TypeError`. - -* Remove ``PyDictObject.ma_version_tag`` member which was deprecated since - Python 3.12. Use the :c:func:`PyDict_AddWatcher` API instead. - (Contributed by Sam Gross in :gh:`124296`.) - -* Remove the private ``_Py_InitializeMain()`` function. It was a - :term:`provisional API` added to Python 3.8 by :pep:`587`. - (Contributed by Victor Stinner in :gh:`129033`.) +Notable changes in 3.14.1 +========================= -* The undocumented APIs :c:macro:`!Py_C_RECURSION_LIMIT` and - :c:member:`!PyThreadState.c_recursion_remaining`, added in 3.13, are removed - without a deprecation period. - Please use :c:func:`Py_EnterRecursiveCall` to guard against runaway recursion - in C code. - (Removed in :gh:`133079`, see also :gh:`130396`.) +* Add :c:func:`PyUnstable_ThreadState_SetStackProtection` and + :c:func:`PyUnstable_ThreadState_ResetStackProtection` functions to set + the stack protection base address and stack protection size of a Python + thread state. + (Contributed by Victor Stinner in :gh:`139653`.) diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst index 7104904c956a7a..47c4d9acbc870e 100644 --- a/Doc/whatsnew/3.2.rst +++ b/Doc/whatsnew/3.2.rst @@ -458,7 +458,7 @@ Some smaller changes made to the core Python language are: :class:`~collections.defaultdict`, :class:`~shelve.Shelf`, :class:`~configparser.ConfigParser`, or :mod:`dbm`. It is also useful with custom :class:`dict` subclasses that normalize keys before look-up or that - supply a :meth:`__missing__` method for unknown keys:: + supply a :meth:`~object.__missing__` method for unknown keys:: >>> import shelve >>> d = shelve.open('tmp.shl') diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index 7a8eb47cbdb354..87fb3f54e687b5 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -331,7 +331,7 @@ simplified and finer-grained. You don't have to worry anymore about choosing the appropriate exception type between :exc:`OSError`, :exc:`IOError`, :exc:`EnvironmentError`, -:exc:`WindowsError`, :exc:`mmap.error`, :exc:`socket.error` or +:exc:`WindowsError`, :exc:`!mmap.error`, :exc:`socket.error` or :exc:`select.error`. All these exception types are now only one: :exc:`OSError`. The other names are kept as aliases for compatibility reasons. @@ -805,7 +805,7 @@ Some smaller changes made to the core Python language are: * New methods have been added to :class:`list` and :class:`bytearray`: ``copy()`` and ``clear()`` (:issue:`10516`). Consequently, :class:`~collections.abc.MutableSequence` now also defines a - :meth:`~collections.abc.MutableSequence.clear` method (:issue:`11388`). + :meth:`!clear` method (:issue:`11388`). * Raw bytes literals can now be written ``rb"..."`` as well as ``br"..."``. @@ -869,10 +869,10 @@ faulthandler This new debug module :mod:`faulthandler` contains functions to dump Python tracebacks explicitly, on a fault (a crash like a segmentation fault), after a timeout, or on a user signal. Call :func:`faulthandler.enable` to install fault handlers for the -:const:`SIGSEGV`, :const:`SIGFPE`, :const:`SIGABRT`, :const:`SIGBUS`, and -:const:`SIGILL` signals. You can also enable them at startup by setting the -:envvar:`PYTHONFAULTHANDLER` environment variable or by using :option:`-X` -``faulthandler`` command line option. +:const:`~signal.SIGSEGV`, :const:`~signal.SIGFPE`, :const:`~signal.SIGABRT`, +:const:`~signal.SIGBUS`, and :const:`~signal.SIGILL` signals. +You can also enable them at startup by setting the :envvar:`PYTHONFAULTHANDLER` +environment variable or by using :option:`-X` ``faulthandler`` command line option. Example of a segmentation fault on Linux: @@ -916,7 +916,7 @@ abc Improved support for abstract base classes containing descriptors composed with abstract methods. The recommended approach to declaring abstract descriptors is -now to provide :attr:`__isabstractmethod__` as a dynamically updated +now to provide :attr:`!__isabstractmethod__` as a dynamically updated property. The built-in descriptors have been updated accordingly. * :class:`abc.abstractproperty` has been deprecated, use :class:`property` @@ -979,7 +979,7 @@ new features have been added: (Contributed by Nir Aides in :issue:`1625`.) * :class:`bz2.BZ2File` now implements all of the :class:`io.BufferedIOBase` API, - except for the :meth:`detach` and :meth:`truncate` methods. + except for the :meth:`!detach` and :meth:`!truncate` methods. codecs @@ -1064,7 +1064,7 @@ curses * If the :mod:`curses` module is linked to the ncursesw library, use Unicode functions when Unicode strings or characters are passed (e.g. - :c:func:`waddwstr`), and bytes functions otherwise (e.g. :c:func:`waddstr`). + :c:func:`!waddwstr`), and bytes functions otherwise (e.g. :c:func:`!waddstr`). * Use the locale encoding instead of ``utf-8`` to encode Unicode strings. * :class:`curses.window` has a new :attr:`curses.window.encoding` attribute. * The :class:`curses.window` class has a new :meth:`~curses.window.get_wch` @@ -1137,15 +1137,15 @@ API changes * The C module has the following context limits, depending on the machine architecture: - +-------------------+----------------+-------------------------+ - | | 32-bit | 64-bit | - +===================+================+=========================+ - | :const:`MAX_PREC` | ``425000000`` | ``999999999999999999`` | - +-------------------+----------------+-------------------------+ - | :const:`MAX_EMAX` | ``425000000`` | ``999999999999999999`` | - +-------------------+----------------+-------------------------+ - | :const:`MIN_EMIN` | ``-425000000`` | ``-999999999999999999`` | - +-------------------+----------------+-------------------------+ + +----------------------------+----------------+-------------------------+ + | | 32-bit | 64-bit | + +============================+================+=========================+ + | :const:`~decimal.MAX_PREC` | ``425000000`` | ``999999999999999999`` | + +----------------------------+----------------+-------------------------+ + | :const:`~decimal.MAX_EMAX` | ``425000000`` | ``999999999999999999`` | + +----------------------------+----------------+-------------------------+ + | :const:`~decimal.MIN_EMIN` | ``-425000000`` | ``-999999999999999999`` | + +----------------------------+----------------+-------------------------+ * In the context templates (:const:`~decimal.DefaultContext`, :const:`~decimal.BasicContext` and :const:`~decimal.ExtendedContext`) @@ -1434,7 +1434,7 @@ html :class:`html.parser.HTMLParser` is now able to parse broken markup without raising errors, therefore the *strict* argument of the constructor and the -:exc:`~html.parser.HTMLParseError` exception are now deprecated. +:exc:`!HTMLParseError` exception are now deprecated. The ability to parse broken markup is the result of a number of bug fixes that are also available on the latest bug fix releases of Python 2.7/3.2. (Contributed by Ezio Melotti in :issue:`15114`, and :issue:`14538`, @@ -1486,7 +1486,7 @@ already exists. It is based on the C11 'x' mode to fopen(). The constructor of the :class:`~io.TextIOWrapper` class has a new *write_through* optional argument. If *write_through* is ``True``, calls to -:meth:`~io.TextIOWrapper.write` are guaranteed not to be buffered: any data +:meth:`!write` are guaranteed not to be buffered: any data written on the :class:`~io.TextIOWrapper` object is immediately handled to its underlying binary buffer. @@ -1504,7 +1504,7 @@ logging The :func:`~logging.basicConfig` function now supports an optional ``handlers`` argument taking an iterable of handlers to be added to the root logger. -A class level attribute :attr:`~logging.handlers.SysLogHandler.append_nul` has +A class level attribute :attr:`!append_nul` has been added to :class:`~logging.handlers.SysLogHandler` to allow control of the appending of the ``NUL`` (``\000``) byte to syslog records, since for some daemons it is required while for others it is passed through to the log. @@ -1536,8 +1536,8 @@ The new :func:`multiprocessing.connection.wait` function allows polling multiple objects (such as connections, sockets and pipes) with a timeout. (Contributed by Richard Oudkerk in :issue:`12328`.) -:class:`multiprocessing.Connection` objects can now be transferred over -multiprocessing connections. +:class:`multiprocessing.connection.Connection` objects can now be transferred +over multiprocessing connections. (Contributed by Richard Oudkerk in :issue:`4892`.) :class:`multiprocessing.Process` now accepts a ``daemon`` keyword argument @@ -1611,7 +1611,7 @@ os :func:`~os.rename`, :func:`~os.replace`, :func:`~os.rmdir`, :func:`~os.stat`, :func:`~os.symlink`, :func:`~os.unlink`, :func:`~os.utime`. Platform support for using these parameters can be checked via the sets - :data:`os.supports_dir_fd` and :data:`os.supports_follows_symlinks`. + :data:`os.supports_dir_fd` and :data:`os.supports_follow_symlinks`. - The following functions now support a file descriptor for their path argument: :func:`~os.chdir`, :func:`~os.chmod`, :func:`~os.chown`, @@ -1698,7 +1698,7 @@ os :const:`~os.RTLD_NOLOAD`, and :const:`~os.RTLD_DEEPBIND` are available on platforms that support them. These are for use with the :func:`sys.setdlopenflags` function, and supersede the similar constants - defined in :mod:`ctypes` and :mod:`DLFCN`. (Contributed by Victor Stinner + defined in :mod:`ctypes` and :mod:`!DLFCN`. (Contributed by Victor Stinner in :issue:`13226`.) * :func:`os.symlink` now accepts (and ignores) the ``target_is_directory`` @@ -1728,8 +1728,8 @@ reduction functions to be set. pydoc ----- -The Tk GUI and the :func:`~pydoc.serve` function have been removed from the -:mod:`pydoc` module: ``pydoc -g`` and :func:`~pydoc.serve` have been deprecated +The Tk GUI and the :func:`!serve` function have been removed from the +:mod:`pydoc` module: ``pydoc -g`` and :func:`!serve` have been deprecated in Python 3.2. @@ -1931,7 +1931,7 @@ ssl * :func:`~ssl.RAND_bytes`: generate cryptographically strong pseudo-random bytes. - * :func:`~ssl.RAND_pseudo_bytes`: generate pseudo-random bytes. + * :func:`!RAND_pseudo_bytes`: generate pseudo-random bytes. (Contributed by Victor Stinner in :issue:`12049`.) @@ -2020,8 +2020,7 @@ tarfile tempfile -------- -:class:`tempfile.SpooledTemporaryFile`\'s -:meth:`~tempfile.SpooledTemporaryFile.truncate` method now accepts +:class:`tempfile.SpooledTemporaryFile`\'s :meth:`!truncate` method now accepts a ``size`` parameter. (Contributed by Ryan Kelly in :issue:`9957`.) @@ -2129,7 +2128,7 @@ xml.etree.ElementTree The :mod:`xml.etree.ElementTree` module now imports its C accelerator by default; there is no longer a need to explicitly import -:mod:`xml.etree.cElementTree` (this module stays for backwards compatibility, +:mod:`!xml.etree.cElementTree` (this module stays for backwards compatibility, but is now deprecated). In addition, the ``iter`` family of methods of :class:`~xml.etree.ElementTree.Element` has been optimized (rewritten in C). The module's documentation has also been greatly improved with added examples @@ -2197,7 +2196,7 @@ Changes to Python's build process and to the C API include: * :c:func:`PyUnicode_AsUCS4`, :c:func:`PyUnicode_AsUCS4Copy` * :c:macro:`PyUnicode_DATA`, :c:macro:`PyUnicode_1BYTE_DATA`, :c:macro:`PyUnicode_2BYTE_DATA`, :c:macro:`PyUnicode_4BYTE_DATA` - * :c:macro:`PyUnicode_KIND` with :c:enum:`PyUnicode_Kind` enum: + * :c:macro:`PyUnicode_KIND` with :c:enum:`!PyUnicode_Kind` enum: :c:data:`!PyUnicode_WCHAR_KIND`, :c:data:`PyUnicode_1BYTE_KIND`, :c:data:`PyUnicode_2BYTE_KIND`, :c:data:`PyUnicode_4BYTE_KIND` * :c:macro:`PyUnicode_READ`, :c:macro:`PyUnicode_READ_CHAR`, :c:macro:`PyUnicode_WRITE` @@ -2232,17 +2231,17 @@ Deprecated Python modules, functions and methods (``utf-32-le`` or ``utf-32-be``) * :meth:`ftplib.FTP.nlst` and :meth:`ftplib.FTP.dir`: use :meth:`ftplib.FTP.mlsd` -* :func:`platform.popen`: use the :mod:`subprocess` module. Check especially +* :func:`!platform.popen`: use the :mod:`subprocess` module. Check especially the :ref:`subprocess-replacements` section (:issue:`11377`). * :issue:`13374`: The Windows bytes API has been deprecated in the :mod:`os` module. Use Unicode filenames, instead of bytes filenames, to not depend on the ANSI code page anymore and to support any filename. -* :issue:`13988`: The :mod:`xml.etree.cElementTree` module is deprecated. The +* :issue:`13988`: The :mod:`!xml.etree.cElementTree` module is deprecated. The accelerator is used automatically whenever available. -* The behaviour of :func:`time.clock` depends on the platform: use the new +* The behaviour of :func:`!time.clock` depends on the platform: use the new :func:`time.perf_counter` or :func:`time.process_time` function instead, depending on your requirements, to have a well defined behaviour. -* The :func:`os.stat_float_times` function is deprecated. +* The :func:`!os.stat_float_times` function is deprecated. * :mod:`abc` module: * :class:`abc.abstractproperty` has been deprecated, use :class:`property` @@ -2466,7 +2465,7 @@ Porting C code * In the course of changes to the buffer API the undocumented :c:member:`!smalltable` member of the :c:type:`Py_buffer` structure has been removed and the - layout of the :c:type:`PyMemoryViewObject` has changed. + layout of the :c:type:`!PyMemoryViewObject` has changed. All extensions relying on the relevant parts in ``memoryobject.h`` or ``object.h`` must be rebuilt. diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index e4f602a17ee968..9f4068116e3726 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -2,7 +2,7 @@ What's New In Python 3.4 **************************** -:Author: R. David Murray (Editor) +:Author: \R. David Murray (Editor) .. Rules for maintenance: @@ -122,7 +122,7 @@ Significantly improved library modules: on Unix ` (:issue:`8713`). * :mod:`email` has a new submodule, :mod:`~email.contentmanager`, and a new :mod:`~email.message.Message` subclass - (:class:`~email.contentmanager.EmailMessage`) that :ref:`simplify MIME + (:class:`~email.message.EmailMessage`) that :ref:`simplify MIME handling ` (:issue:`18891`). * The :mod:`inspect` and :mod:`pydoc` modules are now capable of correct introspection of a much wider variety of callable objects, @@ -154,7 +154,7 @@ Security improvements: `. * All modules in the standard library that support SSL now support server certificate verification, including hostname matching - (:func:`ssl.match_hostname`) and CRLs (Certificate Revocation lists, see + (:func:`!ssl.match_hostname`) and CRLs (Certificate Revocation lists, see :func:`ssl.SSLContext.load_verify_locations`). CPython implementation improvements: @@ -739,7 +739,7 @@ these new components. In addition, a new application-friendly class :class:`~dis.Bytecode` provides an object-oriented API for inspecting bytecode in both in human-readable form and for iterating over instructions. The :class:`~dis.Bytecode` constructor -takes the same arguments that :func:`~dis.get_instruction` does (plus an +takes the same arguments that :func:`~dis.get_instructions` does (plus an optional *current_offset*), and the resulting object can be iterated to produce :class:`~dis.Instruction` objects. But it also has a :mod:`~dis.Bytecode.dis` method, equivalent to calling :mod:`~dis.dis` on the constructor argument, but @@ -958,8 +958,9 @@ http optional additional *explain* parameter which can be used to provide an extended error description, overriding the hardcoded default if there is one. This extended error description will be formatted using the -:attr:`~http.server.HTTP.error_message_format` attribute and sent as the body -of the error response. (Contributed by Karl Cow in :issue:`12921`.) +:attr:`~http.server.BaseHTTPRequestHandler.error_message_format` attribute +and sent as the body of the error response. +(Contributed by Karl Cow in :issue:`12921`.) The :mod:`http.server` :ref:`command line interface ` now has a ``-b/--bind`` option that causes the server to listen on a specific address. @@ -1038,7 +1039,7 @@ As part of the implementation of the new :mod:`enum` module, the metaclasses. (Contributed by Ethan Furman in :issue:`18929` and :issue:`19030`.) -:func:`~inspect.getfullargspec` and :func:`~inspect.getargspec` +:func:`~inspect.getfullargspec` and :func:`!getargspec` now use the :func:`~inspect.signature` API. This allows them to support a much broader range of callables, including those with ``__signature__`` attributes, those with metadata provided by argument @@ -1221,7 +1222,7 @@ pickle :mod:`pickle` now supports (but does not use by default) a new pickle protocol, protocol 4. This new protocol addresses a number of issues that were present in previous protocols, such as the serialization of nested classes, very large -strings and containers, and classes whose :meth:`__new__` method takes +strings and containers, and classes whose :meth:`~object.__new__` method takes keyword-only arguments. It also provides some efficiency improvements. .. seealso:: @@ -1299,7 +1300,7 @@ affect the behaviour of :func:`help`. re -- -New :func:`~re.fullmatch` function and :meth:`.regex.fullmatch` method anchor +New :func:`~re.fullmatch` function and :meth:`.Pattern.fullmatch` method anchor the pattern at both ends of the string to match. This provides a way to be explicit about the goal of the match, which avoids a class of subtle bugs where ``$`` characters get lost during code changes or the addition of alternatives @@ -1519,7 +1520,7 @@ subprocess be used to provide the contents of ``stdin`` for the command that is run. (Contributed by Zack Weinberg in :issue:`16624`.) -:func:`~subprocess.getstatus` and :func:`~subprocess.getstatusoutput` now +:func:`~subprocess.getoutput` and :func:`~subprocess.getstatusoutput` now work on Windows. This change was actually inadvertently made in 3.3.4. (Contributed by Tim Golden in :issue:`10197`.) @@ -1535,7 +1536,7 @@ plain tuple. (Contributed by Claudiu Popa in :issue:`18901`.) called automatically at the end of the block. (Contributed by Serhiy Storchaka in :issue:`18878`.) -:meth:`.AU_write.setsampwidth` now supports 24 bit samples, thus adding +:meth:`!AU_write.setsampwidth` now supports 24 bit samples, thus adding support for writing 24 sample using the module. (Contributed by Serhiy Storchaka in :issue:`19261`.) @@ -1615,7 +1616,7 @@ A new :func:`~types.DynamicClassAttribute` descriptor provides a way to define an attribute that acts normally when looked up through an instance object, but which is routed to the *class* ``__getattr__`` when looked up through the class. This allows one to have properties active on a class, and have virtual -attributes on the class with the same name (see :mod:`Enum` for an example). +attributes on the class with the same name (see :mod:`enum` for an example). (Contributed by Ethan Furman in :issue:`19030`.) @@ -1709,7 +1710,7 @@ matching calls, which means an argument can now be matched by either position or name, instead of only by position. (Contributed by Antoine Pitrou in :issue:`17015`.) -:func:`~mock.mock_open` objects now have ``readline`` and ``readlines`` +:func:`~unittest.mock.mock_open` objects now have ``readline`` and ``readlines`` methods. (Contributed by Toshio Kuratomi in :issue:`17467`.) @@ -1729,8 +1730,8 @@ installed in the virtual environment. (Contributed by Nick Coghlan in wave ---- -The :meth:`~wave.getparams` method now returns a namedtuple rather than a -plain tuple. (Contributed by Claudiu Popa in :issue:`17487`.) +The :meth:`~wave.Wave_read.getparams` method now returns a namedtuple rather +than a plain tuple. (Contributed by Claudiu Popa in :issue:`17487`.) :meth:`wave.open` now supports the context management protocol. (Contributed by Claudiu Popa in :issue:`17616`.) @@ -1788,7 +1789,7 @@ example, this could be used to exclude test files from the archive. (Contributed by Christian Tismer in :issue:`19274`.) The *allowZip64* parameter to :class:`~zipfile.ZipFile` and -:class:`~zipfile.PyZipfile` is now ``True`` by default. (Contributed by +:class:`~zipfile.PyZipFile` is now ``True`` by default. (Contributed by William Mallard in :issue:`17201`.) @@ -1817,7 +1818,7 @@ PEP 442: Safe Object Finalization --------------------------------- :pep:`442` removes the current limitations and quirks of object finalization -in CPython. With it, objects with :meth:`__del__` methods, as well as +in CPython. With it, objects with :meth:`~object.__del__` methods, as well as generators with :keyword:`finally` clauses, can be finalized when they are part of a reference cycle. @@ -2043,7 +2044,7 @@ Significant Optimizations strings is now significantly faster. (Contributed by Victor Stinner and Antoine Pitrou in :issue:`15596`.) -* A performance issue in :meth:`io.FileIO.readall` has been solved. This +* A performance issue in :meth:`!io.FileIO.readall` has been solved. This particularly affects Windows, and significantly speeds up the case of piping significant amounts of data through :mod:`subprocess`. (Contributed by Richard Oudkerk in :issue:`15758`.) @@ -2103,7 +2104,7 @@ Deprecations in the Python API * The :mod:`!imp` module is pending deprecation. To keep compatibility with Python 2/3 code bases, the module's removal is currently not scheduled. -* The :mod:`formatter` module is pending deprecation and is slated for removal +* The :mod:`!formatter` module is pending deprecation and is slated for removal in Python 3.6. * ``MD5`` as the default *digestmod* for the :func:`hmac.new` function is @@ -2120,11 +2121,11 @@ Deprecations in the Python API * The *strict* argument of :class:`~html.parser.HTMLParser` is deprecated. -* The :mod:`plistlib` :func:`~plistlib.readPlist`, - :func:`~plistlib.writePlist`, :func:`~plistlib.readPlistFromBytes`, and - :func:`~plistlib.writePlistToBytes` functions are deprecated in favor of the +* The :mod:`plistlib` :func:`!readPlist`, + :func:`!writePlist`, :func:`!readPlistFromBytes`, and + :func:`!writePlistToBytes` functions are deprecated in favor of the corresponding new functions :func:`~plistlib.load`, :func:`~plistlib.dump`, - :func:`~plistlib.loads`, and :func:`~plistlib.dumps`. :func:`~plistlib.Data` + :func:`~plistlib.loads`, and :func:`~plistlib.dumps`. :func:`!Data` is deprecated in favor of just using the :class:`bytes` constructor. * The :mod:`sysconfig` key ``SO`` is deprecated, it has been replaced by @@ -2212,8 +2213,8 @@ removed: that do not have a __format__ method that handles it. See :issue:`7994` for background. -* :meth:`difflib.SequenceMatcher.isbjunk` and - :meth:`difflib.SequenceMatcher.isbpopular` were deprecated in 3.2, and have +* :meth:`!difflib.SequenceMatcher.isbjunk` and + :meth:`!difflib.SequenceMatcher.isbpopular` were deprecated in 3.2, and have now been removed: use ``x in sm.bjunk`` and ``x in sm.bpopular``, where *sm* is a :class:`~difflib.SequenceMatcher` object (:issue:`13248`). @@ -2280,7 +2281,7 @@ Changes in the Python API * :meth:`!importlib.util.module_for_loader` now sets ``__loader__`` and ``__package__`` unconditionally to properly support reloading. If this is not desired then you will need to set these attributes manually. You can use - :func:`importlib.util.module_to_load` for module management. + :func:`!importlib.util.module_to_load` for module management. * Import now resets relevant attributes (e.g. ``__name__``, ``__loader__``, ``__package__``, ``__file__``, ``__cached__``) unconditionally when reloading. @@ -2428,7 +2429,7 @@ Changes in the Python API disallowed command forms didn't make any sense and are unlikely to be in use. * The :func:`re.split`, :func:`re.findall`, and :func:`re.sub` functions, and - the :meth:`~re.match.group` and :meth:`~re.match.groups` methods of + the :meth:`~re.Match.group` and :meth:`~re.Match.groups` methods of ``match`` objects now always return a *bytes* object when the string to be matched is a :term:`bytes-like object`. Previously the return type matched the input type, so if your code was depending on the return value diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index db3f1db3bd74ad..6009dd8a71eea5 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -181,7 +181,7 @@ Coroutine functions are declared using the new :keyword:`async def` syntax:: Inside a coroutine function, the new :keyword:`await` expression can be used to suspend coroutine execution until the result is available. Any object can be *awaited*, as long as it implements the :term:`awaitable` protocol by -defining the :meth:`__await__` method. +defining the :meth:`~object.__await__` method. PEP 492 also adds :keyword:`async for` statement for convenient iteration over asynchronous iterables. @@ -273,9 +273,10 @@ PEP 465 - A dedicated infix operator for matrix multiplication :pep:`465` adds the ``@`` infix operator for matrix multiplication. Currently, no builtin Python types implement the new operator, however, it -can be implemented by defining :meth:`__matmul__`, :meth:`__rmatmul__`, -and :meth:`__imatmul__` for regular, reflected, and in-place matrix -multiplication. The semantics of these methods is similar to that of +can be implemented by defining :meth:`~object.__matmul__`, +:meth:`~object.__rmatmul__`, and :meth:`~object.__imatmul__` for regular, +reflected, and in-place matrix multiplication. +The semantics of these methods is similar to that of methods defining other infix arithmetic operators. Matrix multiplication is a notably common operation in many fields of @@ -800,7 +801,7 @@ Notable changes in the :mod:`asyncio` module since Python 3.4.0: control. (Contributed by Victor Stinner.) -* The :func:`~asyncio.async` function is deprecated in favor of +* The :func:`!async` function is deprecated in favor of :func:`~asyncio.ensure_future`. (Contributed by Yury Selivanov.) @@ -905,10 +906,8 @@ collections The :class:`~collections.OrderedDict` class is now implemented in C, which makes it 4 to 100 times faster. (Contributed by Eric Snow in :issue:`16991`.) -:meth:`OrderedDict.items() `, -:meth:`OrderedDict.keys() `, -:meth:`OrderedDict.values() ` views now support -:func:`reversed` iteration. +:meth:`!OrderedDict.items`, :meth:`!OrderedDict.keys`, +and :meth:`!OrderedDict.values` views now support :func:`reversed` iteration. (Contributed by Serhiy Storchaka in :issue:`19505`.) The :class:`~collections.deque` class now defines @@ -928,7 +927,7 @@ Docstrings produced by :func:`~collections.namedtuple` can now be updated:: (Contributed by Berker Peksag in :issue:`24064`.) The :class:`~collections.UserString` class now implements the -:meth:`__getnewargs__`, :meth:`__rmod__`, :meth:`~str.casefold`, +:meth:`~object.__getnewargs__`, :meth:`~object.__rmod__`, :meth:`~str.casefold`, :meth:`~str.format_map`, :meth:`~str.isprintable`, and :meth:`~str.maketrans` methods to match the corresponding methods of :class:`str`. (Contributed by Joe Jevnik in :issue:`22189`.) @@ -937,7 +936,7 @@ methods to match the corresponding methods of :class:`str`. collections.abc --------------- -The :meth:`Sequence.index() ` method now +The :meth:`!Sequence.index` method now accepts *start* and *stop* arguments to match the corresponding methods of :class:`tuple`, :class:`list`, etc. (Contributed by Devin Jeanpierre in :issue:`23086`.) @@ -1045,8 +1044,8 @@ not just sequences. (Contributed by Serhiy Storchaka in :issue:`23171`.) curses ------ -The new :func:`~curses.update_lines_cols` function updates the :data:`LINES` -and :data:`COLS` module variables. This is useful for detecting +The new :func:`~curses.update_lines_cols` function updates the :data:`~curses.LINES` +and :data:`~curses.COLS` module variables. This is useful for detecting manual screen resizing. (Contributed by Arnon Yaari in :issue:`4254`.) @@ -1347,8 +1346,8 @@ network objects from existing addresses:: (Contributed by Peter Moody and Antoine Pitrou in :issue:`16531`.) -A new :attr:`~ipaddress.IPv4Network.reverse_pointer` attribute for the -:class:`~ipaddress.IPv4Network` and :class:`~ipaddress.IPv6Network` classes +A new :attr:`~ipaddress.IPv4Address.reverse_pointer` attribute for the +:class:`~ipaddress.IPv4Address` and :class:`~ipaddress.IPv6Address` classes returns the name of the reverse DNS PTR record:: >>> import ipaddress @@ -1451,7 +1450,7 @@ and :data:`~math.nan`. (Contributed by Mark Dickinson in :issue:`23185`.) A new function :func:`~math.isclose` provides a way to test for approximate equality. (Contributed by Chris Barker and Tal Einat in :issue:`24270`.) -A new :func:`~math.gcd` function has been added. The :func:`fractions.gcd` +A new :func:`~math.gcd` function has been added. The :func:`!fractions.gcd` function is now deprecated. (Contributed by Mark Dickinson and Serhiy Storchaka in :issue:`22486`.) @@ -1602,10 +1601,10 @@ The :func:`~re.sub` and :func:`~re.subn` functions now replace unmatched groups with empty strings instead of raising an exception. (Contributed by Serhiy Storchaka in :issue:`1519638`.) -The :class:`re.error` exceptions have new attributes, -:attr:`~re.error.msg`, :attr:`~re.error.pattern`, -:attr:`~re.error.pos`, :attr:`~re.error.lineno`, -and :attr:`~re.error.colno`, that provide better context +The :class:`re.error ` exceptions have new attributes, +:attr:`~re.PatternError.msg`, :attr:`~re.PatternError.pattern`, +:attr:`~re.PatternError.pos`, :attr:`~re.PatternError.lineno`, +and :attr:`~re.PatternError.colno`, that provide better context information about the error:: >>> re.compile(""" @@ -1794,10 +1793,10 @@ query the actual protocol version in use. (Contributed by Antoine Pitrou in :issue:`20421`.) The :class:`~ssl.SSLSocket` class now implements -a :meth:`SSLSocket.sendfile() ` method. +a :meth:`!SSLSocket.sendfile` method. (Contributed by Giampaolo Rodola' in :issue:`17552`.) -The :meth:`SSLSocket.send() ` method now raises either +The :meth:`!SSLSocket.send` method now raises either the :exc:`ssl.SSLWantReadError` or :exc:`ssl.SSLWantWriteError` exception on a non-blocking socket if the operation would block. Previously, it would return ``0``. (Contributed by Nikolaus Rath in :issue:`20951`.) @@ -1806,20 +1805,20 @@ The :func:`~ssl.cert_time_to_seconds` function now interprets the input time as UTC and not as local time, per :rfc:`5280`. Additionally, the return value is always an :class:`int`. (Contributed by Akira Li in :issue:`19940`.) -New :meth:`SSLObject.shared_ciphers() ` and +New :meth:`!SSLObject.shared_ciphers` and :meth:`SSLSocket.shared_ciphers() ` methods return the list of ciphers sent by the client during the handshake. (Contributed by Benjamin Peterson in :issue:`23186`.) The :meth:`SSLSocket.do_handshake() `, :meth:`SSLSocket.read() `, -:meth:`SSLSocket.shutdown() `, and +:meth:`!SSLSocket.shutdown`, and :meth:`SSLSocket.write() ` methods of the :class:`~ssl.SSLSocket` class no longer reset the socket timeout every time bytes are received or sent. The socket timeout is now the maximum total duration of the method. (Contributed by Victor Stinner in :issue:`23853`.) -The :func:`~ssl.match_hostname` function now supports matching of IP addresses. +The :func:`!match_hostname` function now supports matching of IP addresses. (Contributed by Antoine Pitrou in :issue:`23239`.) @@ -1863,10 +1862,10 @@ Examples:: sys --- -A new :func:`~sys.set_coroutine_wrapper` function allows setting a global +A new :func:`!set_coroutine_wrapper` function allows setting a global hook that will be called whenever a :term:`coroutine object ` is created by an :keyword:`async def` function. A corresponding -:func:`~sys.get_coroutine_wrapper` can be used to obtain a currently set +:func:`!get_coroutine_wrapper` can be used to obtain a currently set wrapper. Both functions are :term:`provisional `, and are intended for debugging purposes only. (Contributed by Yury Selivanov in :issue:`24017`.) @@ -2014,8 +2013,9 @@ The :class:`~unittest.mock.Mock` class has the following improvements: method to check if the mock object was called. (Contributed by Kushal Das in :issue:`21262`.) -The :class:`~unittest.mock.MagicMock` class now supports :meth:`__truediv__`, -:meth:`__divmod__` and :meth:`__matmul__` operators. +The :class:`~unittest.mock.MagicMock` class now supports +:meth:`~object.__truediv__`, :meth:`~object.__divmod__` +and :meth:`~object.__matmul__` operators. (Contributed by Johannes Baiter in :issue:`20968`, and Håkan Lövdahl in :issue:`23581` and :issue:`23568`.) @@ -2290,10 +2290,10 @@ Windows XP is no longer supported by Microsoft, thus, per :PEP:`11`, CPython Deprecated Python modules, functions and methods ------------------------------------------------ -The :mod:`formatter` module has now graduated to full deprecation and is still +The :mod:`!formatter` module has now graduated to full deprecation and is still slated for removal in Python 3.6. -The :func:`asyncio.async` function is deprecated in favor of +The :func:`!asyncio.async` function is deprecated in favor of :func:`~asyncio.ensure_future`. The :mod:`!smtpd` module has in the past always decoded the DATA portion of @@ -2314,7 +2314,7 @@ Passing a format string as keyword argument *format_string* to the class has been deprecated. (Contributed by Serhiy Storchaka in :issue:`23671`.) -The :func:`platform.dist` and :func:`platform.linux_distribution` functions +The :func:`!platform.dist` and :func:`!platform.linux_distribution` functions are now deprecated. Linux distributions use too many different ways of describing themselves, so the functionality is left to a package. (Contributed by Vajrasky Kok and Berker Peksag in :issue:`1322`.) @@ -2324,11 +2324,11 @@ The previously undocumented ``from_function`` and ``from_builtin`` methods of :meth:`Signature.from_callable() ` method instead. (Contributed by Yury Selivanov in :issue:`24248`.) -The :func:`inspect.getargspec` function is deprecated and scheduled to be +The :func:`!inspect.getargspec` function is deprecated and scheduled to be removed in Python 3.6. (See :issue:`20438` for details.) The :mod:`inspect` :func:`~inspect.getfullargspec`, -:func:`~inspect.getcallargs`, and :func:`~inspect.formatargspec` functions are +:func:`~inspect.getcallargs`, and :func:`!formatargspec` functions are deprecated in favor of the :func:`inspect.signature` API. (Contributed by Yury Selivanov in :issue:`20438`.) @@ -2405,7 +2405,7 @@ Changes in the Python API error-prone and has been removed in Python 3.5. See :issue:`13936` for full details. -* The :meth:`ssl.SSLSocket.send` method now raises either +* The :meth:`!ssl.SSLSocket.send` method now raises either :exc:`ssl.SSLWantReadError` or :exc:`ssl.SSLWantWriteError` on a non-blocking socket if the operation would block. Previously, it would return ``0``. (Contributed by Nikolaus Rath in :issue:`20951`.) @@ -2440,12 +2440,12 @@ Changes in the Python API :mod:`http.client` and :mod:`http.server` remain available for backwards compatibility. (Contributed by Demian Brecht in :issue:`21793`.) -* When an import loader defines :meth:`importlib.machinery.Loader.exec_module` +* When an import loader defines :meth:`~importlib.abc.Loader.exec_module` it is now expected to also define - :meth:`~importlib.machinery.Loader.create_module` (raises a + :meth:`~importlib.abc.Loader.create_module` (raises a :exc:`DeprecationWarning` now, will be an error in Python 3.6). If the loader inherits from :class:`importlib.abc.Loader` then there is nothing to do, else - simply define :meth:`~importlib.machinery.Loader.create_module` to return + simply define :meth:`~importlib.abc.Loader.create_module` to return ``None``. (Contributed by Brett Cannon in :issue:`23014`.) * The :func:`re.split` function always ignored empty pattern matches, so the @@ -2488,7 +2488,7 @@ Changes in the Python API the POT-Creation-Date header. * The :mod:`smtplib` module now uses :data:`sys.stderr` instead of the previous - module-level :data:`stderr` variable for debug output. If your (test) + module-level :data:`!stderr` variable for debug output. If your (test) program depends on patching the module-level variable to capture the debug output, you will need to update it to capture sys.stderr instead. @@ -2514,11 +2514,11 @@ Changes in the C API -------------------- * The undocumented :c:member:`!format` member of the - (non-public) :c:type:`PyMemoryViewObject` structure has been removed. + (non-public) :c:type:`!PyMemoryViewObject` structure has been removed. All extensions relying on the relevant parts in ``memoryobject.h`` must be rebuilt. -* The :c:type:`PyMemAllocator` structure was renamed to +* The :c:type:`!PyMemAllocator` structure was renamed to :c:type:`PyMemAllocatorEx` and a new ``calloc`` field was added. * Removed non-documented macro :c:macro:`!PyObject_REPR()` which leaked references. diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 050c9103b00c98..b1e3269239d629 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -745,7 +745,7 @@ Some smaller changes made to the core Python language are: * It is now possible to set a :ref:`special method ` to ``None`` to indicate that the corresponding operation is not available. - For example, if a class sets :meth:`__iter__` to ``None``, the class + For example, if a class sets :meth:`~object.__iter__` to ``None``, the class is not iterable. (Contributed by Andrew Barnert and Ivan Levkivskyi in :issue:`25958`.) @@ -871,7 +871,7 @@ Notable changes in the :mod:`asyncio` module since Python 3.5.0 of the last iteration will be discarded. (Contributed by Guido van Rossum in :issue:`25593`.) -* :meth:`Future.set_exception ` +* :meth:`Future.set_exception ` will now raise :exc:`TypeError` when passed an instance of the :exc:`StopIteration` exception. (Contributed by Chris Angelico in :issue:`26221`.) @@ -925,7 +925,7 @@ added to represent sized iterable container classes. (Contributed by Ivan Levkivskyi, docs by Neil Girdhar in :issue:`27598`.) The new :class:`~collections.abc.Reversible` abstract base class represents -iterable classes that also provide the :meth:`__reversed__` method. +iterable classes that also provide the :meth:`~object.__reversed__` method. (Contributed by Ivan Levkivskyi in :issue:`25987`.) The new :class:`~collections.abc.AsyncGenerator` abstract base class represents @@ -971,7 +971,7 @@ datetime -------- The :class:`~datetime.datetime` and :class:`~datetime.time` classes have -the new :attr:`~time.fold` attribute used to disambiguate local time +the new :attr:`~datetime.time.fold` attribute used to disambiguate local time when necessary. Many functions in the :mod:`datetime` have been updated to support local time disambiguation. See :ref:`Local Time Disambiguation ` section for more @@ -1052,12 +1052,12 @@ enum ---- Two new enumeration base classes have been added to the :mod:`enum` module: -:class:`~enum.Flag` and :class:`~enum.IntFlags`. Both are used to define +:class:`~enum.Flag` and :class:`~enum.IntFlag`. Both are used to define constants that can be combined using the bitwise operators. (Contributed by Ethan Furman in :issue:`23591`.) Many standard library modules have been updated to use the -:class:`~enum.IntFlags` class for their constants. +:class:`~enum.IntFlag` class for their constants. The new :class:`enum.auto` value can be used to assign values to enum members automatically:: @@ -1224,7 +1224,7 @@ generator expression scopes as if they were positional-only parameters called ``implicit0``. (Contributed by Jelle Zijlstra in :issue:`19611`.) To reduce code churn when upgrading from Python 2.7 and the legacy -:func:`inspect.getargspec` API, the previously documented deprecation of +:func:`!inspect.getargspec` API, the previously documented deprecation of :func:`inspect.getfullargspec` has been reversed. While this function is convenient for single/source Python 2/3 code bases, the richer :func:`inspect.signature` interface remains the recommended approach for new @@ -1275,7 +1275,7 @@ See the summary of :ref:`PEP 519 ` for details on how the A new :meth:`~os.scandir.close` method allows explicitly closing a :func:`~os.scandir` iterator. The :func:`~os.scandir` iterator now -supports the :term:`context manager` protocol. If a :func:`scandir` +supports the :term:`context manager` protocol. If a :func:`!scandir` iterator is neither exhausted nor explicitly closed a :exc:`ResourceWarning` will be emitted in its destructor. (Contributed by Serhiy Storchaka in :issue:`25994`.) @@ -1434,7 +1434,7 @@ defined in :mod:`http.server`, :mod:`xmlrpc.server` and protocol. (Contributed by Aviv Palivoda in :issue:`26404`.) -The :attr:`~socketserver.StreamRequestHandler.wfile` attribute of +The :attr:`wfile ` attribute of :class:`~socketserver.StreamRequestHandler` classes now implements the :class:`io.BufferedIOBase` writable interface. In particular, calling :meth:`~io.BufferedIOBase.write` is now guaranteed to send the @@ -1465,7 +1465,7 @@ The new :meth:`~ssl.SSLContext.get_ciphers` method can be used to get a list of enabled ciphers in order of cipher priority. All constants and flags have been converted to :class:`~enum.IntEnum` and -:class:`~enum.IntFlags`. +:class:`~enum.IntFlag`. (Contributed by Christian Heimes in :issue:`28025`.) Server and client-side specific TLS protocols for :class:`~ssl.SSLContext` @@ -1531,8 +1531,8 @@ Stéphane Wirtel in :issue:`25485`). time ---- -The :class:`~time.struct_time` attributes :attr:`tm_gmtoff` and -:attr:`tm_zone` are now available on all platforms. +The :class:`~time.struct_time` attributes :attr:`!tm_gmtoff` and +:attr:`!tm_zone` are now available on all platforms. timeit @@ -1551,12 +1551,12 @@ between best and worst times. tkinter ------- -Added methods :meth:`~tkinter.Variable.trace_add`, -:meth:`~tkinter.Variable.trace_remove` and :meth:`~tkinter.Variable.trace_info` -in the :class:`tkinter.Variable` class. They replace old methods -:meth:`~tkinter.Variable.trace_variable`, :meth:`~tkinter.Variable.trace`, -:meth:`~tkinter.Variable.trace_vdelete` and -:meth:`~tkinter.Variable.trace_vinfo` that use obsolete Tcl commands and might +Added methods :meth:`!Variable.trace_add`, +:meth:`!Variable.trace_remove` and :meth:`!trace_info` +in the :class:`!tkinter.Variable` class. They replace old methods +:meth:`!trace_variable`, :meth:`!trace`, +:meth:`!trace_vdelete` and +:meth:`!trace_vinfo` that use obsolete Tcl commands and might not work in future versions of Tcl. (Contributed by Serhiy Storchaka in :issue:`22115`). @@ -1674,8 +1674,8 @@ urllib.request If a HTTP request has a file or iterable body (other than a bytes object) but no ``Content-Length`` header, rather than -throwing an error, :class:`~urllib.request.AbstractHTTPHandler` now -falls back to use chunked transfer encoding. +throwing an error, :class:`AbstractHTTPHandler ` +now falls back to use chunked transfer encoding. (Contributed by Demian Brecht and Rolf Krahl in :issue:`12319`.) @@ -1701,7 +1701,7 @@ warnings A new optional *source* parameter has been added to the :func:`warnings.warn_explicit` function: the destroyed object which emitted a :exc:`ResourceWarning`. A *source* attribute has also been added to -:class:`warnings.WarningMessage` (contributed by Victor Stinner in +:class:`!warnings.WarningMessage` (contributed by Victor Stinner in :issue:`26568` and :issue:`26567`). When a :exc:`ResourceWarning` warning is logged, the :mod:`tracemalloc` module is now @@ -1942,7 +1942,7 @@ Raising the :exc:`StopIteration` exception inside a generator will now generate a :exc:`DeprecationWarning`, and will trigger a :exc:`RuntimeError` in Python 3.7. See :ref:`whatsnew-pep-479` for details. -The :meth:`__aiter__` method is now expected to return an asynchronous +The :meth:`~object.__aiter__` method is now expected to return an asynchronous iterator directly instead of returning an awaitable as previously. Doing the former will trigger a :exc:`DeprecationWarning`. Backward compatibility will be removed in Python 3.7. @@ -2189,7 +2189,7 @@ Changes in the Python API booleans being a subclass of integers, this should only be an issue if you were doing identity checks for ``1`` or ``0``. See :issue:`25768`. -* Reading the :attr:`~urllib.parse.SplitResult.port` attribute of +* Reading the :attr:`!port` attribute of :func:`urllib.parse.urlsplit` and :func:`~urllib.parse.urlparse` results now raises :exc:`ValueError` for out-of-range values, rather than returning :const:`None`. See :issue:`20059`. @@ -2197,8 +2197,8 @@ Changes in the Python API * The :mod:`!imp` module now raises a :exc:`DeprecationWarning` instead of :exc:`PendingDeprecationWarning`. -* The following modules have had missing APIs added to their :attr:`__all__` - attributes to match the documented APIs: +* The following modules have had missing APIs added to their + :attr:`~module.__all__` attributes to match the documented APIs: :mod:`calendar`, :mod:`!cgi`, :mod:`csv`, :mod:`~xml.etree.ElementTree`, :mod:`enum`, :mod:`fileinput`, :mod:`ftplib`, :mod:`logging`, :mod:`mailbox`, @@ -2253,11 +2253,13 @@ Changes in the Python API * As part of :pep:`487`, the handling of keyword arguments passed to :class:`type` (other than the metaclass hint, ``metaclass``) is now consistently delegated to :meth:`object.__init_subclass__`. This means that - :meth:`type.__new__` and :meth:`type.__init__` both now accept arbitrary - keyword arguments, but :meth:`object.__init_subclass__` (which is called from - :meth:`type.__new__`) will reject them by default. Custom metaclasses - accepting additional keyword arguments will need to adjust their calls to - :meth:`type.__new__` (whether direct or via :class:`super`) accordingly. + :meth:`type.__new__ ` and :meth:`type.__init__ + ` both now accept arbitrary keyword arguments, + but :meth:`object.__init_subclass__` (which is called from + :meth:`type.__new__ `) will reject them by default. + Custom metaclasses accepting additional keyword arguments will need to adjust + their calls to :meth:`type.__new__ ` + (whether direct or via :class:`super`) accordingly. * In ``distutils.command.sdist.sdist``, the ``default_format`` attribute has been removed and is no longer honored. Instead, the @@ -2305,7 +2307,7 @@ Changes in the Python API real-world compatibility. (Contributed by Lita Cho in :issue:`21815`.) -* The :func:`mmap.write() ` function now returns the number +* The :func:`mmap.mmap.write` function now returns the number of bytes written like other write methods. (Contributed by Jakub Stasiak in :issue:`26335`.) diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index f420fa5c04479b..9ac3bf53833f93 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -320,9 +320,9 @@ effort will be made to add such support. PEP 562: Customization of Access to Module Attributes ----------------------------------------------------- -Python 3.7 allows defining :meth:`__getattr__` on modules and will call +Python 3.7 allows defining :meth:`~module.__getattr__` on modules and will call it whenever a module attribute is otherwise not found. Defining -:meth:`__dir__` on modules is now also allowed. +:meth:`~module.__dir__` on modules is now also allowed. A typical example of where this may be useful is module attribute deprecation and lazy loading. @@ -409,8 +409,8 @@ PEP 560: Core Support for ``typing`` module and Generic Types Initially :pep:`484` was designed in such way that it would not introduce *any* changes to the core CPython interpreter. Now type hints and the :mod:`typing` module are extensively used by the community, so this restriction is removed. -The PEP introduces two special methods :meth:`__class_getitem__` and -``__mro_entries__``, these methods are now used by most classes and special +The PEP introduces two special methods :meth:`~object.__class_getitem__` and +:meth:`~object.__mro_entries__`, these methods are now used by most classes and special constructs in :mod:`typing`. As a result, the speed of various operations with types increased up to 7 times, the generic types can be used without metaclass conflicts, and several long standing bugs in :mod:`typing` module are @@ -603,7 +603,7 @@ The new :mod:`importlib.resources` module provides several new APIs and one new ABC for access to, opening, and reading *resources* inside packages. Resources are roughly similar to files inside packages, but they needn't be actual files on the physical file system. Module loaders can provide a -:meth:`get_resource_reader` function which returns +:meth:`!get_resource_reader` function which returns a :class:`importlib.abc.ResourceReader` instance to support this new API. Built-in file path loaders and zip file loaders both support this. @@ -910,9 +910,9 @@ which allows listing the names of properties which should not become enum members. (Contributed by Ethan Furman in :issue:`31801`.) -In Python 3.8, attempting to check for non-Enum objects in :class:`Enum` +In Python 3.8, attempting to check for non-Enum objects in :class:`~enum.Enum` classes will raise a :exc:`TypeError` (e.g. ``1 in Color``); similarly, -attempting to check for non-Flag objects in a :class:`Flag` member will +attempting to check for non-Flag objects in a :class:`~enum.Flag` member will raise :exc:`TypeError` (e.g. ``1 in Perm.RW``); currently, both operations return :const:`False` instead and are deprecated. (Contributed by Ethan Furman in :issue:`33217`.) @@ -969,7 +969,7 @@ uses the current working directory. (Contributed by Stéphane Wirtel and Julien Palard in :issue:`28707`.) The new :class:`ThreadingHTTPServer ` class -uses threads to handle requests using :class:`~socketserver.ThreadingMixin`. +uses threads to handle requests using :class:`~socketserver.ThreadingMixIn`. It is used when ``http.server`` is run with ``-m``. (Contributed by Julien Palard in :issue:`31639`.) @@ -1052,12 +1052,12 @@ support the loading of resources from packages. See also lacks a spec. (Contributed by Garvit Khatri in :issue:`29851`.) -:func:`importlib.find_spec` now raises :exc:`ModuleNotFoundError` instead of +:func:`importlib.util.find_spec` now raises :exc:`ModuleNotFoundError` instead of :exc:`AttributeError` if the specified parent module is not a package (i.e. lacks a ``__path__`` attribute). (Contributed by Milan Oberkirch in :issue:`30436`.) -The new :func:`importlib.source_hash` can be used to compute the hash of +The new :func:`importlib.util.source_hash` can be used to compute the hash of the passed source. A :ref:`hash-based .pyc file ` embeds the value returned by this function. @@ -1148,7 +1148,7 @@ running. (Contributed by Antoine Pitrou in :issue:`30596`.) The new :meth:`Process.kill() ` method can -be used to terminate the process using the :data:`SIGKILL` signal on Unix. +be used to terminate the process using the :data:`~signal.SIGKILL` signal on Unix. (Contributed by Vitor Pereira in :issue:`30794`.) Non-daemonic threads created by :class:`~multiprocessing.Process` are now @@ -1280,9 +1280,10 @@ This function should be used instead of :func:`os.close` for better compatibility across platforms. (Contributed by Christian Heimes in :issue:`32454`.) -The :mod:`socket` module now exposes the :const:`socket.TCP_CONGESTION` -(Linux 2.6.13), :const:`socket.TCP_USER_TIMEOUT` (Linux 2.6.37), and -:const:`socket.TCP_NOTSENT_LOWAT` (Linux 3.12) constants. +The :mod:`socket` module now exposes the :ref:`socket.TCP_CONGESTION +` (Linux 2.6.13), :ref:`socket.TCP_USER_TIMEOUT +` (Linux 2.6.37), and :ref:`socket.TCP_NOTSENT_LOWAT +` (Linux 3.12) constants. (Contributed by Omar Sandoval in :issue:`26273` and Nathaniel J. Smith in :issue:`29728`.) @@ -1298,11 +1299,14 @@ by default. socketserver ------------ -:meth:`socketserver.ThreadingMixIn.server_close` now waits until all non-daemon -threads complete. :meth:`socketserver.ForkingMixIn.server_close` now waits +:meth:`socketserver.ThreadingMixIn.server_close +` now waits until all non-daemon +threads complete. :meth:`socketserver.ForkingMixIn.server_close +` now waits until all child processes complete. -Add a new :attr:`socketserver.ForkingMixIn.block_on_close` class attribute to +Add a new :attr:`socketserver.ForkingMixIn.block_on_close +` class attribute to :class:`socketserver.ForkingMixIn` and :class:`socketserver.ThreadingMixIn` classes. Set the class attribute to ``False`` to get the pre-3.7 behaviour. @@ -1323,7 +1327,7 @@ ssl --- The :mod:`ssl` module now uses OpenSSL's builtin API instead of -:func:`~ssl.match_hostname` to check a host name or an IP address. Values +:func:`!match_hostname` to check a host name or an IP address. Values are validated during TLS handshake. Any certificate validation error including failing the host name check now raises :exc:`~ssl.SSLCertVerificationError` and aborts the handshake with a proper @@ -1341,7 +1345,7 @@ Host name validation can be customized with The ``ssl`` module no longer sends IP addresses in SNI TLS extension. (Contributed by Christian Heimes in :issue:`32185`.) -:func:`~ssl.match_hostname` no longer supports partial wildcards like +:func:`!match_hostname` no longer supports partial wildcards like ``www*.example.org``. (Contributed by Mandeep Singh in :issue:`23033` and Christian Heimes in :issue:`31399`.) @@ -1438,7 +1442,7 @@ The new :func:`sys.get_coroutine_origin_tracking_depth` function returns the current coroutine origin tracking depth, as set by the new :func:`sys.set_coroutine_origin_tracking_depth`. :mod:`asyncio` has been converted to use this new API instead of -the deprecated :func:`sys.set_coroutine_wrapper`. +the deprecated :func:`!sys.set_coroutine_wrapper`. (Contributed by Nathaniel J. Smith in :issue:`32591`.) @@ -1615,7 +1619,7 @@ external entities by default. xml.etree --------- -:ref:`ElementPath ` predicates in the :meth:`find` +:ref:`ElementPath ` predicates in the :meth:`!find` methods can now compare text of the current node with ``[. = "text"]``, not only text in children. Predicates also allow adding spaces for better readability. (Contributed by Stefan Behnel in :issue:`31648`.) @@ -1624,7 +1628,7 @@ better readability. (Contributed by Stefan Behnel in :issue:`31648`.) xmlrpc.server ------------- -:meth:`SimpleXMLRPCDispatcher.register_function ` +:meth:`!SimpleXMLRPCDispatcher.register_function` can now be used as a decorator. (Contributed by Xiang Zhang in :issue:`7769`.) @@ -1682,15 +1686,15 @@ The :mod:`tracemalloc` now exposes a C API through the new functions. (Contributed by Victor Stinner in :issue:`30054`.) -The new :c:func:`import__find__load__start` and -:c:func:`import__find__load__done` static markers can be used to trace -module imports. +The new :ref:`import__find__load__start ` and +:ref:`import__find__load__done ` static markers can be used +to trace module imports. (Contributed by Christian Heimes in :issue:`31574`.) The fields :c:member:`!name` and :c:member:`!doc` of structures :c:type:`PyMemberDef`, :c:type:`PyGetSetDef`, :c:type:`PyStructSequence_Field`, :c:type:`PyStructSequence_Desc`, -and :c:struct:`wrapperbase` are now of type ``const char *`` rather of +and :c:struct:`!wrapperbase` are now of type ``const char *`` rather of ``char *``. (Contributed by Serhiy Storchaka in :issue:`28761`.) The result of :c:func:`PyUnicode_AsUTF8AndSize` and :c:func:`PyUnicode_AsUTF8` @@ -1719,8 +1723,8 @@ Added C API support for timezones with timezone constructors and access to the UTC singleton with :c:data:`PyDateTime_TimeZone_UTC`. Contributed by Paul Ganssle in :issue:`10381`. -The type of results of :c:func:`PyThread_start_new_thread` and -:c:func:`PyThread_get_thread_ident`, and the *id* parameter of +The type of results of :c:func:`!PyThread_start_new_thread` and +:c:func:`!PyThread_get_thread_ident`, and the *id* parameter of :c:func:`PyThreadState_SetAsyncExc` changed from :c:expr:`long` to :c:expr:`unsigned long`. (Contributed by Serhiy Storchaka in :issue:`6532`.) @@ -1847,8 +1851,8 @@ make the creation of named tuples 4 to 6 times faster. (Contributed by Jelle Zijlstra with further improvements by INADA Naoki, Serhiy Storchaka, and Raymond Hettinger in :issue:`28638`.) -:meth:`date.fromordinal` and :meth:`date.fromtimestamp` are now up to -30% faster in the common case. +:meth:`datetime.date.fromordinal` and :meth:`datetime.date.fromtimestamp` +are now up to 30% faster in the common case. (Contributed by Paul Ganssle in :issue:`32403`.) The :func:`os.fwalk` function is now up to 2 times faster thanks to @@ -1997,9 +2001,9 @@ modes (this will be an error in future Python releases). enum ---- -In Python 3.8, attempting to check for non-Enum objects in :class:`Enum` +In Python 3.8, attempting to check for non-Enum objects in :class:`~enum.Enum` classes will raise a :exc:`TypeError` (e.g. ``1 in Color``); similarly, -attempting to check for non-Flag objects in a :class:`Flag` member will +attempting to check for non-Flag objects in a :class:`~enum.Flag` member will raise :exc:`TypeError` (e.g. ``1 in Perm.RW``); currently, both operations return :const:`False` instead. (Contributed by Ethan Furman in :issue:`33217`.) @@ -2034,14 +2038,14 @@ favour of :class:`importlib.abc.ResourceReader`. locale ------ -:func:`locale.format` has been deprecated, use :meth:`locale.format_string` +:func:`!locale.format` has been deprecated, use :meth:`locale.format_string` instead. (Contributed by Garvit in :issue:`10379`.) macpath ------- -The :mod:`macpath` is now deprecated and will be removed in Python 3.8. +The :mod:`!macpath` is now deprecated and will be removed in Python 3.8. (Contributed by Chi Hsuan Yen in :issue:`9850`.) @@ -2066,7 +2070,7 @@ if the passed argument is larger than 16 bits, an exception will be raised. ssl --- -:func:`ssl.wrap_socket` is deprecated. Use +:func:`!ssl.wrap_socket` is deprecated. Use :meth:`ssl.SSLContext.wrap_socket` instead. (Contributed by Christian Heimes in :issue:`28124`.) @@ -2082,8 +2086,8 @@ Use :func:`!sunau.open` instead. sys --- -Deprecated :func:`sys.set_coroutine_wrapper` and -:func:`sys.get_coroutine_wrapper`. +Deprecated :func:`!sys.set_coroutine_wrapper` and +:func:`!sys.get_coroutine_wrapper`. The undocumented ``sys.callstats()`` function has been deprecated and will be removed in a future Python version. @@ -2093,7 +2097,7 @@ will be removed in a future Python version. wave ---- -:func:`wave.openfp` has been deprecated and will be removed in Python 3.9. +:func:`!wave.openfp` has been deprecated and will be removed in Python 3.9. Use :func:`wave.open` instead. (Contributed by Brian Curtin in :issue:`31985`.) @@ -2173,8 +2177,8 @@ The following features and APIs have been removed from Python 3.7: * Removed previously deprecated in Python 2.4 classes ``Plist``, ``Dict`` and ``_InternalDict`` in the :mod:`plistlib` module. Dict values in the result - of functions :func:`~plistlib.readPlist` and - :func:`~plistlib.readPlistFromBytes` are now normal dicts. You no longer + of functions :func:`!readPlist` and + :func:`!readPlistFromBytes` are now normal dicts. You no longer can use attribute access to access items of these dictionaries. * The ``asyncio.windows_utils.socketpair()`` function has been @@ -2191,7 +2195,7 @@ The following features and APIs have been removed from Python 3.7: * Direct instantiation of :class:`ssl.SSLSocket` and :class:`ssl.SSLObject` objects is now prohibited. The constructors were never documented, tested, or designed as public constructors. Users were supposed to use - :func:`ssl.wrap_socket` or :class:`ssl.SSLContext`. + :func:`!ssl.wrap_socket` or :class:`ssl.SSLContext`. (Contributed by Christian Heimes in :issue:`32951`.) * The unused ``distutils`` ``install_misc`` command has been removed. @@ -2275,15 +2279,18 @@ Changes in Python Behavior Changes in the Python API ------------------------- -* :meth:`socketserver.ThreadingMixIn.server_close` now waits until all +* :meth:`socketserver.ThreadingMixIn.server_close + ` now waits until all non-daemon threads complete. Set the new :attr:`socketserver.ThreadingMixIn.block_on_close` class attribute to ``False`` to get the pre-3.7 behaviour. (Contributed by Victor Stinner in :issue:`31233` and :issue:`33540`.) -* :meth:`socketserver.ForkingMixIn.server_close` now waits until all +* :meth:`socketserver.ForkingMixIn.server_close + ` now waits until all child processes complete. Set the new - :attr:`socketserver.ForkingMixIn.block_on_close` class attribute to ``False`` + :attr:`socketserver.ForkingMixIn.block_on_close + ` class attribute to ``False`` to get the pre-3.7 behaviour. (Contributed by Victor Stinner in :issue:`31151` and :issue:`33540`.) @@ -2476,7 +2483,7 @@ avoiding possible problems use new functions :c:func:`PySlice_Unpack` and CPython bytecode changes ------------------------ -There are two new opcodes: :opcode:`LOAD_METHOD` and :opcode:`!CALL_METHOD`. +There are two new opcodes: :opcode:`!LOAD_METHOD` and :opcode:`!CALL_METHOD`. (Contributed by Yury Selivanov and INADA Naoki in :issue:`26110`.) The :opcode:`!STORE_ANNOTATION` opcode has been removed. diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 7aca35b2959cd2..6407e5c213e389 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -536,7 +536,7 @@ Other Language Changes element is a callable with a ``(obj, state)`` signature. This allows the direct control over the state-updating behavior of a specific object. If not *None*, this callable will have priority over the object's - :meth:`~__setstate__` method. + :meth:`~object.__setstate__` method. (Contributed by Pierre Glaser and Olivier Grisel in :issue:`35900`.) New Modules @@ -1035,9 +1035,9 @@ symlinks and directory junctions) has been delegated to the operating system. Specifically, :func:`os.stat` will now traverse anything supported by the operating system, while :func:`os.lstat` will only open reparse points that identify as "name surrogates" while others are opened as for :func:`os.stat`. -In all cases, :attr:`stat_result.st_mode` will only have ``S_IFLNK`` set for +In all cases, :attr:`os.stat_result.st_mode` will only have ``S_IFLNK`` set for symbolic links and not other kinds of reparse points. To identify other kinds -of reparse point, check the new :attr:`stat_result.st_reparse_tag` attribute. +of reparse point, check the new :attr:`os.stat_result.st_reparse_tag` attribute. On Windows, :func:`os.readlink` is now able to read directory junctions. Note that :func:`~os.path.islink` will return ``False`` for directory junctions, @@ -1285,20 +1285,20 @@ now matches what the C tokenizer does internally. tkinter ------- -Added methods :meth:`~tkinter.Spinbox.selection_from`, -:meth:`~tkinter.Spinbox.selection_present`, -:meth:`~tkinter.Spinbox.selection_range` and -:meth:`~tkinter.Spinbox.selection_to` -in the :class:`tkinter.Spinbox` class. +Added methods :meth:`!selection_from`, +:meth:`!selection_present`, +:meth:`!selection_range` and +:meth:`!selection_to` +in the :class:`!tkinter.Spinbox` class. (Contributed by Juliette Monsel in :issue:`34829`.) -Added method :meth:`~tkinter.Canvas.moveto` -in the :class:`tkinter.Canvas` class. +Added method :meth:`!moveto` +in the :class:`!tkinter.Canvas` class. (Contributed by Juliette Monsel in :issue:`23831`.) -The :class:`tkinter.PhotoImage` class now has -:meth:`~tkinter.PhotoImage.transparency_get` and -:meth:`~tkinter.PhotoImage.transparency_set` methods. (Contributed by +The :class:`!tkinter.PhotoImage` class now has +:meth:`!transparency_get` and +:meth:`!transparency_set` methods. (Contributed by Zackery Spytz in :issue:`25451`.) @@ -1432,7 +1432,7 @@ and ``{namespace}*`` which returns all tags in the given namespace. (Contributed by Stefan Behnel in :issue:`28238`.) The :mod:`xml.etree.ElementTree` module provides a new function -:func:`–xml.etree.ElementTree.canonicalize` that implements C14N 2.0. +:func:`~xml.etree.ElementTree.canonicalize` that implements C14N 2.0. (Contributed by Stefan Behnel in :issue:`13611`.) The target object of :class:`xml.etree.ElementTree.XMLParser` can @@ -1573,7 +1573,7 @@ Build and C API Changes * :c:func:`Py_INCREF`, :c:func:`Py_DECREF` * :c:func:`Py_XINCREF`, :c:func:`Py_XDECREF` - * :c:func:`PyObject_INIT`, :c:func:`PyObject_INIT_VAR` + * :c:macro:`!PyObject_INIT`, :c:macro:`!PyObject_INIT_VAR` * Private functions: :c:func:`!_PyObject_GC_TRACK`, :c:func:`!_PyObject_GC_UNTRACK`, :c:func:`!_Py_Dealloc` @@ -1677,7 +1677,7 @@ Deprecated constant nodes. (Contributed by Serhiy Storchaka in :issue:`36917`.) -* The :func:`asyncio.coroutine` :term:`decorator` is deprecated and will be +* The :deco:`!asyncio.coroutine` :term:`decorator` is deprecated and will be removed in version 3.10. Instead of ``@asyncio.coroutine``, use :keyword:`async def` instead. (Contributed by Andrew Svetlov in :issue:`36921`.) @@ -1697,22 +1697,22 @@ Deprecated (Contributed by Yury Selivanov in :issue:`34790`.) * The following functions and methods are deprecated in the :mod:`gettext` - module: :func:`~gettext.lgettext`, :func:`~gettext.ldgettext`, - :func:`~gettext.lngettext` and :func:`~gettext.ldngettext`. + module: :func:`!lgettext`, :func:`!ldgettext`, + :func:`!lngettext` and :func:`!ldngettext`. They return encoded bytes, and it's possible that you will get unexpected Unicode-related exceptions if there are encoding problems with the translated strings. It's much better to use alternatives which return Unicode strings in Python 3. These functions have been broken for a long time. - Function :func:`~gettext.bind_textdomain_codeset`, methods - :meth:`~gettext.NullTranslations.output_charset` and - :meth:`~gettext.NullTranslations.set_output_charset`, and the *codeset* + Function :func:`!bind_textdomain_codeset`, methods + :meth:`!NullTranslations.output_charset` and + :meth:`!NullTranslations.set_output_charset`, and the *codeset* parameter of functions :func:`~gettext.translation` and :func:`~gettext.install` are also deprecated, since they are only used for the ``l*gettext()`` functions. (Contributed by Serhiy Storchaka in :issue:`33710`.) -* The :meth:`~threading.Thread.isAlive` method of :class:`threading.Thread` +* The :meth:`!isAlive` method of :class:`threading.Thread` has been deprecated. (Contributed by Donghee Na in :issue:`35283`.) @@ -1727,7 +1727,7 @@ Deprecated * Deprecated passing the following arguments as keyword arguments: - *func* in :func:`functools.partialmethod`, :func:`weakref.finalize`, - :meth:`profile.Profile.runcall`, :meth:`cProfile.Profile.runcall`, + :meth:`profile.Profile.runcall`, :meth:`!cProfile.Profile.runcall`, :meth:`bdb.Bdb.runcall`, :meth:`trace.Trace.runfunc` and :func:`curses.wrapper`. - *function* in :meth:`unittest.TestCase.addCleanup`. @@ -1735,11 +1735,11 @@ Deprecated :class:`concurrent.futures.ThreadPoolExecutor` and :class:`concurrent.futures.ProcessPoolExecutor`. - *callback* in :meth:`contextlib.ExitStack.callback`, - :meth:`contextlib.AsyncExitStack.callback` and + :meth:`!contextlib.AsyncExitStack.callback` and :meth:`contextlib.AsyncExitStack.push_async_callback`. - - *c* and *typeid* in the :meth:`~multiprocessing.managers.Server.create` - method of :class:`multiprocessing.managers.Server` and - :class:`multiprocessing.managers.SharedMemoryServer`. + - *c* and *typeid* in the :meth:`!create` + method of :class:`!multiprocessing.managers.Server` and + :class:`!multiprocessing.managers.SharedMemoryServer`. - *obj* in :func:`weakref.finalize`. In future releases of Python, they will be :ref:`positional-only @@ -1757,14 +1757,14 @@ The following features and APIs have been removed from Python 3.8: able to import from collections was marked for removal in 3.8, but has been delayed to 3.9. (See :gh:`81134`.) -* The :mod:`macpath` module, deprecated in Python 3.7, has been removed. +* The :mod:`!macpath` module, deprecated in Python 3.7, has been removed. (Contributed by Victor Stinner in :issue:`35471`.) -* The function :func:`platform.popen` has been removed, after having been +* The function :func:`!platform.popen` has been removed, after having been deprecated since Python 3.3: use :func:`os.popen` instead. (Contributed by Victor Stinner in :issue:`35345`.) -* The function :func:`time.clock` has been removed, after having been +* The function :func:`!time.clock` has been removed, after having been deprecated since Python 3.3: use :func:`time.perf_counter` or :func:`time.process_time` instead, depending on your requirements, to have well-defined behavior. @@ -1800,8 +1800,8 @@ The following features and APIs have been removed from Python 3.8: :func:`fileinput.FileInput` which was ignored and deprecated since Python 3.6 has been removed. :issue:`36952` (Contributed by Matthias Bussonnier.) -* The functions :func:`sys.set_coroutine_wrapper` and - :func:`sys.get_coroutine_wrapper` deprecated in Python 3.7 have been removed; +* The functions :func:`!sys.set_coroutine_wrapper` and + :func:`!sys.get_coroutine_wrapper` deprecated in Python 3.7 have been removed; :issue:`36933` (Contributed by Matthias Bussonnier.) @@ -1864,9 +1864,10 @@ Changes in the Python API * :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases for better performance. On Windows Subsystem for Linux and QEMU User - Emulation, the :class:`Popen` constructor using :func:`os.posix_spawn` no longer raises an - exception on errors like "missing program". Instead the child process fails with a - non-zero :attr:`~Popen.returncode`. + Emulation, the :class:`~subprocess.Popen` constructor using + :func:`os.posix_spawn` no longer raises an exception on errors like + "missing program". Instead the child process fails with a + non-zero :attr:`~subprocess.Popen.returncode`. (Contributed by Joannah Nanjekye and Victor Stinner in :issue:`35537`.) * The *preexec_fn* argument of * :class:`subprocess.Popen` is no longer @@ -1875,11 +1876,11 @@ Changes in the Python API (Contributed by Eric Snow in :issue:`34651`, modified by Christian Heimes in :issue:`37951`.) -* The :meth:`imap.IMAP4.logout` method no longer silently ignores arbitrary +* The :meth:`imaplib.IMAP4.logout` method no longer silently ignores arbitrary exceptions. (Contributed by Victor Stinner in :issue:`36348`.) -* The function :func:`platform.popen` has been removed, after having been deprecated since +* The function :func:`!platform.popen` has been removed, after having been deprecated since Python 3.3: use :func:`os.popen` instead. (Contributed by Victor Stinner in :issue:`35345`.) @@ -1894,9 +1895,11 @@ Changes in the Python API specialized methods like :meth:`~tkinter.ttk.Treeview.selection_set` for changing the selection. (Contributed by Serhiy Storchaka in :issue:`31508`.) -* The :meth:`writexml`, :meth:`toxml` and :meth:`toprettyxml` methods of - :mod:`xml.dom.minidom`, and the :meth:`write` method of :mod:`xml.etree`, - now preserve the attribute order specified by the user. +* The :meth:`~xml.dom.minidom.Node.writexml`, :meth:`~xml.dom.minidom.Node.toxml` + and :meth:`~xml.dom.minidom.Node.toprettyxml` methods of + :mod:`xml.dom.minidom` and the :meth:`~xml.etree.ElementTree.ElementTree.write` + method of :mod:`xml.etree.ElementTree` now preserve the attribute order + specified by the user. (Contributed by Diego Rojas and Raymond Hettinger in :issue:`34160`.) * A :mod:`dbm.dumb` database opened with flags ``'r'`` is now read-only. @@ -1916,8 +1919,8 @@ Changes in the Python API ``type.__new__``. A :exc:`DeprecationWarning` was emitted in Python 3.6--3.7. (Contributed by Serhiy Storchaka in :issue:`23722`.) -* The :class:`cProfile.Profile` class can now be used as a context - manager. (Contributed by Scott Sanderson in :issue:`29235`.) +* The :class:`cProfile.Profile ` class can now be used as + a context manager. (Contributed by Scott Sanderson in :issue:`29235`.) * :func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`, :func:`shutil.copytree` and :func:`shutil.move` use platform-specific @@ -1952,7 +1955,7 @@ Changes in the Python API (Contributed by Christian Heimes in :issue:`17239`.) * Deleting a key from a read-only :mod:`dbm` database (:mod:`dbm.dumb`, - :mod:`dbm.gnu` or :mod:`dbm.ndbm`) raises :attr:`error` (:exc:`dbm.dumb.error`, + :mod:`dbm.gnu` or :mod:`dbm.ndbm`) raises :attr:`!error` (:exc:`dbm.dumb.error`, :exc:`dbm.gnu.error` or :exc:`dbm.ndbm.error`) instead of :exc:`KeyError`. (Contributed by Xiang Zhang in :issue:`33106`.) @@ -2044,7 +2047,7 @@ Changes in the C API :c:func:`PyType_FromSpec`) hold a reference to their type object. Increasing the reference count of these type objects has been moved from :c:func:`PyType_GenericAlloc` to the more low-level functions, - :c:func:`PyObject_Init` and :c:func:`PyObject_INIT`. + :c:func:`PyObject_Init` and :c:macro:`!PyObject_INIT`. This makes types created through :c:func:`PyType_FromSpec` behave like other classes in managed code. @@ -2064,7 +2067,7 @@ Changes in the C API This may happen after calling :c:macro:`PyObject_New`, :c:macro:`PyObject_NewVar`, :c:func:`PyObject_GC_New`, :c:func:`PyObject_GC_NewVar`, or any other custom allocator that uses - :c:func:`PyObject_Init` or :c:func:`PyObject_INIT`. + :c:func:`PyObject_Init` or :c:macro:`!PyObject_INIT`. Example: diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 896e8f4a489649..40d4a27bff9fee 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -423,8 +423,8 @@ digests. It skips MD5 on platforms that block MD5 digest. fcntl ----- -Added constants :const:`~fcntl.F_OFD_GETLK`, :const:`~fcntl.F_OFD_SETLK` -and :const:`~fcntl.F_OFD_SETLKW`. +Added constants :const:`!fcntl.F_OFD_GETLK`, :const:`!fcntl.F_OFD_SETLK` +and :const:`!fcntl.F_OFD_SETLKW`. (Contributed by Donghee Na in :issue:`38602`.) ftplib @@ -644,7 +644,7 @@ attribute. random ------ -Added a new :attr:`random.Random.randbytes` method: generate random bytes. +Added a new :meth:`random.Random.randbytes` method: generate random bytes. (Contributed by Victor Stinner in :issue:`40286`.) signal @@ -776,7 +776,7 @@ Optimizations :pep:`590` vectorcall protocol. (Contributed by Donghee Na, Mark Shannon, Jeroen Demeyer and Petr Viktorin in :issue:`37207`.) -* Optimized :func:`~set.difference_update` for the case when the other set +* Optimized :meth:`!set.difference_update` for the case when the other set is much larger than the base set. (Suggested by Evgeny Kapun with code contributed by Michele Orrù in :issue:`8425`.) diff --git a/Grammar/Tokens b/Grammar/Tokens index e40a4437afb009..0547e6ed08f79a 100644 --- a/Grammar/Tokens +++ b/Grammar/Tokens @@ -1,3 +1,8 @@ +# When adding new tokens, remember to update the PEG generator in +# Tools/peg_generator/pegen/parser_generator.py +# This will ensure that older versions of Python can generate a Python parser +# using "python -m pegen python ". + ENDMARKER NAME NUMBER diff --git a/Grammar/python.gram b/Grammar/python.gram index f3ef990923eec3..7b27b04bf7989e 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -94,14 +94,16 @@ func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMA # GENERAL STATEMENTS # ================== -statements[asdl_stmt_seq*]: a=statement+ { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a)) } +statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) } -statement[asdl_stmt_seq*]: - | a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } +statement[asdl_stmt_seq*]: + | a=compound_stmt { _PyPegen_register_stmts(p , + (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) + ) } | a[asdl_stmt_seq*]=simple_stmts { a } single_compound_stmt[asdl_stmt_seq*]: - | a=compound_stmt { + | a=compound_stmt { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a)) } statement_newline[asdl_stmt_seq*]: @@ -449,9 +451,9 @@ except_block[excepthandler_ty]: _PyAST_ExceptHandler(e, ((expr_ty) t)->v.Name.id, b, EXTRA) } | 'except' e=expressions ':' b=block { CHECK_VERSION( - excepthandler_ty, - 14, - "except expressions without parentheses are", + excepthandler_ty, + 14, + "except expressions without parentheses are", _PyAST_ExceptHandler(e, NULL, b, EXTRA)) } | 'except' ':' b=block { _PyAST_ExceptHandler(NULL, NULL, b, EXTRA) } | invalid_except_stmt @@ -463,9 +465,9 @@ except_star_block[excepthandler_ty]: _PyAST_ExceptHandler(e, ((expr_ty) t)->v.Name.id, b, EXTRA) } | 'except' '*' e=expressions ':' b=block { CHECK_VERSION( - excepthandler_ty, - 14, - "except expressions without parentheses are", + excepthandler_ty, + 14, + "except expressions without parentheses are", _PyAST_ExceptHandler(e, NULL, b, EXTRA)) } | invalid_except_star_stmt finally_block[asdl_stmt_seq*]: @@ -977,15 +979,18 @@ tstring_middle[expr_ty]: | tstring_replacement_field | t=TSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) } tstring[expr_ty] (memo): - | a=TSTRING_START b=tstring_middle* c=TSTRING_END { + | a=TSTRING_START b=tstring_middle* c=TSTRING_END { CHECK_VERSION( - expr_ty, - 14, - "t-strings are", + expr_ty, + 14, + "t-strings are", _PyPegen_template_str(p, a, (asdl_expr_seq*)b, c)) } string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_string(p, s) } -strings[expr_ty] (memo): a[asdl_expr_seq*]=(fstring|string|tstring)+ { _PyPegen_concatenate_strings(p, a, EXTRA) } +strings[expr_ty] (memo): + | invalid_string_tstring_concat + | a[asdl_expr_seq*]=(fstring|string)+ { _PyPegen_concatenate_strings(p, a, EXTRA) } + | a[asdl_expr_seq*]=tstring+ { _PyPegen_concatenate_tstrings(p, a, EXTRA) } list[expr_ty]: | '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) } @@ -1382,12 +1387,12 @@ invalid_import: | 'import' token=NEWLINE { RAISE_SYNTAX_ERROR_STARTING_FROM(token, "Expected one or more names after 'import'") } invalid_dotted_as_name: - | dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) a=expression { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, + | dotted_name 'as' !(NAME (',' | ')' | ';' | NEWLINE)) a=expression { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use %s as import target", _PyPegen_get_expr_name(a)) } invalid_import_from_as_name: - | NAME 'as' !(NAME (',' | ')' | NEWLINE)) a=expression { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, + | NAME 'as' !(NAME (',' | ')' | ';' | NEWLINE)) a=expression { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use %s as import target", _PyPegen_get_expr_name(a)) } invalid_import_from_targets: @@ -1418,7 +1423,7 @@ invalid_except_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } - | 'except' expression 'as' a=expression { + | 'except' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except statement with %s", _PyPegen_get_expr_name(a)) } invalid_except_star_stmt: @@ -1426,7 +1431,7 @@ invalid_except_star_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' '*' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' '*' (NEWLINE | ':') { RAISE_SYNTAX_ERROR("expected one or more exception types") } - | 'except' '*' expression 'as' a=expression { + | 'except' '*' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except* statement with %s", _PyPegen_get_expr_name(a)) } invalid_finally_stmt: @@ -1546,6 +1551,12 @@ invalid_tstring_conversion_character: | '!' &(':' | '}') { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: missing conversion character") } | '!' !NAME { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: invalid conversion character") } +invalid_string_tstring_concat: + | a=(fstring|string)+ b[expr_ty]=tstring { + RAISE_SYNTAX_ERROR_KNOWN_RANGE(PyPegen_last_item(a, expr_ty), b, "cannot mix t-string literals with string or bytes literals") } + | a=tstring+ b[expr_ty]=(fstring|string) { + RAISE_SYNTAX_ERROR_KNOWN_RANGE(PyPegen_last_item(a, expr_ty), b, "cannot mix t-string literals with string or bytes literals") } + invalid_arithmetic: | sum ('+'|'-'|'*'|'/'|'%'|'//'|'@') a='not' b=inversion { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "'not' after an operator must be parenthesized") } invalid_factor: diff --git a/Include/Python.h b/Include/Python.h index 64be80145890a3..db8165e4b2e858 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -16,6 +16,7 @@ // Include standard header files +// When changing these files, remember to update Doc/extending/extending.rst. #include // assert() #include // uintptr_t #include // INT_MAX @@ -59,6 +60,14 @@ # include // __readgsqword() #endif +// Suppress known warnings in Python header files. +#if defined(_MSC_VER) +// Warning that alignas behaviour has changed. Doesn't affect us, because we +// never relied on the old behaviour. +#pragma warning(push) +#pragma warning(disable: 5274) +#endif + // Include Python header files #include "pyport.h" #include "pymacro.h" @@ -138,4 +147,9 @@ #include "cpython/pyfpe.h" #include "cpython/tracemalloc.h" +// Restore warning filter +#ifdef _MSC_VER +#pragma warning(pop) +#endif + #endif /* !Py_PYTHON_H */ diff --git a/Include/abstract.h b/Include/abstract.h index b9199fc03a399a..80f3298701d249 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -138,7 +138,12 @@ extern "C" { Delete attribute named attr_name, for object o. Returns -1 on failure. - This is the equivalent of the Python statement: del o.attr_name. */ + This is the equivalent of the Python statement: del o.attr_name. + + Implemented as a macro in the limited C API 3.12 and older. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttrString(O, A) PyObject_SetAttrString((O), (A), NULL) +#endif /* Implemented elsewhere: @@ -147,7 +152,12 @@ extern "C" { Delete attribute named attr_name, for object o. Returns -1 on failure. This is the equivalent of the Python - statement: del o.attr_name. */ + statement: del o.attr_name. + + Implemented as a macro in the limited C API 3.12 and older. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttr(O, A) PyObject_SetAttr((O), (A), NULL) +#endif /* Implemented elsewhere: diff --git a/Include/audit.h b/Include/audit.h index 793b7077e1027b..9be54ad4411096 100644 --- a/Include/audit.h +++ b/Include/audit.h @@ -1,5 +1,5 @@ -#ifndef Py_AUDIT_H -#define Py_AUDIT_H +#ifndef _Py_AUDIT_H +#define _Py_AUDIT_H #ifdef __cplusplus extern "C" { #endif @@ -18,13 +18,13 @@ PyAPI_FUNC(int) PySys_AuditTuple( #ifndef Py_LIMITED_API -# define Py_CPYTHON_AUDIT_H +# define _Py_CPYTHON_AUDIT_H # include "cpython/audit.h" -# undef Py_CPYTHON_AUDIT_H +# undef _Py_CPYTHON_AUDIT_H #endif #ifdef __cplusplus } #endif -#endif /* !Py_AUDIT_H */ +#endif /* !_Py_AUDIT_H */ diff --git a/Include/boolobject.h b/Include/boolobject.h index 3037e61bbf6d0c..b56e2baecaa36c 100644 --- a/Include/boolobject.h +++ b/Include/boolobject.h @@ -34,9 +34,16 @@ PyAPI_FUNC(int) Py_IsTrue(PyObject *x); PyAPI_FUNC(int) Py_IsFalse(PyObject *x); #define Py_IsFalse(x) Py_Is((x), Py_False) -/* Macros for returning Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE return Py_True -#define Py_RETURN_FALSE return Py_False +/* Macros for returning Py_True or Py_False, respectively. + * Only treat Py_True and Py_False as immortal in the limited C API 3.12 + * and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_TRUE return Py_NewRef(Py_True) +# define Py_RETURN_FALSE return Py_NewRef(Py_False) +#else +# define Py_RETURN_TRUE return Py_True +# define Py_RETURN_FALSE return Py_False +#endif /* Function to return a bool from a C long */ PyAPI_FUNC(PyObject *) PyBool_FromLong(long); diff --git a/Include/ceval.h b/Include/ceval.h index 32ab38972e548f..e9df8684996e23 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -133,13 +133,6 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); #define FVS_MASK 0x4 #define FVS_HAVE_SPEC 0x4 -/* Special methods used by LOAD_SPECIAL */ -#define SPECIAL___ENTER__ 0 -#define SPECIAL___EXIT__ 1 -#define SPECIAL___AENTER__ 2 -#define SPECIAL___AEXIT__ 3 -#define SPECIAL_MAX 3 - #ifndef Py_LIMITED_API # define Py_CPYTHON_CEVAL_H # include "cpython/ceval.h" diff --git a/Include/cpython/audit.h b/Include/cpython/audit.h index 3c5c7a8c06091d..536f9248632097 100644 --- a/Include/cpython/audit.h +++ b/Include/cpython/audit.h @@ -1,4 +1,4 @@ -#ifndef Py_CPYTHON_AUDIT_H +#ifndef _Py_CPYTHON_AUDIT_H # error "this header file must not be included directly" #endif diff --git a/Include/cpython/critical_section.h b/Include/cpython/critical_section.h index 35db3fb6a59ce6..4fc46fefb93a24 100644 --- a/Include/cpython/critical_section.h +++ b/Include/cpython/critical_section.h @@ -73,22 +73,32 @@ typedef struct PyCriticalSection2 PyCriticalSection2; PyAPI_FUNC(void) PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op); +PyAPI_FUNC(void) +PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m); + PyAPI_FUNC(void) PyCriticalSection_End(PyCriticalSection *c); PyAPI_FUNC(void) PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b); +PyAPI_FUNC(void) +PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2); + PyAPI_FUNC(void) PyCriticalSection2_End(PyCriticalSection2 *c); #ifndef Py_GIL_DISABLED # define Py_BEGIN_CRITICAL_SECTION(op) \ { +# define Py_BEGIN_CRITICAL_SECTION_MUTEX(mutex) \ + { # define Py_END_CRITICAL_SECTION() \ } # define Py_BEGIN_CRITICAL_SECTION2(a, b) \ { +# define Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) \ + { # define Py_END_CRITICAL_SECTION2() \ } #else /* !Py_GIL_DISABLED */ @@ -118,6 +128,11 @@ struct PyCriticalSection2 { PyCriticalSection _py_cs; \ PyCriticalSection_Begin(&_py_cs, _PyObject_CAST(op)) +# define Py_BEGIN_CRITICAL_SECTION_MUTEX(mutex) \ + { \ + PyCriticalSection _py_cs; \ + PyCriticalSection_BeginMutex(&_py_cs, mutex) + # define Py_END_CRITICAL_SECTION() \ PyCriticalSection_End(&_py_cs); \ } @@ -127,6 +142,11 @@ struct PyCriticalSection2 { PyCriticalSection2 _py_cs2; \ PyCriticalSection2_Begin(&_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b)) +# define Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyCriticalSection2_BeginMutex(&_py_cs2, m1, m2) + # define Py_END_CRITICAL_SECTION2() \ PyCriticalSection2_End(&_py_cs2); \ } diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 18249b95befe65..598cd330bc9ca9 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -97,11 +97,6 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) { } #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func)) -static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) { - return _PyFunction_CAST(func)->func_builtins; -} -#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func)) - static inline PyObject* PyFunction_GET_MODULE(PyObject *func) { return _PyFunction_CAST(func)->func_module; } diff --git a/Include/cpython/lock.h b/Include/cpython/lock.h index 8ee03e82f74dfd..63886fca28eae2 100644 --- a/Include/cpython/lock.h +++ b/Include/cpython/lock.h @@ -36,6 +36,9 @@ PyAPI_FUNC(void) PyMutex_Lock(PyMutex *m); // exported function for unlocking the mutex PyAPI_FUNC(void) PyMutex_Unlock(PyMutex *m); +// exported function for checking if the mutex is locked +PyAPI_FUNC(int) PyMutex_IsLocked(PyMutex *m); + // Locks the mutex. // // If the mutex is currently locked, the calling thread will be parked until @@ -61,3 +64,11 @@ _PyMutex_Unlock(PyMutex *m) } } #define PyMutex_Unlock _PyMutex_Unlock + +// Checks if the mutex is currently locked. +static inline int +_PyMutex_IsLocked(PyMutex *m) +{ + return (_Py_atomic_load_uint8(&m->_bits) & _Py_LOCKED) != 0; +} +#define PyMutex_IsLocked _PyMutex_IsLocked diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 2a0c11e7b3ad66..790640309f1e03 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -591,6 +591,17 @@ static inline void _Py_atomic_fence_release(void); // --- aliases --------------------------------------------------------------- +// Compilers don't really support "consume" semantics, so we fake it. Use +// "acquire" with TSan to support false positives. Use "relaxed" otherwise, +// because CPUs on all platforms we support respect address dependencies without +// extra barriers. +// See 2.6.7 in https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf +#if defined(_Py_THREAD_SANITIZER) +# define _Py_atomic_load_ptr_consume _Py_atomic_load_ptr_acquire +#else +# define _Py_atomic_load_ptr_consume _Py_atomic_load_ptr_relaxed +#endif + #if SIZEOF_LONG == 8 # define _Py_atomic_load_ulong(p) \ _Py_atomic_load_uint64((uint64_t *)p) diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index 69a8b9e615ea5f..7176f667a4082c 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -948,14 +948,6 @@ _Py_atomic_store_ushort_relaxed(unsigned short *obj, unsigned short value) memory_order_relaxed); } -static inline void -_Py_atomic_store_uint_release(unsigned int *obj, unsigned int value) -{ - _Py_USING_STD; - atomic_store_explicit((_Atomic(unsigned int)*)obj, value, - memory_order_relaxed); -} - static inline void _Py_atomic_store_long_relaxed(long *obj, long value) { @@ -1031,6 +1023,14 @@ _Py_atomic_store_int_release(int *obj, int value) memory_order_release); } +static inline void +_Py_atomic_store_uint_release(unsigned int *obj, unsigned int value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(unsigned int)*)obj, value, + memory_order_release); +} + static inline void _Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value) { diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 7f1bc363861ddf..f428396411c5e5 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -28,10 +28,10 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_OPCODE 7 /* Remote debugger support */ -#define MAX_SCRIPT_PATH_SIZE 512 -typedef struct _remote_debugger_support { +#define _Py_MAX_SCRIPT_PATH_SIZE 512 +typedef struct { int32_t debugger_pending_call; - char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; + char debugger_script_path[_Py_MAX_SCRIPT_PATH_SIZE]; } _PyRemoteDebuggerSupport; typedef struct _err_stackitem { @@ -61,6 +61,8 @@ typedef struct _stack_chunk { PyObject * data[1]; /* Variable sized */ } _PyStackChunk; +/* Minimum size of data stack chunk */ +#define _PY_DATA_STACK_CHUNK_SIZE (16*1024) struct _ts { /* See Python/ceval.c for comments explaining most fields */ @@ -241,6 +243,18 @@ PyAPI_FUNC(int) PyGILState_Check(void); */ PyAPI_FUNC(PyObject*) _PyThread_CurrentFrames(void); +// Set the stack protection start address and stack protection size +// of a Python thread state +PyAPI_FUNC(int) PyUnstable_ThreadState_SetStackProtection( + PyThreadState *tstate, + void *stack_start_addr, // Stack start address + size_t stack_size); // Stack size (in bytes) + +// Reset the stack protection start address and stack protection size +// of a Python thread state +PyAPI_FUNC(void) PyUnstable_ThreadState_ResetStackProtection( + PyThreadState *tstate); + /* Routines for advanced debuggers, requested by David Beazley. Don't use unless you know what you are doing! */ PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Main(void); diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 136f5d5c5f8425..3d0414f5291fe4 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8( PyUnicodeWriter *writer, const char *str, Py_ssize_t size); +PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII( + PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size); PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar( PyUnicodeWriter *writer, const wchar_t *str, diff --git a/Include/cpython/warnings.h b/Include/cpython/warnings.h index 8731fd2e96b716..4e3eb88e8ff447 100644 --- a/Include/cpython/warnings.h +++ b/Include/cpython/warnings.h @@ -18,9 +18,3 @@ PyAPI_FUNC(int) PyErr_WarnExplicitFormat( // DEPRECATED: Use PyErr_WarnEx() instead. #define PyErr_Warn(category, msg) PyErr_WarnEx((category), (msg), 1) - -int _PyErr_WarnExplicitObjectWithContext( - PyObject *category, - PyObject *message, - PyObject *filename, - int lineno); diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index d97f51b8eefbe5..a7daa3a40a4c0b 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -634,10 +634,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl mi_track_mem_defined(block,sizeof(mi_block_t)); mi_block_t* next; #ifdef MI_ENCODE_FREELIST - next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); + next = (mi_block_t*)mi_ptr_decode(null, mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next), keys); #else MI_UNUSED(keys); MI_UNUSED(null); - next = (mi_block_t*)block->next; + next = (mi_block_t*)mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); return next; @@ -646,10 +646,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { mi_track_mem_undefined(block,sizeof(mi_block_t)); #ifdef MI_ENCODE_FREELIST - block->next = mi_ptr_encode(null, next, keys); + mi_atomic_store_relaxed(&block->next, mi_ptr_encode(null, next, keys)); #else MI_UNUSED(keys); MI_UNUSED(null); - block->next = (mi_encoded_t)next; + mi_atomic_store_relaxed(&block->next, (mi_encoded_t)next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); } diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index 354839ba955b36..70c600e920bad1 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -235,7 +235,7 @@ typedef size_t mi_threadid_t; // free lists contain blocks typedef struct mi_block_s { - mi_encoded_t next; + _Atomic(mi_encoded_t) next; } mi_block_t; @@ -678,7 +678,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // Thread Local data // ------------------------------------------------------ -// A "span" is is an available range of slices. The span queues keep +// A "span" is an available range of slices. The span queues keep // track of slice spans of at most the given `slice_count` (but more than the previous size class). typedef struct mi_span_queue_s { mi_slice_t* first; diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 300e7f4896a39e..8ea9b3ebb88454 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -20,8 +20,9 @@ extern PyObject* _PyBytes_FromHex( // Helper for PyBytes_DecodeEscape that detects invalid escape chars. // Export for test_peg_generator. -PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t, - const char *, const char **); +PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, + const char *, + int *, const char **); // Substring Search. diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 3d8247df31ce32..89504f6bfbbfb7 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -22,8 +22,10 @@ struct _ceval_runtime_state; // Export for '_lsprof' shared extension PyAPI_FUNC(int) _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg); +extern int _PyEval_SetProfileAllThreads(PyInterpreterState *interp, Py_tracefunc func, PyObject *arg); extern int _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg); +extern int _PyEval_SetTraceAllThreads(PyInterpreterState *interp, Py_tracefunc func, PyObject *arg); extern int _PyEval_SetOpcodeTrace(PyFrameObject *f, bool enable); @@ -199,7 +201,14 @@ extern void _PyEval_DeactivateOpCache(void); static inline int _Py_MakeRecCheck(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - return here_addr < _tstate->c_stack_soft_limit; + // Overflow if stack pointer is between soft limit and the base of the hardware stack. + // If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing. + // We could have the wrong stack limits because of limited platform support, or user-space threads. +#if _Py_STACK_GROWS_DOWN + return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES; +#else + return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES; +#endif } // Export for '_json' shared extension, used via _Py_EnterRecursiveCall() @@ -231,7 +240,11 @@ static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; assert(_tstate->c_stack_hard_limit != 0); +#if _Py_STACK_GROWS_DOWN return here_addr <= _tstate->c_stack_soft_limit; +#else + return here_addr >= _tstate->c_stack_soft_limit; +#endif } static inline void _Py_LeaveRecursiveCall(void) { @@ -239,6 +252,16 @@ static inline void _Py_LeaveRecursiveCall(void) { extern _PyInterpreterFrame* _PyEval_GetFrame(void); +extern PyObject * _PyEval_GetGlobalsFromRunningMain(PyThreadState *); +extern int _PyEval_EnsureBuiltins( + PyThreadState *, + PyObject *, + PyObject **p_builtins); +extern int _PyEval_EnsureBuiltinsWithModule( + PyThreadState *, + PyObject *, + PyObject **p_builtins); + PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func); /* Handle signals, pending calls, GIL drop request @@ -273,6 +296,7 @@ PyAPI_FUNC(PyObject *) _PyEval_ImportName(PyThreadState *, _PyInterpreterFrame * PyAPI_FUNC(PyObject *)_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs); PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys); PyAPI_FUNC(void) _PyEval_MonitorRaise(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *instr); +PyAPI_FUNC(bool) _PyEval_NoToolsForUnwind(PyThreadState *tstate); PyAPI_FUNC(int) _PyEval_UnpackIterableStackRef(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, _PyStackRef *sp); PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); PyAPI_FUNC(PyObject **) _PyObjectArray_FromStackRefArray(_PyStackRef *input, Py_ssize_t nargs, PyObject **scratch); @@ -353,6 +377,13 @@ PyAPI_FUNC(_PyStackRef) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyS extern int _PyRunRemoteDebugger(PyThreadState *tstate); #endif +/* Special methods used by LOAD_SPECIAL */ +#define SPECIAL___ENTER__ 0 +#define SPECIAL___EXIT__ 1 +#define SPECIAL___AENTER__ 2 +#define SPECIAL___AEXIT__ 3 +#define SPECIAL_MAX 3 + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index b135e30b7ad62a..dc0ea9c669b731 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -275,6 +275,13 @@ extern void _PyLineTable_InitAddressRange( extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range); extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); +// Similar to PyCode_Addr2Line(), but return -1 if the code object is invalid +// and can be called without an attached tstate. Used by dump_frame() in +// Python/traceback.c. The function uses heuristics to detect freed memory, +// it's not 100% reliable. +extern int _PyCode_SafeAddr2Line(PyCodeObject *co, int addr); + + /** API for executors */ extern void _PyCode_Clear_Executors(PyCodeObject *code); @@ -614,6 +621,47 @@ PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts( PyObject *globalsns, PyObject *builtinsns); + +/* "Stateless" code is a function or code object which does not rely on + * external state or internal state. It may rely on arguments and + * builtins, but not globals or a closure. Thus it does not rely + * on __globals__ or __closure__, and a stateless function + * is equivalent to its code object. + * + * Stateless code also does not keep any persistent state + * of its own, so it can't have any executors, monitoring, + * instrumentation, or "extras" (i.e. co_extra). + * + * Stateless code may create nested functions, including closures. + * However, nested functions must themselves be stateless, except they + * *can* close on the enclosing locals. + * + * Stateless code may return any value, including nested functions and closures. + * + * Stateless code that takes no arguments and doesn't return anything + * may be treated like a script. + * + * We consider stateless code to be "portable" if it does not return + * any object that holds a reference to any of the code's locals. Thus + * generators and coroutines are not portable. Likewise a function + * that returns a closure is not portable. The concept of + * portability is useful in cases where the code is run + * in a different execution context than where + * the return value will be used. */ + +PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **); +PyAPI_FUNC(int) _PyCode_CheckNoExternalState( + PyCodeObject *, + _PyCode_var_counts_t *, + const char **); +PyAPI_FUNC(int) _PyCode_VerifyStateless( + PyThreadState *, + PyCodeObject *, + PyObject *globalnames, + PyObject *globalsns, + PyObject *builtinsns); + +PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **); PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *); diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h index aecc50be1e6c34..ed776bbb73a0ee 100644 --- a/Include/internal/pycore_compile.h +++ b/Include/internal/pycore_compile.h @@ -49,7 +49,8 @@ extern int _PyAST_Preprocess( PyObject *filename, int optimize, int ff_features, - int syntax_check_only); + int syntax_check_only, + int enable_warnings); typedef struct { diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 42f06b935bd0a0..2b49b9f00df3ea 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -21,16 +21,6 @@ extern "C" { #define _Py_CRITICAL_SECTION_MASK 0x3 #ifdef Py_GIL_DISABLED -# define Py_BEGIN_CRITICAL_SECTION_MUT(mutex) \ - { \ - PyCriticalSection _py_cs; \ - _PyCriticalSection_BeginMutex(&_py_cs, mutex) - -# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) \ - { \ - PyCriticalSection2 _py_cs2; \ - _PyCriticalSection2_BeginMutex(&_py_cs2, m1, m2) - // Specialized version of critical section locking to safely use // PySequence_Fast APIs without the GIL. For performance, the argument *to* // PySequence_Fast() is provided to the macro, not the *result* of @@ -75,8 +65,6 @@ extern "C" { #else /* !Py_GIL_DISABLED */ // The critical section APIs are no-ops with the GIL. -# define Py_BEGIN_CRITICAL_SECTION_MUT(mut) { -# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) { # define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) { # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() } # define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) @@ -119,6 +107,7 @@ _PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) _PyCriticalSection_BeginSlow(c, m); } } +#define PyCriticalSection_BeginMutex _PyCriticalSection_BeginMutex static inline void _PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) @@ -194,6 +183,7 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) _PyCriticalSection2_BeginSlow(c, m1, m2, 0); } } +#define PyCriticalSection2_BeginMutex _PyCriticalSection2_BeginMutex static inline void _PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index 9de61ef54125d5..81faffac194171 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -131,7 +131,23 @@ PyAPI_FUNC(void) _PyXIData_Clear(PyInterpreterState *, _PyXIData_t *); /* getting cross-interpreter data */ -typedef int (*xidatafunc)(PyThreadState *tstate, PyObject *, _PyXIData_t *); +typedef int xidata_fallback_t; +#define _PyXIDATA_XIDATA_ONLY (0) +#define _PyXIDATA_FULL_FALLBACK (1) + +// Technically, we don't need two different function types; +// we could go with just the fallback one. However, only container +// types like tuple need it, so always having the extra arg would be +// a bit unfortunate. It's also nice to be able to clearly distinguish +// between types that might call _PyObject_GetXIData() and those that won't. +// +typedef int (*xidatafunc)(PyThreadState *, PyObject *, _PyXIData_t *); +typedef int (*xidatafbfunc)( + PyThreadState *, PyObject *, xidata_fallback_t, _PyXIData_t *); +typedef struct { + xidatafunc basic; + xidatafbfunc fallback; +} _PyXIData_getdata_t; PyAPI_FUNC(PyObject *) _PyXIData_GetNotShareableErrorType(PyThreadState *); PyAPI_FUNC(void) _PyXIData_SetNotShareableError(PyThreadState *, const char *); @@ -140,16 +156,21 @@ PyAPI_FUNC(void) _PyXIData_FormatNotShareableError( const char *, ...); -PyAPI_FUNC(xidatafunc) _PyXIData_Lookup( +PyAPI_FUNC(_PyXIData_getdata_t) _PyXIData_Lookup( PyThreadState *, PyObject *); PyAPI_FUNC(int) _PyObject_CheckXIData( PyThreadState *, PyObject *); +PyAPI_FUNC(int) _PyObject_GetXIDataNoFallback( + PyThreadState *, + PyObject *, + _PyXIData_t *); PyAPI_FUNC(int) _PyObject_GetXIData( PyThreadState *, PyObject *, + xidata_fallback_t, _PyXIData_t *); // _PyObject_GetXIData() for bytes @@ -191,6 +212,21 @@ PyAPI_FUNC(int) _PyCode_GetXIData( PyThreadState *, PyObject *, _PyXIData_t *); +PyAPI_FUNC(int) _PyCode_GetScriptXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); +PyAPI_FUNC(int) _PyCode_GetPureScriptXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); + +// _PyObject_GetXIData() for functions +PyAPI_FUNC(PyObject *) _PyFunction_FromXIData(_PyXIData_t *); +PyAPI_FUNC(int) _PyFunction_GetXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); /* using cross-interpreter data */ @@ -267,10 +303,10 @@ typedef struct _excinfo { const char *errdisplay; } _PyXI_excinfo; -PyAPI_FUNC(int) _PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc); +PyAPI_FUNC(_PyXI_excinfo *) _PyXI_NewExcInfo(PyObject *exc); +PyAPI_FUNC(void) _PyXI_FreeExcInfo(_PyXI_excinfo *info); PyAPI_FUNC(PyObject *) _PyXI_FormatExcInfo(_PyXI_excinfo *info); PyAPI_FUNC(PyObject *) _PyXI_ExcInfoAsObject(_PyXI_excinfo *info); -PyAPI_FUNC(void) _PyXI_ClearExcInfo(_PyXI_excinfo *info); typedef enum error_code { @@ -281,42 +317,30 @@ typedef enum error_code { _PyXI_ERR_ALREADY_RUNNING = -4, _PyXI_ERR_MAIN_NS_FAILURE = -5, _PyXI_ERR_APPLY_NS_FAILURE = -6, - _PyXI_ERR_NOT_SHAREABLE = -7, + _PyXI_ERR_PRESERVE_FAILURE = -7, + _PyXI_ERR_EXC_PROPAGATION_FAILURE = -8, + _PyXI_ERR_NOT_SHAREABLE = -9, } _PyXI_errcode; +typedef struct xi_failure _PyXI_failure; -typedef struct _sharedexception { - // The originating interpreter. - PyInterpreterState *interp; - // The kind of error to propagate. - _PyXI_errcode code; - // The exception information to propagate, if applicable. - // This is populated only for some error codes, - // but always for _PyXI_ERR_UNCAUGHT_EXCEPTION. - _PyXI_excinfo uncaught; -} _PyXI_error; - -PyAPI_FUNC(PyObject *) _PyXI_ApplyError(_PyXI_error *err); - - -typedef struct xi_session _PyXI_session; -typedef struct _sharedns _PyXI_namespace; +PyAPI_FUNC(_PyXI_failure *) _PyXI_NewFailure(void); +PyAPI_FUNC(void) _PyXI_FreeFailure(_PyXI_failure *); +PyAPI_FUNC(_PyXI_errcode) _PyXI_GetFailureCode(_PyXI_failure *); +PyAPI_FUNC(int) _PyXI_InitFailure(_PyXI_failure *, _PyXI_errcode, PyObject *); +PyAPI_FUNC(void) _PyXI_InitFailureUTF8( + _PyXI_failure *, + _PyXI_errcode, + const char *); -PyAPI_FUNC(void) _PyXI_FreeNamespace(_PyXI_namespace *ns); -PyAPI_FUNC(_PyXI_namespace *) _PyXI_NamespaceFromNames(PyObject *names); -PyAPI_FUNC(int) _PyXI_FillNamespaceFromDict( - _PyXI_namespace *ns, - PyObject *nsobj, - _PyXI_session *session); -PyAPI_FUNC(int) _PyXI_ApplyNamespace( - _PyXI_namespace *ns, - PyObject *nsobj, - PyObject *dflt); +PyAPI_FUNC(int) _PyXI_UnwrapNotShareableError( + PyThreadState *, + _PyXI_failure *); // A cross-interpreter session involves entering an interpreter -// (_PyXI_Enter()), doing some work with it, and finally exiting -// that interpreter (_PyXI_Exit()). +// with _PyXI_Enter(), doing some work with it, and finally exiting +// that interpreter with _PyXI_Exit(). // // At the boundaries of the session, both entering and exiting, // data may be exchanged between the previous interpreter and the @@ -324,48 +348,40 @@ PyAPI_FUNC(int) _PyXI_ApplyNamespace( // isolation between interpreters. This includes setting objects // in the target's __main__ module on the way in, and capturing // uncaught exceptions on the way out. -struct xi_session { - // Once a session has been entered, this is the tstate that was - // current before the session. If it is different from cur_tstate - // then we must have switched interpreters. Either way, this will - // be the current tstate once we exit the session. - PyThreadState *prev_tstate; - // Once a session has been entered, this is the current tstate. - // It must be current when the session exits. - PyThreadState *init_tstate; - // This is true if init_tstate needs cleanup during exit. - int own_init_tstate; - - // This is true if, while entering the session, init_thread took - // "ownership" of the interpreter's __main__ module. This means - // it is the only thread that is allowed to run code there. - // (Caveat: for now, users may still run exec() against the - // __main__ module's dict, though that isn't advisable.) - int running; - // This is a cached reference to the __dict__ of the entered - // interpreter's __main__ module. It is looked up when at the - // beginning of the session as a convenience. - PyObject *main_ns; - - // This is set if the interpreter is entered and raised an exception - // that needs to be handled in some special way during exit. - _PyXI_errcode *error_override; - // This is set if exit captured an exception to propagate. - _PyXI_error *error; - - // -- pre-allocated memory -- - _PyXI_error _error; - _PyXI_errcode _error_override; -}; +typedef struct xi_session _PyXI_session; + +PyAPI_FUNC(_PyXI_session *) _PyXI_NewSession(void); +PyAPI_FUNC(void) _PyXI_FreeSession(_PyXI_session *); + +typedef struct { + PyObject *preserved; + PyObject *excinfo; + _PyXI_errcode errcode; +} _PyXI_session_result; +PyAPI_FUNC(void) _PyXI_ClearResult(_PyXI_session_result *); PyAPI_FUNC(int) _PyXI_Enter( _PyXI_session *session, PyInterpreterState *interp, - PyObject *nsupdates); -PyAPI_FUNC(void) _PyXI_Exit(_PyXI_session *session); - -PyAPI_FUNC(PyObject *) _PyXI_ApplyCapturedException(_PyXI_session *session); -PyAPI_FUNC(int) _PyXI_HasCapturedException(_PyXI_session *session); + PyObject *nsupdates, + _PyXI_session_result *); +PyAPI_FUNC(int) _PyXI_Exit( + _PyXI_session *, + _PyXI_failure *, + _PyXI_session_result *); + +PyAPI_FUNC(PyObject *) _PyXI_GetMainNamespace( + _PyXI_session *, + _PyXI_failure *); + +PyAPI_FUNC(int) _PyXI_Preserve( + _PyXI_session *, + const char *, + PyObject *, + _PyXI_failure *); +PyAPI_FUNC(PyObject *) _PyXI_GetPreserved( + _PyXI_session_result *, + const char *); /*************/ diff --git a/Include/internal/pycore_crossinterp_data_registry.h b/Include/internal/pycore_crossinterp_data_registry.h index 8f4bcb948e5a45..fbb4cad5cac32e 100644 --- a/Include/internal/pycore_crossinterp_data_registry.h +++ b/Include/internal/pycore_crossinterp_data_registry.h @@ -17,7 +17,7 @@ typedef struct _xid_regitem { /* This is NULL for builtin types. */ PyObject *weakref; size_t refcount; - xidatafunc getdata; + _PyXIData_getdata_t getdata; } _PyXIData_regitem_t; typedef struct { @@ -30,7 +30,7 @@ typedef struct { PyAPI_FUNC(int) _PyXIData_RegisterClass( PyThreadState *, PyTypeObject *, - xidatafunc); + _PyXIData_getdata_t); PyAPI_FUNC(int) _PyXIData_UnregisterClass( PyThreadState *, PyTypeObject *); diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 59d2c9d5377953..8e7cd16acffa48 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -52,9 +52,15 @@ extern "C" { #ifdef Py_GIL_DISABLED # define _Py_Debug_gilruntimestate_enabled offsetof(struct _gil_runtime_state, enabled) # define _Py_Debug_Free_Threaded 1 +# define _Py_Debug_code_object_co_tlbc offsetof(PyCodeObject, co_tlbc) +# define _Py_Debug_interpreter_frame_tlbc_index offsetof(_PyInterpreterFrame, tlbc_index) +# define _Py_Debug_interpreter_state_tlbc_generation offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) #else # define _Py_Debug_gilruntimestate_enabled 0 # define _Py_Debug_Free_Threaded 0 +# define _Py_Debug_code_object_co_tlbc 0 +# define _Py_Debug_interpreter_frame_tlbc_index 0 +# define _Py_Debug_interpreter_state_tlbc_generation 0 #endif @@ -85,6 +91,8 @@ typedef struct _Py_DebugOffsets { uint64_t gil_runtime_state_enabled; uint64_t gil_runtime_state_locked; uint64_t gil_runtime_state_holder; + uint64_t code_object_generation; + uint64_t tlbc_generation; } interpreter_state; // Thread state offset; @@ -109,6 +117,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplus; uint64_t owner; uint64_t stackpointer; + uint64_t tlbc_index; } interpreter_frame; // Code object offset; @@ -123,6 +132,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplusnames; uint64_t localspluskinds; uint64_t co_code_adaptive; + uint64_t co_tlbc; } code_object; // PyObject offset; @@ -210,6 +220,11 @@ typedef struct _Py_DebugOffsets { uint64_t gi_frame_state; } gen_object; + struct _llist_node { + uint64_t next; + uint64_t prev; + } llist_node; + struct _debugger_support { uint64_t eval_breaker; uint64_t remote_debugger_support; @@ -245,6 +260,8 @@ typedef struct _Py_DebugOffsets { .gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \ .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \ .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \ + .code_object_generation = offsetof(PyInterpreterState, _code_object_generation), \ + .tlbc_generation = _Py_Debug_interpreter_state_tlbc_generation, \ }, \ .thread_state = { \ .size = sizeof(PyThreadState), \ @@ -265,6 +282,7 @@ typedef struct _Py_DebugOffsets { .localsplus = offsetof(_PyInterpreterFrame, localsplus), \ .owner = offsetof(_PyInterpreterFrame, owner), \ .stackpointer = offsetof(_PyInterpreterFrame, stackpointer), \ + .tlbc_index = _Py_Debug_interpreter_frame_tlbc_index, \ }, \ .code_object = { \ .size = sizeof(PyCodeObject), \ @@ -277,6 +295,7 @@ typedef struct _Py_DebugOffsets { .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \ .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \ .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \ + .co_tlbc = _Py_Debug_code_object_co_tlbc, \ }, \ .pyobject = { \ .size = sizeof(PyObject), \ @@ -339,13 +358,17 @@ typedef struct _Py_DebugOffsets { .gi_iframe = offsetof(PyGenObject, gi_iframe), \ .gi_frame_state = offsetof(PyGenObject, gi_frame_state), \ }, \ + .llist_node = { \ + .next = offsetof(struct llist_node, next), \ + .prev = offsetof(struct llist_node, prev), \ + }, \ .debugger_support = { \ .eval_breaker = offsetof(PyThreadState, eval_breaker), \ .remote_debugger_support = offsetof(PyThreadState, remote_debugger_support), \ .remote_debugging_enabled = offsetof(PyInterpreterState, config.remote_debug), \ .debugger_pending_call = offsetof(_PyRemoteDebuggerSupport, debugger_pending_call), \ .debugger_script_path = offsetof(_PyRemoteDebuggerSupport, debugger_script_path), \ - .debugger_script_path_size = MAX_SCRIPT_PATH_SIZE, \ + .debugger_script_path_size = _Py_MAX_SCRIPT_PATH_SIZE, \ }, \ } diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 754eb88a85c33c..afd468735b4893 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -30,6 +30,10 @@ PyAPI_FUNC(int) _PyDict_SetItem_KnownHash(PyObject *mp, PyObject *key, // Export for '_asyncio' shared extension PyAPI_FUNC(int) _PyDict_DelItem_KnownHash(PyObject *mp, PyObject *key, Py_hash_t hash); + +extern int _PyDict_DelItem_KnownHash_LockHeld(PyObject *mp, PyObject *key, + Py_hash_t hash); + extern int _PyDict_Contains_KnownHash(PyObject *, PyObject *, Py_hash_t); // "Id" variants @@ -47,6 +51,8 @@ extern int _PyDict_HasOnlyStringKeys(PyObject *mp); // Export for '_ctypes' shared extension PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *); +extern Py_ssize_t _PyDict_SizeOf_LockHeld(PyDictObject *); + #define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL) /* Like PyDict_Merge, but override can be 0, 1 or 2. If override is 0, @@ -150,6 +156,8 @@ extern int _PyDict_Pop_KnownHash( Py_hash_t hash, PyObject **result); +extern void _PyDict_Clear_LockHeld(PyObject *op); + #ifdef Py_GIL_DISABLED PyAPI_FUNC(void) _PyDict_EnsureSharedOnRead(PyDictObject *mp); #endif diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 209252b2ddc0de..6e1209659565a3 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -35,6 +35,18 @@ PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_cod extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); + +/* See pycore_code.h for explanation about what "stateless" means. */ + +PyAPI_FUNC(int) +_PyFunction_VerifyStateless(PyThreadState *, PyObject *); + +static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) { + return _PyFunction_CAST(func)->func_builtins; +} +#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func)) + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index a6519aa086309d..fd284d0e4ecc2f 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -205,6 +205,12 @@ static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) { #endif } +extern void _Py_ScheduleGC(PyThreadState *tstate); + +#ifndef Py_GIL_DISABLED +extern void _Py_TriggerGC(struct _gc_runtime_state *gcstate); +#endif + /* Tell the GC to track this object. * @@ -238,14 +244,19 @@ static inline void _PyObject_GC_TRACK( "object is in generation which is garbage collected", filename, lineno, __func__); - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyGC_Head *generation0 = &interp->gc.young.head; + struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc; + PyGC_Head *generation0 = &gcstate->young.head; PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); - uintptr_t not_visited = 1 ^ interp->gc.visited_space; + uintptr_t not_visited = 1 ^ gcstate->visited_space; gc->_gc_next = ((uintptr_t)generation0) | not_visited; generation0->_gc_prev = (uintptr_t)gc; + gcstate->young.count++; /* number of tracked GC objects */ + gcstate->heap_size++; + if (gcstate->young.count > gcstate->young.threshold) { + _Py_TriggerGC(gcstate); + } #endif } @@ -280,6 +291,11 @@ static inline void _PyObject_GC_UNTRACK( _PyGCHead_SET_PREV(next, prev); gc->_gc_next = 0; gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED; + struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc; + if (gcstate->young.count > 0) { + gcstate->young.count--; + } + gcstate->heap_size--; #endif } @@ -343,7 +359,6 @@ extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs); // Functions to clear types free lists extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp); -extern void _Py_ScheduleGC(PyThreadState *tstate); extern void _Py_RunGC(PyThreadState *tstate); union _PyStackRef; diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 9bde4faaf5a040..5997858378cfe0 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -796,6 +796,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all_threads)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(any)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append)); @@ -893,6 +894,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(data)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(database)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(day)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(debug)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(default)); @@ -904,7 +906,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(deterministic)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(device)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict_content)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dictcomp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digest)); @@ -1139,6 +1140,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(offset_src)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(on_type_read)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(onceregistry)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_active_thread)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_keys)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(oparg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opcode)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 3a83fd6b6042e2..bab5da70f3400c 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -287,6 +287,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(alias) STRUCT_FOR_ID(align) STRUCT_FOR_ID(all) + STRUCT_FOR_ID(all_threads) STRUCT_FOR_ID(allow_code) STRUCT_FOR_ID(any) STRUCT_FOR_ID(append) @@ -384,6 +385,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(data) STRUCT_FOR_ID(database) STRUCT_FOR_ID(day) + STRUCT_FOR_ID(debug) STRUCT_FOR_ID(decode) STRUCT_FOR_ID(decoder) STRUCT_FOR_ID(default) @@ -395,7 +397,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(deterministic) STRUCT_FOR_ID(device) STRUCT_FOR_ID(dict) - STRUCT_FOR_ID(dict_content) STRUCT_FOR_ID(dictcomp) STRUCT_FOR_ID(difference_update) STRUCT_FOR_ID(digest) @@ -630,6 +631,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(offset_src) STRUCT_FOR_ID(on_type_read) STRUCT_FOR_ID(onceregistry) + STRUCT_FOR_ID(only_active_thread) STRUCT_FOR_ID(only_keys) STRUCT_FOR_ID(oparg) STRUCT_FOR_ID(opcode) diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 368dafb90635d7..183b2d45c5ede1 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -153,10 +153,8 @@ typedef enum { } _PyConfigInitEnum; typedef enum { - /* For now, this means the GIL is enabled. - - gh-116329: This will eventually change to "the GIL is disabled but can - be re-enabled by loading an incompatible extension module." */ + /* In free threaded builds, this means that the GIL is disabled at startup, + but may be enabled by loading an incompatible extension module. */ _PyConfig_GIL_DEFAULT = -1, /* The GIL has been forced off or on, and will not be affected by module loading. */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index c3e6c77405bfe7..15885b5f228c8b 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -97,7 +97,7 @@ struct _ceval_runtime_state { // For example, we use a preallocated array // for the list of pending calls. struct _pending_calls pending_mainthread; - PyMutex sys_trace_profile_mutex; + PyMutex unused_sys_trace_profile_mutex; // kept for ABI compatibility }; @@ -726,6 +726,10 @@ typedef struct _PyIndexPool { // Next index to allocate if no free indices are available int32_t next_index; + + // Generation counter incremented on thread creation/destruction + // Used for TLBC cache invalidation in remote debugging + uint32_t tlbc_generation; } _PyIndexPool; typedef union _Py_unique_id_entry { @@ -764,10 +768,7 @@ struct _is { * and should be placed at the beginning. */ struct _ceval_state ceval; - /* This structure is carefully allocated so that it's correctly aligned - * to avoid undefined behaviors during LOAD and STORE. The '_malloced' - * field stores the allocated pointer address that will later be freed. - */ + // unused, kept for ABI compatibility void *_malloced; PyInterpreterState *next; @@ -843,6 +844,8 @@ struct _is { /* The per-interpreter GIL, which might not be used. */ struct _gil_runtime_state _gil; + uint64_t _code_object_generation; + /* ---------- IMPORTANT --------------------------- The fields above this line are declared as early as possible to facilitate out-of-process observability @@ -940,8 +943,8 @@ struct _is { PyDict_WatchCallback builtins_dict_watcher; _Py_GlobalMonitors monitors; - bool sys_profile_initialized; - bool sys_trace_initialized; + _PyOnceFlag sys_profile_once_flag; + _PyOnceFlag sys_trace_once_flag; Py_ssize_t sys_profiling_threads; /* Count of threads with c_profilefunc set */ Py_ssize_t sys_tracing_threads; /* Count of threads with c_tracefunc set */ PyObject *monitoring_callables[PY_MONITORING_TOOL_IDS][_PY_MONITORING_EVENTS]; diff --git a/Include/internal/pycore_interpframe.h b/Include/internal/pycore_interpframe.h index d3fd218b27eed7..19914e8cef7ed2 100644 --- a/Include/internal/pycore_interpframe.h +++ b/Include/internal/pycore_interpframe.h @@ -24,6 +24,36 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) { return (PyCodeObject *)executable; } +// Similar to _PyFrame_GetCode(), but return NULL if the frame is invalid or +// freed. Used by dump_frame() in Python/traceback.c. The function uses +// heuristics to detect freed memory, it's not 100% reliable. +static inline PyCodeObject* +_PyFrame_SafeGetCode(_PyInterpreterFrame *f) +{ + // globals and builtins may be NULL on a legit frame, but it's unlikely. + // It's more likely that it's a sign of an invalid frame. + if (f->f_globals == NULL || f->f_builtins == NULL) { + return NULL; + } + + if (PyStackRef_IsNull(f->f_executable)) { + return NULL; + } + void *ptr; + memcpy(&ptr, &f->f_executable, sizeof(f->f_executable)); + if (_PyMem_IsPtrFreed(ptr)) { + return NULL; + } + PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); + if (_PyObject_IsFreed(executable)) { + return NULL; + } + if (!PyCode_Check(executable)) { + return NULL; + } + return (PyCodeObject *)executable; +} + static inline _Py_CODEUNIT * _PyFrame_GetBytecode(_PyInterpreterFrame *f) { @@ -37,6 +67,31 @@ _PyFrame_GetBytecode(_PyInterpreterFrame *f) #endif } +// Similar to PyUnstable_InterpreterFrame_GetLasti(), but return NULL if the +// frame is invalid or freed. Used by dump_frame() in Python/traceback.c. The +// function uses heuristics to detect freed memory, it's not 100% reliable. +static inline int +_PyFrame_SafeGetLasti(struct _PyInterpreterFrame *f) +{ + // Code based on _PyFrame_GetBytecode() but replace _PyFrame_GetCode() + // with _PyFrame_SafeGetCode(). + PyCodeObject *co = _PyFrame_SafeGetCode(f); + if (co == NULL) { + return -1; + } + + _Py_CODEUNIT *bytecode; +#ifdef Py_GIL_DISABLED + _PyCodeArray *tlbc = _PyCode_GetTLBCArray(co); + assert(f->tlbc_index >= 0 && f->tlbc_index < tlbc->size); + bytecode = (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index]; +#else + bytecode = _PyCode_CODE(co); +#endif + + return (int)(f->instr_ptr - bytecode) * sizeof(_Py_CODEUNIT); +} + static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) { PyObject *func = PyStackRef_AsPyObjectBorrow(f->f_funcobj); assert(PyFunction_Check(func)); diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 7484b05d7f2446..9b071573ad3c74 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -25,13 +25,6 @@ PyMutex_LockFast(PyMutex *m) return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED); } -// Checks if the mutex is currently locked. -static inline int -PyMutex_IsLocked(PyMutex *m) -{ - return (_Py_atomic_load_uint8(&m->_bits) & _Py_LOCKED) != 0; -} - // Re-initializes the mutex after a fork to the unlocked state. static inline void _PyMutex_at_fork_reinit(PyMutex *m) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ed6c435316708e..3196d1b82084b9 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -158,6 +158,11 @@ PyAPI_FUNC(int) _PyLong_UnsignedLongLong_Converter(PyObject *, void *); // Export for '_testclinic' shared extension (Argument Clinic code) PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt8_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt16_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt32_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt64_Converter(PyObject *, void *); + /* Long value tag bits: * 0-1: Sign bits value = (1-sign), ie. negative=2, positive=0, zero=1. * 2: Set to 1 for the small ints diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index 1cc897c5a69469..f31951057121d9 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -277,6 +277,9 @@ Known values: Python 3.14a7 3622 (Store annotations in different class dict keys) Python 3.14a7 3623 (Add BUILD_INTERPOLATION & BUILD_TEMPLATE opcodes) Python 3.14b1 3624 (Don't optimize LOAD_FAST when local is killed by DELETE_FAST) + Python 3.14b3 3625 (Fix handling of opcodes that may leave operands on the stack when optimizing LOAD_FAST) + Python 3.14rc2 3626 (Fix missing exception handlers in logical expression) + Python 3.14rc3 3627 (Fix miscompilation of some module-level annotations) Python 3.15 will start with 3650 @@ -289,7 +292,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3624 +#define PYC_MAGIC_NUMBER 3627 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index b7e162c8abcabf..12c5614834f565 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -614,7 +614,7 @@ static inline PyObject * _Py_XGetRef(PyObject **ptr) { for (;;) { - PyObject *value = _Py_atomic_load_ptr(ptr); + PyObject *value = _PyObject_CAST(_Py_atomic_load_ptr(ptr)); if (value == NULL) { return value; } @@ -629,7 +629,7 @@ _Py_XGetRef(PyObject **ptr) static inline PyObject * _Py_TryXGetRef(PyObject **ptr) { - PyObject *value = _Py_atomic_load_ptr(ptr); + PyObject *value = _PyObject_CAST(_Py_atomic_load_ptr(ptr)); if (value == NULL) { return value; } @@ -1007,6 +1007,22 @@ enum _PyAnnotateFormat { _Py_ANNOTATE_FORMAT_STRING = 4, }; +int _PyObject_SetDict(PyObject *obj, PyObject *value); + +#ifndef Py_GIL_DISABLED +static inline Py_ALWAYS_INLINE void _Py_INCREF_MORTAL(PyObject *op) +{ + assert(!_Py_IsStaticImmortal(op)); + op->ob_refcnt++; + _Py_INCREF_STAT_INC(); +#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) + if (!_Py_IsImmortal(op)) { + _Py_INCREF_IncRefTotal(); + } +#endif +} +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 0e34074f1600a7..947e0e58868e1d 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1366,7 +1366,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_ISINSTANCE] = { .nuops = 1, .uops = { { _CALL_ISINSTANCE, OPARG_SIMPLE, 3 } } }, [CALL_KW_BOUND_METHOD] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION_KW, 2, 1 }, { _EXPAND_METHOD_KW, OPARG_SIMPLE, 3 }, { _PY_FRAME_KW, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_KW_NON_PY] = { .nuops = 3, .uops = { { _CHECK_IS_NOT_PY_CALLABLE_KW, OPARG_SIMPLE, 3 }, { _CALL_KW_NON_PY, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - [CALL_KW_PY] = { .nuops = 5, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION_KW, 2, 1 }, { _PY_FRAME_KW, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, + [CALL_KW_PY] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION_KW, 2, 1 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_KW, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_LEN] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_LEN, OPARG_SIMPLE, 3 }, { _CALL_LEN, OPARG_SIMPLE, 3 } } }, [CALL_LIST_APPEND] = { .nuops = 1, .uops = { { _CALL_LIST_APPEND, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_FAST] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_FAST, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, @@ -1787,6 +1787,35 @@ const uint8_t _PyOpcode_Caches[256] = { extern const uint8_t _PyOpcode_Deopt[256]; #ifdef NEED_OPCODE_METADATA const uint8_t _PyOpcode_Deopt[256] = { + [121] = 121, + [122] = 122, + [123] = 123, + [124] = 124, + [125] = 125, + [126] = 126, + [127] = 127, + [212] = 212, + [213] = 213, + [214] = 214, + [215] = 215, + [216] = 216, + [217] = 217, + [218] = 218, + [219] = 219, + [220] = 220, + [221] = 221, + [222] = 222, + [223] = 223, + [224] = 224, + [225] = 225, + [226] = 226, + [227] = 227, + [228] = 228, + [229] = 229, + [230] = 230, + [231] = 231, + [232] = 232, + [233] = 233, [BINARY_OP] = BINARY_OP, [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h index 62af06dc01c125..79a1a242556a52 100644 --- a/Include/internal/pycore_opcode_utils.h +++ b/Include/internal/pycore_opcode_utils.h @@ -56,6 +56,8 @@ extern "C" { #define IS_RETURN_OPCODE(opcode) \ (opcode == RETURN_VALUE) +#define IS_RAISE_OPCODE(opcode) \ + (opcode == RAISE_VARARGS || opcode == RERAISE) /* Flags used in the oparg for MAKE_FUNCTION */ diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 3e41e2fd1569ca..4187f46995e2c3 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -31,6 +31,8 @@ extern "C" { _Py_atomic_store_ptr(&value, new_value) #define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \ _Py_atomic_load_ptr_acquire(&value) +#define FT_ATOMIC_LOAD_PTR_CONSUME(value) \ + _Py_atomic_load_ptr_consume(&value) #define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) \ _Py_atomic_load_uintptr_acquire(&value) #define FT_ATOMIC_LOAD_PTR_RELAXED(value) \ @@ -77,6 +79,10 @@ extern "C" { _Py_atomic_store_ushort_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \ _Py_atomic_load_ushort_relaxed(&value) +#define FT_ATOMIC_LOAD_INT(value) \ + _Py_atomic_load_int(&value) +#define FT_ATOMIC_STORE_INT(value, new_value) \ + _Py_atomic_store_int(&value, new_value) #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \ _Py_atomic_store_int_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_INT_RELAXED(value) \ @@ -119,6 +125,7 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value #define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value +#define FT_ATOMIC_LOAD_PTR_CONSUME(value) value #define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) value #define FT_ATOMIC_LOAD_PTR_RELAXED(value) value #define FT_ATOMIC_LOAD_UINT8(value) value @@ -142,6 +149,8 @@ extern "C" { #define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value #define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_INT(value) value +#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value #define FT_ATOMIC_LOAD_INT_RELAXED(value) value #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_UINT_RELAXED(value) value diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index f357b88e220e6e..2c2048f7e1272a 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -94,13 +94,13 @@ extern void _PyErr_Fetch( PyObject **value, PyObject **traceback); -extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate); +PyAPI_FUNC(PyObject*) _PyErr_GetRaisedException(PyThreadState *tstate); PyAPI_FUNC(int) _PyErr_ExceptionMatches( PyThreadState *tstate, PyObject *exc); -extern void _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); +PyAPI_FUNC(void) _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); extern void _PyErr_Restore( PyThreadState *tstate, diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 6e89ca33e4208c..8faf7a4d403f84 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -41,6 +41,7 @@ extern PyStatus _Py_HashRandomization_Init(const PyConfig *); extern PyStatus _PyGC_Init(PyInterpreterState *interp); extern PyStatus _PyAtExit_Init(PyInterpreterState *interp); +extern PyStatus _PyDateTime_InitTypes(PyInterpreterState *interp); /* Various internal finalizers */ diff --git a/Include/internal/pycore_pymath.h b/Include/internal/pycore_pymath.h index eea8996ba68ca0..5bbbdaab8f42dc 100644 --- a/Include/internal/pycore_pymath.h +++ b/Include/internal/pycore_pymath.h @@ -146,17 +146,17 @@ extern void _Py_set_387controlword(unsigned short); unsigned int old_fpcr, new_fpcr #define _Py_SET_53BIT_PRECISION_START \ do { \ - __asm__ ("fmove.l %%fpcr,%0" : "=g" (old_fpcr)); \ + __asm__ ("fmove.l %%fpcr,%0" : "=dm" (old_fpcr)); \ /* Set double precision / round to nearest. */ \ new_fpcr = (old_fpcr & ~0xf0) | 0x80; \ if (new_fpcr != old_fpcr) { \ - __asm__ volatile ("fmove.l %0,%%fpcr" : : "g" (new_fpcr));\ + __asm__ volatile ("fmove.l %0,%%fpcr" : : "dm" (new_fpcr)); \ } \ } while (0) #define _Py_SET_53BIT_PRECISION_END \ do { \ if (new_fpcr != old_fpcr) { \ - __asm__ volatile ("fmove.l %0,%%fpcr" : : "g" (old_fpcr)); \ + __asm__ volatile ("fmove.l %0,%%fpcr" : : "dm" (old_fpcr)); \ } \ } while (0) #endif diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 02537bdfef8598..f0b90a25597d0d 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -54,20 +54,43 @@ static inline int _PyMem_IsPtrFreed(const void *ptr) { uintptr_t value = (uintptr_t)ptr; #if SIZEOF_VOID_P == 8 - return (value == 0 + return (value <= 0xff // NULL, 0x1, 0x2, ..., 0xff || value == (uintptr_t)0xCDCDCDCDCDCDCDCD || value == (uintptr_t)0xDDDDDDDDDDDDDDDD - || value == (uintptr_t)0xFDFDFDFDFDFDFDFD); + || value == (uintptr_t)0xFDFDFDFDFDFDFDFD + || value >= (uintptr_t)0xFFFFFFFFFFFFFF00); // -0xff, ..., -2, -1 #elif SIZEOF_VOID_P == 4 - return (value == 0 + return (value <= 0xff || value == (uintptr_t)0xCDCDCDCD || value == (uintptr_t)0xDDDDDDDD - || value == (uintptr_t)0xFDFDFDFD); + || value == (uintptr_t)0xFDFDFDFD + || value >= (uintptr_t)0xFFFFFF00); #else # error "unknown pointer size" #endif } +// Similar to _PyMem_IsPtrFreed() but expects an 'unsigned long' instead of a +// pointer. +static inline int _PyMem_IsULongFreed(unsigned long value) +{ +#if SIZEOF_LONG == 8 + return (value == 0 + || value == (unsigned long)0xCDCDCDCDCDCDCDCD + || value == (unsigned long)0xDDDDDDDDDDDDDDDD + || value == (unsigned long)0xFDFDFDFDFDFDFDFD + || value == (unsigned long)0xFFFFFFFFFFFFFFFF); +#elif SIZEOF_LONG == 4 + return (value == 0 + || value == (unsigned long)0xCDCDCDCD + || value == (unsigned long)0xDDDDDDDD + || value == (unsigned long)0xFDFDFDFD + || value == (unsigned long)0xFFFFFFFF); +#else +# error "unknown long size" +#endif +} + extern int _PyMem_GetAllocatorName( const char *name, PyMemAllocatorName *allocator); @@ -88,7 +111,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str); extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. -extern void _PyMem_FreeDelayed(void *ptr); +extern void _PyMem_FreeDelayed(void *ptr, size_t size); // Enqueue an object to be freed possibly after some delay #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 633e5cf77db918..7f1b0b7523b20d 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_pythonrun.h" // _PyOS_STACK_MARGIN_SHIFT #include "pycore_typedefs.h" // _PyRuntimeState #include "pycore_tstate.h" @@ -325,7 +326,11 @@ _Py_RecursionLimit_GetMargin(PyThreadState *tstate) _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; assert(_tstate->c_stack_hard_limit != 0); intptr_t here_addr = _Py_get_machine_stack_pointer(); - return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, PYOS_STACK_MARGIN_SHIFT); +#if _Py_STACK_GROWS_DOWN + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, _PyOS_STACK_MARGIN_SHIFT); +#else + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, (intptr_t)_tstate->c_stack_soft_limit - here_addr, _PyOS_STACK_MARGIN_SHIFT); +#endif } #ifdef __cplusplus diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h index 0bfc5704dc4c59..b25abb4f64c688 100644 --- a/Include/internal/pycore_pythonrun.h +++ b/Include/internal/pycore_pythonrun.h @@ -25,6 +25,7 @@ extern int _PyRun_InteractiveLoopObject( PyObject *filename, PyCompilerFlags *flags); +extern int _PyObject_SupportedAsScript(PyObject *); extern const char* _Py_SourceAsString( PyObject *cmd, const char *funcname, @@ -32,6 +33,34 @@ extern const char* _Py_SourceAsString( PyCompilerFlags *cf, PyObject **cmd_copy); + +/* Stack size, in "pointers". This must be large enough, so + * no two calls to check recursion depth are more than this far + * apart. In practice, that means it must be larger than the C + * stack consumption of PyEval_EvalDefault */ +#if (defined(Py_DEBUG) \ + || defined(_Py_ADDRESS_SANITIZER) \ + || defined(_Py_THREAD_SANITIZER)) +# define _PyOS_LOG2_STACK_MARGIN 12 +#else +# define _PyOS_LOG2_STACK_MARGIN 11 +#endif +#define _PyOS_STACK_MARGIN (1 << _PyOS_LOG2_STACK_MARGIN) +#define _PyOS_STACK_MARGIN_BYTES (_PyOS_STACK_MARGIN * sizeof(void *)) + +#if SIZEOF_VOID_P == 8 +# define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 3) +#else +# define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 2) +#endif + +#ifdef _Py_THREAD_SANITIZER +# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 6) +#else +# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 3) +#endif + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index b835c3abaf5d0b..1f9b3fcf777493 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -48,8 +48,21 @@ struct _qsbr_thread_state { // Thread state (or NULL) PyThreadState *tstate; - // Used to defer advancing write sequence a fixed number of times - int deferrals; + // Number of held items added by this thread since the last write sequence + // advance + int deferred_count; + + // Estimate for the amount of memory that is held by this thread since + // the last write sequence advance + size_t deferred_memory; + + // Amount of memory in mimalloc pages deferred from collection. When + // deferred, they are prevented from being used for a different size class + // and in a different thread. + size_t deferred_page_memory; + + // True if the deferred memory frees should be processed. + bool should_process; // Is this thread state allocated? bool allocated; @@ -109,11 +122,17 @@ _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal) extern uint64_t _Py_qsbr_advance(struct _qsbr_shared *shared); -// Batches requests to advance the write sequence. This advances the write -// sequence every N calls, which reduces overhead but increases time to -// reclamation. Returns the new goal. +// Return the next value for the write sequence (current plus the increment). extern uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr); +_Py_qsbr_shared_next(struct _qsbr_shared *shared); + +// Return true if deferred memory frees held by QSBR should be processed to +// determine if they can be safely freed. +static inline bool +_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr) +{ + return qsbr->should_process; +} // Have the read sequences advanced to the given goal? If this returns true, // it safe to reclaim any memory tagged with the goal (or earlier goal). diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 4a34ffa559e124..e642a42bbfc3e4 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -794,6 +794,7 @@ extern "C" { INIT_ID(alias), \ INIT_ID(align), \ INIT_ID(all), \ + INIT_ID(all_threads), \ INIT_ID(allow_code), \ INIT_ID(any), \ INIT_ID(append), \ @@ -891,6 +892,7 @@ extern "C" { INIT_ID(data), \ INIT_ID(database), \ INIT_ID(day), \ + INIT_ID(debug), \ INIT_ID(decode), \ INIT_ID(decoder), \ INIT_ID(default), \ @@ -902,7 +904,6 @@ extern "C" { INIT_ID(deterministic), \ INIT_ID(device), \ INIT_ID(dict), \ - INIT_ID(dict_content), \ INIT_ID(dictcomp), \ INIT_ID(difference_update), \ INIT_ID(digest), \ @@ -1137,6 +1138,7 @@ extern "C" { INIT_ID(offset_src), \ INIT_ID(on_type_read), \ INIT_ID(onceregistry), \ + INIT_ID(only_active_thread), \ INIT_ID(only_keys), \ INIT_ID(oparg), \ INIT_ID(opcode), \ diff --git a/Include/internal/pycore_runtime_structs.h b/Include/internal/pycore_runtime_structs.h index 6bf3aae7175a97..c34d7e7edc0715 100644 --- a/Include/internal/pycore_runtime_structs.h +++ b/Include/internal/pycore_runtime_structs.h @@ -279,12 +279,6 @@ struct pyruntimestate { struct _types_runtime_state types; struct _Py_time_runtime_state time; -#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) - // Used in "Python/emscripten_trampoline.c" to choose between type - // reflection trampoline and EM_JS trampoline. - int (*emscripten_count_args_function)(PyCFunctionWithKeywords func); -#endif - /* All the objects that are shared by the runtime's interpreters. */ struct _Py_cached_objects cached_objects; struct _Py_static_objects static_objects; diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index a2acf311ff4c65..52acd918c9b9f9 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -239,6 +239,7 @@ PyStackRef_IsNullOrInt(_PyStackRef ref); #else #define Py_INT_TAG 3 +#define Py_TAG_REFCNT 1 static inline bool PyStackRef_IsTaggedInt(_PyStackRef i) @@ -264,7 +265,7 @@ PyStackRef_UntagInt(_PyStackRef i) #ifdef Py_GIL_DISABLED -#define Py_TAG_DEFERRED (1) +#define Py_TAG_DEFERRED Py_TAG_REFCNT #define Py_TAG_PTR ((uintptr_t)0) #define Py_TAG_BITS ((uintptr_t)1) @@ -322,7 +323,7 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj) static inline bool PyStackRef_IsHeapSafe(_PyStackRef stackref) { - if (PyStackRef_IsDeferred(stackref)) { + if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsTaggedInt(stackref)) { PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); return obj == NULL || _Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj); } @@ -442,14 +443,13 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) /* References to immortal objects always have their tag bit set to Py_TAG_REFCNT * as they can (must) have their reclamation deferred */ -#define Py_TAG_BITS 1 -#define Py_TAG_REFCNT 1 +#define Py_TAG_BITS 3 #if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT # error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" #endif #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) -#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) +#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_REFCNT))) #define PyStackRef_NULL_BITS Py_TAG_REFCNT static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; @@ -529,7 +529,7 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); #if SIZEOF_VOID_P > 4 - unsigned int tag = obj->ob_flags & Py_TAG_BITS; + unsigned int tag = obj->ob_flags & Py_TAG_REFCNT; #else unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_REFCNT : 0; #endif @@ -548,12 +548,6 @@ PyStackRef_FromPyObjectStealMortal(PyObject *obj) return ref; } -// Check if a stackref is exactly the same as another stackref, including the -// the deferred bit. This can only be used safely if you know that the deferred -// bits of `a` and `b` match. -#define PyStackRef_IsExactly(a, b) \ - (assert(((a).bits & Py_TAG_BITS) == ((b).bits & Py_TAG_BITS)), (a).bits == (b).bits) - static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { @@ -561,7 +555,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) if (_Py_IsImmortal(obj)) { return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; } - Py_INCREF_MORTAL(obj); + _Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; PyStackRef_CheckValid(ref); return ref; @@ -572,7 +566,7 @@ static inline _PyStackRef _PyStackRef_FromPyObjectNewMortal(PyObject *obj) { assert(obj != NULL); - Py_INCREF_MORTAL(obj); + _Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; PyStackRef_CheckValid(ref); return ref; @@ -590,14 +584,14 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) /* WARNING: This macro evaluates its argument more than once */ #ifdef _WIN32 #define PyStackRef_DUP(REF) \ - (PyStackRef_RefcountOnObject(REF) ? (Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) + (PyStackRef_RefcountOnObject(REF) ? (_Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) #else static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); if (PyStackRef_RefcountOnObject(ref)) { - Py_INCREF_MORTAL(BITS_TO_PTR(ref)); + _Py_INCREF_MORTAL(BITS_TO_PTR(ref)); } return ref; } @@ -606,7 +600,7 @@ PyStackRef_DUP(_PyStackRef ref) static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return (ref.bits & Py_TAG_BITS) == 0 || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); + return (ref.bits & Py_TAG_BITS) != Py_TAG_REFCNT || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); } static inline _PyStackRef @@ -681,7 +675,7 @@ PyStackRef_XCLOSE(_PyStackRef ref) // Note: this is a macro because MSVC (Windows) has trouble inlining it. -#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) +#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_REFCNT)) == ((b).bits & (~Py_TAG_REFCNT))) #endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) @@ -771,7 +765,7 @@ _Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out) static inline int _Py_TryXGetStackRef(PyObject **src, _PyStackRef *out) { - PyObject *op = _Py_atomic_load_ptr_relaxed(src); + PyObject *op = _PyObject_CAST(_Py_atomic_load_ptr_relaxed(src)); if (op == NULL) { *out = PyStackRef_NULL; return 1; diff --git a/Include/internal/pycore_stats.h b/Include/internal/pycore_stats.h index ab649574f33dbf..24f239a2135b93 100644 --- a/Include/internal/pycore_stats.h +++ b/Include/internal/pycore_stats.h @@ -77,7 +77,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define RARE_EVENT_INTERP_INC(interp, name) \ do { \ /* saturating add */ \ - int val = FT_ATOMIC_LOAD_UINT8_RELAXED(interp->rare_events.name); \ + uint8_t val = FT_ATOMIC_LOAD_UINT8_RELAXED(interp->rare_events.name); \ if (val < UINT8_MAX) { \ FT_ATOMIC_STORE_UINT8(interp->rare_events.name, val + 1); \ } \ diff --git a/Include/internal/pycore_traceback.h b/Include/internal/pycore_traceback.h index d71dd2886999a6..8357cce9d899fb 100644 --- a/Include/internal/pycore_traceback.h +++ b/Include/internal/pycore_traceback.h @@ -100,8 +100,11 @@ extern int _Py_WriteIndentedMargin(int, const char*, PyObject *); extern int _Py_WriteIndent(int, PyObject *); // Export for the faulthandler module +PyAPI_FUNC(void) _Py_InitDumpStack(void); PyAPI_FUNC(void) _Py_DumpStack(int fd); +extern void _Py_DumpTraceback_Init(void); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index bad968428c73a1..c3ac52bd76613e 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -76,6 +76,10 @@ typedef struct _PyThreadStateImpl { Py_ssize_t reftotal; // this thread's total refcount operations #endif + // PyUnstable_ThreadState_ResetStackProtection() values + uintptr_t c_stack_init_base; + uintptr_t c_stack_init_top; + } _PyThreadStateImpl; #ifdef __cplusplus diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 1a4f89fd2449a0..ba66364d3e94b1 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -149,6 +149,11 @@ typedef int (*_py_validate_type)(PyTypeObject *); extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version); +// Like PyType_GetBaseByToken, but does not modify refcounts. +// Cannot fail; arguments must be valid. +PyAPI_FUNC(int) +_PyType_GetBaseByToken_Borrow(PyTypeObject *type, void *token, PyTypeObject **result); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index c85d53b89accdb..3791b913c17546 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -139,14 +139,18 @@ extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful( // Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape // chars. // Export for test_peg_generator. -PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2( const char *string, /* Unicode-Escape encoded string */ Py_ssize_t length, /* size of string */ const char *errors, /* error handling */ Py_ssize_t *consumed, /* bytes consumed */ - const char **first_invalid_escape); /* on return, points to first - invalid escaped char in - string. */ + int *first_invalid_escape_char, /* on return, if not -1, contain the first + invalid escaped char (<= 0xff) or invalid + octal escape (> 0xff) in string. */ + const char **first_invalid_escape_ptr); /* on return, if not NULL, may + point to the first invalid escaped + char in string. + May be NULL if errors is not NULL. */ /* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index fefacef77c89ee..c3ecdf64c6ee00 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -936,6 +936,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(all_threads); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(allow_code); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1324,6 +1328,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(debug); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(decode); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1368,10 +1376,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(dict_content); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(dictcomp); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2308,6 +2312,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(only_active_thread); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(only_keys); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h index 950aa0af290056..4ed8928c0b92a8 100644 --- a/Include/internal/pycore_weakref.h +++ b/Include/internal/pycore_weakref.h @@ -29,6 +29,12 @@ extern "C" { PyMutex_LockFlags(wr->weakrefs_lock, _Py_LOCK_DONT_DETACH) #define UNLOCK_WEAKREFS_FOR_WR(wr) PyMutex_Unlock(wr->weakrefs_lock) +#define FT_CLEAR_WEAKREFS(obj, weakref_list) \ + do { \ + assert(Py_REFCNT(obj) == 0); \ + PyObject_ClearWeakRefs(obj); \ + } while (0) + #else #define LOCK_WEAKREFS(obj) @@ -37,6 +43,14 @@ extern "C" { #define LOCK_WEAKREFS_FOR_WR(wr) #define UNLOCK_WEAKREFS_FOR_WR(wr) +#define FT_CLEAR_WEAKREFS(obj, weakref_list) \ + do { \ + assert(Py_REFCNT(obj) == 0); \ + if (weakref_list != NULL) { \ + PyObject_ClearWeakRefs(obj); \ + } \ + } while (0) + #endif static inline int _is_dead(PyObject *obj) diff --git a/Include/object.h b/Include/object.h index 8cc83abb8574e3..4d577d0e32d791 100644 --- a/Include/object.h +++ b/Include/object.h @@ -654,8 +654,13 @@ PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ PyAPI_FUNC(int) Py_IsNone(PyObject *x); #define Py_IsNone(x) Py_Is((x), Py_None) -/* Macro for returning Py_None from a function */ -#define Py_RETURN_NONE return Py_None +/* Macro for returning Py_None from a function. + * Only treat Py_None as immortal in the limited C API 3.12 and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_NONE return Py_NewRef(Py_None) +#else +# define Py_RETURN_NONE return Py_None +#endif /* Py_NotImplemented is a singleton used to signal that an operation is @@ -669,8 +674,13 @@ PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ # define Py_NotImplemented (&_Py_NotImplementedStruct) #endif -/* Macro for returning Py_NotImplemented from a function */ -#define Py_RETURN_NOTIMPLEMENTED return Py_NotImplemented +/* Macro for returning Py_NotImplemented from a function. Only treat + * Py_NotImplemented as immortal in the limited C API 3.12 and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_NOTIMPLEMENTED return Py_NewRef(Py_NotImplemented) +#else +# define Py_RETURN_NOTIMPLEMENTED return Py_NotImplemented +#endif /* Rich comparison opcodes */ #define Py_LT 0 diff --git a/Include/patchlevel.h b/Include/patchlevel.h index c9dd659046a367..7cda21cb678e7f 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -19,12 +19,12 @@ /*--start constants--*/ #define PY_MAJOR_VERSION 3 #define PY_MINOR_VERSION 14 -#define PY_MICRO_VERSION 0 -#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_BETA -#define PY_RELEASE_SERIAL 1 +#define PY_MICRO_VERSION 2 +#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL +#define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "3.14.0b1" +#define PY_VERSION "3.14.2+" /*--end constants--*/ diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 9824d099c3df7d..04548b7684a2fd 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -52,6 +52,11 @@ struct PyExpat_CAPI int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); /* might be NULL for expat < 2.6.0 */ XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled); + /* might be NULL for expat < 2.7.2 */ + XML_Bool (*SetAllocTrackerActivationThreshold)( + XML_Parser parser, unsigned long long activationThresholdBytes); + XML_Bool (*SetAllocTrackerMaximumAmplification)( + XML_Parser parser, float maxAmplificationFactor); /* always add new stuff to the end! */ }; diff --git a/Include/pyport.h b/Include/pyport.h index 3eac119bf8e8d8..2c8567f2554d9b 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -49,8 +49,9 @@ // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ - || (defined(__cplusplus) && __cplusplus >= 201103) +#if !defined(_MSC_VER) && \ + ((defined (__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) \ + || (defined(__cplusplus) && __cplusplus >= 201103)) # define _Py_NULL nullptr #else # define _Py_NULL NULL @@ -700,4 +701,10 @@ extern "C" { #endif +// Assume the stack grows down unless specified otherwise +#ifndef _Py_STACK_GROWS_DOWN +# define _Py_STACK_GROWS_DOWN 1 +#endif + + #endif /* Py_PYPORT_H */ diff --git a/Include/pythonrun.h b/Include/pythonrun.h index fad2b3c77476e4..92b50aa807bf70 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -21,39 +21,15 @@ PyAPI_FUNC(void) PyErr_DisplayException(PyObject *); /* Stuff with no proper home (yet) */ PyAPI_DATA(int) (*PyOS_InputHook)(void); -/* Stack size, in "pointers". This must be large enough, so - * no two calls to check recursion depth are more than this far - * apart. In practice, that means it must be larger than the C - * stack consumption of PyEval_EvalDefault */ -#if defined(_Py_ADDRESS_SANITIZER) || defined(_Py_THREAD_SANITIZER) -# define PYOS_LOG2_STACK_MARGIN 12 -#elif defined(Py_DEBUG) && defined(WIN32) -# define PYOS_LOG2_STACK_MARGIN 12 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really a size */ -# define PYOS_LOG2_STACK_MARGIN 9 -#else -# define PYOS_LOG2_STACK_MARGIN 11 -#endif -#define PYOS_STACK_MARGIN (1 << PYOS_LOG2_STACK_MARGIN) -#define PYOS_STACK_MARGIN_BYTES (PYOS_STACK_MARGIN * sizeof(void *)) - -#if SIZEOF_VOID_P == 8 -#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 3) -#else -#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 2) -#endif - - #if defined(WIN32) -#define USE_STACKCHECK +# define USE_STACKCHECK #endif - #ifdef USE_STACKCHECK /* Check that we aren't overflowing our stack */ PyAPI_FUNC(int) PyOS_CheckStack(void); #endif + #ifndef Py_LIMITED_API # define Py_CPYTHON_PYTHONRUN_H # include "cpython/pythonrun.h" diff --git a/Include/refcount.h b/Include/refcount.h index 177bbdaf0c5977..1ef6b2186bba03 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -1,5 +1,5 @@ -#ifndef Py_REFCOUNT_H -#define Py_REFCOUNT_H +#ifndef _Py_REFCOUNT_H +#define _Py_REFCOUNT_H #ifdef __cplusplus extern "C" { #endif @@ -33,7 +33,7 @@ increase and decrease the objects reference count. In order to offer sufficient resilience to C extensions using the stable ABI compiled against 3.11 or earlier, we set the initial value near the -middle of the range (2**31, 2**32). That way the the refcount can be +middle of the range (2**31, 2**32). That way the refcount can be off by ~1 billion without affecting immortality. Reference count increases will use saturated arithmetic, taking advantage of @@ -117,6 +117,8 @@ PyAPI_FUNC(Py_ssize_t) Py_REFCNT(PyObject *ob); } #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_REFCNT(ob) _Py_REFCNT(_PyObject_CAST(ob)) + #else + # define Py_REFCNT(ob) _Py_REFCNT(ob) #endif #endif @@ -247,20 +249,6 @@ PyAPI_FUNC(void) Py_DecRef(PyObject *); PyAPI_FUNC(void) _Py_IncRef(PyObject *); PyAPI_FUNC(void) _Py_DecRef(PyObject *); -#ifndef Py_GIL_DISABLED -static inline Py_ALWAYS_INLINE void Py_INCREF_MORTAL(PyObject *op) -{ - assert(!_Py_IsStaticImmortal(op)); - op->ob_refcnt++; - _Py_INCREF_STAT_INC(); -#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) - if (!_Py_IsImmortal(op)) { - _Py_INCREF_IncRefTotal(); - } -#endif -} -#endif - static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) { #if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) @@ -564,4 +552,4 @@ static inline PyObject* _Py_XNewRef(PyObject *obj) #ifdef __cplusplus } #endif -#endif // !Py_REFCOUNT_H +#endif // !_Py_REFCOUNT_H diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 02bbdf6071fa12..742af5efcf5ce5 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -604,4 +604,4 @@ References pp. 213--227, 1997. [^2]: The Zephyr Abstract Syntax Description Language.: - https://www.cs.princeton.edu/research/techreps/TR-554-97 + https://www.cs.princeton.edu/research/techreps/254 diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 28589787e1fad7..9e38da4c862f16 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -8,7 +8,7 @@ The cost of raising an exception is increased, but not by much. The following code: -``` +```python try: g(0) except: @@ -18,7 +18,7 @@ except: compiles into intermediate code like the following: -``` +```python RESUME 0 1 SETUP_FINALLY 8 (to L1) @@ -118,13 +118,13 @@ All offsets and lengths are in code units, not bytes. We want the format to be compact, but quickly searchable. For it to be compact, it needs to have variable sized entries so that we can store common (small) offsets compactly, but handle large offsets if needed. -For it to be searchable quickly, we need to support binary search giving us log(n) performance in all cases. +For it to be searchable quickly, we need to support binary search giving us `log(n)` performance in all cases. Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: `start, size, target, depth, push-lasti`. -Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each code unit takes 2 bytes. +Also, sizes are limited to `2**30` as the code length cannot exceed `2**31` and each code unit takes 2 bytes. It also happens that depth is generally quite small. So, we need to encode: @@ -140,7 +140,7 @@ lasti (1 bit) We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. Since the most significant bit is reserved for marking the start of an entry, we have 7 bits per byte to encode offsets. Encoding uses a standard varint encoding, but with only 7 bits instead of the usual 8. -The 8 bits of a byte are (msb left) SXdddddd where S is the start bit. X is the extend bit meaning that the next byte is required to extend the offset. +The 8 bits of a byte are (msb left) `SXdddddd` where `S` is the start bit. `X` is the extend bit meaning that the next byte is required to extend the offset. In addition, we combine `depth` and `lasti` into a single value, `((depth<<1)+lasti)`, before encoding. diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 4da6cd47dc859e..9c35684c945b3e 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -286,7 +286,7 @@ object, the GC does not process it twice. Notice that an object that was marked as "tentatively unreachable" and was later moved back to the reachable list will be visited again by the garbage collector -as now all the references that that object has need to be processed as well. This +as now all the references that the object has need to be processed as well. This process is really a breadth first search over the object graph. Once all the objects are scanned, the GC knows that all container objects in the tentatively unreachable list are really unreachable and can thus be garbage collected. diff --git a/InternalDocs/generators.md b/InternalDocs/generators.md index 87fbb91236844b..a7a40fcd0ed009 100644 --- a/InternalDocs/generators.md +++ b/InternalDocs/generators.md @@ -64,7 +64,7 @@ Iteration The [`FOR_ITER`](https://docs.python.org/dev/library/dis.html#opcode-FOR_ITER) instruction calls `__next__` on the iterator which is on the top of the stack, -and pushes the result to the stack. It has [`specializations`](adaptive.md) +and pushes the result to the stack. It has [`specializations`](interpreter.md) for a few common iterator types, including `FOR_ITER_GEN`, for iterating over a generator. `FOR_ITER_GEN` bypasses the call to `__next__`, and instead directly pushes the generator stack and resumes its execution from the diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md index c98ccb3ace8d9e..095853807377a4 100644 --- a/InternalDocs/jit.md +++ b/InternalDocs/jit.md @@ -89,7 +89,7 @@ When the full jit is enabled (python was configured with [`--enable-experimental-jit`](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit), the uop executor's `jit_code` field is populated with a pointer to a compiled C function that implements the executor logic. This function's signature is -defined by `jit_func` in [`pycore_jit.h`](Include/internal/pycore_jit.h). +defined by `jit_func` in [`pycore_jit.h`](../Include/internal/pycore_jit.h). When the executor is invoked by `ENTER_EXECUTOR`, instead of jumping to the uop interpreter at `tier2_dispatch`, the executor runs the function that `jit_code` points to. This function returns the instruction pointer diff --git a/InternalDocs/stack_protection.md b/InternalDocs/stack_protection.md new file mode 100644 index 00000000000000..14802e57d095f4 --- /dev/null +++ b/InternalDocs/stack_protection.md @@ -0,0 +1,68 @@ +# Stack Protection + +CPython protects against stack overflow in the form of runaway, or just very deep, recursion by raising a `RecursionError` instead of just crashing. +Protection against pure Python stack recursion has existed since very early, but in 3.12 we added protection against stack overflow +in C code. This was initially implemented using a counter and later improved in 3.14 to use the actual stack depth. +For those platforms that support it (Windows, Mac, and most Linuxes) we query the operating system to find the stack bounds. +For other platforms we use conservative estimates. + + +The C stack looks like this: + +``` + +-------+ <--- Top of machine stack + | | + | | + + ~~ + + | | + | | + +-------+ <--- Soft limit + | | + | | _PyOS_STACK_MARGIN_BYTES + | | + +-------+ <--- Hard limit + | | + | | _PyOS_STACK_MARGIN_BYTES + | | + +-------+ <--- Bottom of machine stack +``` + + +We get the current stack pointer using compiler intrinsics where available, or by taking the address of a C local variable. See `_Py_get_machine_stack_pointer()`. + +The soft and hard limits pointers are set by calling `_Py_InitializeRecursionLimits()` during thread initialization. + +Recursion checks are performed by `_Py_EnterRecursiveCall()` or `_Py_EnterRecursiveCallTstate()` which compare the stack pointer to the soft limit. If the stack pointer is lower than the soft limit, then `_Py_CheckRecursiveCall()` is called which checks against both the hard and soft limits: + +```python +kb_used = (stack_top - stack_pointer)>>10 +if stack_pointer < bottom_of_machine_stack: + pass # Our stack limits could be wrong so it is safest to do nothing. +elif stack_pointer < hard_limit: + FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)") +elif stack_pointer < soft_limit: + raise RecursionError(f"Stack overflow (used {kb_used} kB)") +``` + +### User space threads and other oddities + +Some libraries provide user-space threads. These will change the C stack at runtime. +To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit. + +### Diagnosing and fixing stack overflows + +For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`. + +If you see a traceback ending in: `RecursionError: Stack overflow (used ... kB)` then the stack protection is working as intended. If you don't expect to see the error, then check the amount of stack used. If it seems low then CPython may not be configured properly. + +However, if you see a fatal error or crash, then something is not right. +Either a recursive call is not checking `_Py_EnterRecursiveCall()`, or the amount of C stack consumed by a single call exceeds `_PyOS_STACK_MARGIN_BYTES`. If a hard crash occurs, it probably means that the amount of C stack consumed is more than double `_PyOS_STACK_MARGIN_BYTES`. + +Likely causes: +* Recursive code is not calling `_Py_EnterRecursiveCall()` +* `-O0` compilation flags, especially for Clang. With no optimization, C calls can consume a lot of stack space +* Giant, complex functions in third-party C extensions. This is unlikely as the function in question would need to be more complicated than the bytecode interpreter. +* `_PyOS_STACK_MARGIN_BYTES` is just too low. +* `_Py_InitializeRecursionLimits()` is not setting the soft and hard limits correctly for that platform. diff --git a/Lib/_android_support.py b/Lib/_android_support.py index ae506f6a4b57b8..a439d03a144dd2 100644 --- a/Lib/_android_support.py +++ b/Lib/_android_support.py @@ -29,15 +29,19 @@ def init_streams(android_log_write, stdout_prio, stderr_prio): global logcat logcat = Logcat(android_log_write) - - sys.stdout = TextLogStream( - stdout_prio, "python.stdout", sys.stdout.fileno()) - sys.stderr = TextLogStream( - stderr_prio, "python.stderr", sys.stderr.fileno()) + sys.stdout = TextLogStream(stdout_prio, "python.stdout", sys.stdout) + sys.stderr = TextLogStream(stderr_prio, "python.stderr", sys.stderr) class TextLogStream(io.TextIOWrapper): - def __init__(self, prio, tag, fileno=None, **kwargs): + def __init__(self, prio, tag, original=None, **kwargs): + # Respect the -u option. + if original: + kwargs.setdefault("write_through", original.write_through) + fileno = original.fileno() + else: + fileno = None + # The default is surrogateescape for stdout and backslashreplace for # stderr, but in the context of an Android log, readability is more # important than reversibility. diff --git a/Lib/_ast_unparse.py b/Lib/_ast_unparse.py index 0b669edb2ffec6..1c8741b5a55483 100644 --- a/Lib/_ast_unparse.py +++ b/Lib/_ast_unparse.py @@ -626,32 +626,11 @@ def _write_ftstring(self, values, prefix): ) self._ftstring_helper(fstring_parts) - def _tstring_helper(self, node): - last_idx = 0 - for i, value in enumerate(node.values): - # This can happen if we have an implicit concat of a t-string - # with an f-string - if isinstance(value, FormattedValue): - if i > last_idx: - # Write t-string until here - self._write_ftstring(node.values[last_idx:i], "t") - self.write(" ") - # Write f-string with the current formatted value - self._write_ftstring([node.values[i]], "f") - if i + 1 < len(node.values): - # Only add a space if there are more values after this - self.write(" ") - last_idx = i + 1 - - if last_idx < len(node.values): - # Write t-string from last_idx to end - self._write_ftstring(node.values[last_idx:], "t") - def visit_JoinedStr(self, node): self._write_ftstring(node.values, "f") def visit_TemplateStr(self, node): - self._tstring_helper(node) + self._write_ftstring(node.values, "t") def _write_ftstring_inner(self, node, is_format_spec=False): if isinstance(node, JoinedStr): @@ -679,9 +658,12 @@ def _unparse_interpolation_value(self, inner): unparser.set_precedence(_Precedence.TEST.next(), inner) return unparser.visit(inner) - def _write_interpolation(self, node): + def _write_interpolation(self, node, use_str_attr=False): with self.delimit("{", "}"): - expr = self._unparse_interpolation_value(node.value) + if use_str_attr: + expr = node.str + else: + expr = self._unparse_interpolation_value(node.value) if expr.startswith("{"): # Separate pair of opening brackets as "{ {" self.write(" ") @@ -696,7 +678,8 @@ def visit_FormattedValue(self, node): self._write_interpolation(node) def visit_Interpolation(self, node): - self._write_interpolation(node) + # If `str` is set to `None`, use the `value` to generate the source code. + self._write_interpolation(node, use_str_attr=node.str is not None) def visit_Name(self, node): self.write(node.id) diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index 51263d696a1777..241d40d57409ae 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -49,7 +49,7 @@ def _f(): pass "Mapping", "MutableMapping", "MappingView", "KeysView", "ItemsView", "ValuesView", "Sequence", "MutableSequence", - "Buffer", + "ByteString", "Buffer", ] # This module has been renamed from collections.abc to _collections_abc to @@ -1061,6 +1061,41 @@ def count(self, value): Sequence.register(range) Sequence.register(memoryview) +class _DeprecateByteStringMeta(ABCMeta): + def __new__(cls, name, bases, namespace, **kwargs): + if name != "ByteString": + import warnings + + warnings._deprecated( + "collections.abc.ByteString", + remove=(3, 17), + ) + return super().__new__(cls, name, bases, namespace, **kwargs) + + def __instancecheck__(cls, instance): + import warnings + + warnings._deprecated( + "collections.abc.ByteString", + remove=(3, 17), + ) + return super().__instancecheck__(instance) + +class ByteString(Sequence, metaclass=_DeprecateByteStringMeta): + """Deprecated ABC serving as a common supertype of ``bytes`` and ``bytearray``. + + This ABC is scheduled for removal in Python 3.17. + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the buffer protocol at runtime. For use in type annotations, + either use ``Buffer`` or a union that explicitly specifies the types your + code supports (e.g., ``bytes | bytearray | memoryview``). + """ + + __slots__ = () + +ByteString.register(bytes) +ByteString.register(bytearray) + class MutableSequence(Sequence): """All the operations on a read-write sequence. diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 4a310a402358b6..d6673f6692f761 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -1,4 +1,3 @@ -import io import os import sys @@ -155,7 +154,7 @@ def __iter__(self) -> Iterator[str]: return iter(self.__dataclass_fields__) -@dataclass(frozen=True) +@dataclass(frozen=True, kw_only=True) class Argparse(ThemeSection): usage: str = ANSIColors.BOLD_BLUE prog: str = ANSIColors.BOLD_MAGENTA @@ -176,6 +175,7 @@ class Argparse(ThemeSection): class Syntax(ThemeSection): prompt: str = ANSIColors.BOLD_MAGENTA keyword: str = ANSIColors.BOLD_BLUE + keyword_constant: str = ANSIColors.BOLD_BLUE builtin: str = ANSIColors.CYAN comment: str = ANSIColors.RED string: str = ANSIColors.GREEN @@ -272,21 +272,29 @@ def decolor(text: str) -> str: def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: + + def _safe_getenv(k: str, fallback: str | None = None) -> str | None: + """Exception-safe environment retrieval. See gh-128636.""" + try: + return os.environ.get(k, fallback) + except Exception: + return fallback + if file is None: file = sys.stdout if not sys.flags.ignore_environment: - if os.environ.get("PYTHON_COLORS") == "0": + if _safe_getenv("PYTHON_COLORS") == "0": return False - if os.environ.get("PYTHON_COLORS") == "1": + if _safe_getenv("PYTHON_COLORS") == "1": return True - if os.environ.get("NO_COLOR"): + if _safe_getenv("NO_COLOR"): return False if not COLORIZE: return False - if os.environ.get("FORCE_COLOR"): + if _safe_getenv("FORCE_COLOR"): return True - if os.environ.get("TERM") == "dumb": + if _safe_getenv("TERM") == "dumb": return False if not hasattr(file, "fileno"): @@ -303,7 +311,7 @@ def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: try: return os.isatty(file.fileno()) - except io.UnsupportedOperation: + except OSError: return hasattr(file, "isatty") and file.isatty() @@ -329,7 +337,8 @@ def get_theme( environment (including environment variable state and console configuration on Windows) can also change in the course of the application life cycle. """ - if force_color or (not force_no_color and can_colorize(file=tty_file)): + if force_color or (not force_no_color and + can_colorize(file=tty_file)): return _theme return theme_no_color diff --git a/Lib/_py_warnings.py b/Lib/_py_warnings.py index cbaa94458629ac..55f8c06959180b 100644 --- a/Lib/_py_warnings.py +++ b/Lib/_py_warnings.py @@ -589,6 +589,9 @@ def __str__(self): "line : %r}" % (self.message, self._category_name, self.filename, self.lineno, self.line)) + def __repr__(self): + return f'<{type(self).__qualname__} {self}>' + class catch_warnings(object): @@ -762,27 +765,27 @@ def __new__(cls, /, *args, **kwargs): arg.__new__ = staticmethod(__new__) - original_init_subclass = arg.__init_subclass__ - # We need slightly different behavior if __init_subclass__ - # is a bound method (likely if it was implemented in Python) - if isinstance(original_init_subclass, MethodType): - original_init_subclass = original_init_subclass.__func__ + if "__init_subclass__" in arg.__dict__: + # __init_subclass__ is directly present on the decorated class. + # Synthesize a wrapper that calls this method directly. + original_init_subclass = arg.__init_subclass__ + # We need slightly different behavior if __init_subclass__ + # is a bound method (likely if it was implemented in Python). + # Otherwise, it likely means it's a builtin such as + # object's implementation of __init_subclass__. + if isinstance(original_init_subclass, MethodType): + original_init_subclass = original_init_subclass.__func__ @functools.wraps(original_init_subclass) def __init_subclass__(*args, **kwargs): _wm.warn(msg, category=category, stacklevel=stacklevel + 1) return original_init_subclass(*args, **kwargs) - - arg.__init_subclass__ = classmethod(__init_subclass__) - # Or otherwise, which likely means it's a builtin such as - # object's implementation of __init_subclass__. else: - @functools.wraps(original_init_subclass) - def __init_subclass__(*args, **kwargs): + def __init_subclass__(cls, *args, **kwargs): _wm.warn(msg, category=category, stacklevel=stacklevel + 1) - return original_init_subclass(*args, **kwargs) + return super(arg, cls).__init_subclass__(*args, **kwargs) - arg.__init_subclass__ = __init_subclass__ + arg.__init_subclass__ = classmethod(__init_subclass__) arg.__deprecated__ = __new__.__deprecated__ = msg __init_subclass__.__deprecated__ = msg diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index e3db1b52b1159b..70251dbb6535d2 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -213,17 +213,6 @@ def _need_normalize_century(): _normalize_century = True return _normalize_century -_supports_c99 = None -def _can_support_c99(): - global _supports_c99 - if _supports_c99 is None: - try: - _supports_c99 = ( - _time.strftime("%F", (1900, 1, 1, 0, 0, 0, 0, 1, 0)) == "1900-01-01") - except ValueError: - _supports_c99 = False - return _supports_c99 - # Correctly substitute for %z and %Z escapes in strftime formats. def _wrap_strftime(object, format, timetuple): # Don't call utcoffset() or tzname() unless actually needed. @@ -283,7 +272,7 @@ def _wrap_strftime(object, format, timetuple): newformat.append(Zreplace) # Note that datetime(1000, 1, 1).strftime('%G') == '1000' so # year 1000 for %G can go on the fast path. - elif ((ch in 'YG' or ch in 'FC' and _can_support_c99()) and + elif ((ch in 'YG' or ch in 'FC') and object.year < 1000 and _need_normalize_century()): if ch == 'G': year = int(_time.strftime("%G", timetuple)) @@ -1127,8 +1116,8 @@ def isoformat(self): This is 'YYYY-MM-DD'. References: - - http://www.w3.org/TR/NOTE-datetime - - http://www.cl.cam.ac.uk/~mgk25/iso-time.html + - https://www.w3.org/TR/NOTE-datetime + - https://www.cl.cam.ac.uk/~mgk25/iso-time.html """ return "%04d-%02d-%02d" % (self._year, self._month, self._day) @@ -1262,7 +1251,7 @@ def isocalendar(self): The first week is 1; Monday is 1 ... Sunday is 7. ISO calendar algorithm taken from - http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + https://www.phys.uu.nl/~vgent/calendar/isocalendar.htm (used with permission) """ year = self._year @@ -2139,7 +2128,7 @@ def isoformat(self, sep='T', timespec='auto'): By default, the fractional part is omitted if self.microsecond == 0. If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. Optional argument sep specifies the separator between date and time, default 'T'. diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 46fa9ffcb1e056..97a629fe92ccec 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -3340,7 +3340,10 @@ def _fill_logical(self, context, opa, opb): return opa, opb def logical_and(self, other, context=None): - """Applies an 'and' operation between self and other's digits.""" + """Applies an 'and' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -3357,14 +3360,20 @@ def logical_and(self, other, context=None): return _dec_from_triple(0, result.lstrip('0') or '0', 0) def logical_invert(self, context=None): - """Invert all its digits.""" + """Invert all its digits. + + The self must be logical number. + """ if context is None: context = getcontext() return self.logical_xor(_dec_from_triple(0,'1'*context.prec,0), context) def logical_or(self, other, context=None): - """Applies an 'or' operation between self and other's digits.""" + """Applies an 'or' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -3381,7 +3390,10 @@ def logical_or(self, other, context=None): return _dec_from_triple(0, result.lstrip('0') or '0', 0) def logical_xor(self, other, context=None): - """Applies an 'xor' operation between self and other's digits.""" + """Applies an 'xor' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -6120,9 +6132,13 @@ def _convert_for_comparison(self, other, equality_op=False): (?Pz)? (?P\#)? (?P0)? -(?P(?!0)\d+)? +(?P\d+)? (?P[,_])? -(?:\.(?P0|(?!0)\d+))? +(?:\. + (?=[\d,_]) # lookahead for digit or separator + (?P\d+)? + (?P[,_])? +)? (?P[eEfFgGn%])? \z """, re.VERBOSE|re.DOTALL) @@ -6215,6 +6231,9 @@ def _parse_format_specifier(format_spec, _localeconv=None): format_dict['grouping'] = [3, 0] format_dict['decimal_point'] = '.' + if format_dict['frac_separators'] is None: + format_dict['frac_separators'] = '' + return format_dict def _format_align(sign, body, spec): @@ -6334,6 +6353,11 @@ def _format_number(is_negative, intpart, fracpart, exp, spec): sign = _format_sign(is_negative, spec) + frac_sep = spec['frac_separators'] + if fracpart and frac_sep: + fracpart = frac_sep.join(fracpart[pos:pos + 3] + for pos in range(0, len(fracpart), 3)) + if fracpart or spec['alt']: fracpart = spec['decimal_point'] + fracpart diff --git a/Lib/_pyio.py b/Lib/_pyio.py index a870de5b532542..612e4a175e5a65 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -407,6 +407,9 @@ def __del__(self): if closed: return + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + # If close() fails, the caller logs the exception with # sys.unraisablehook. close() must be called at the end at __del__(). self.close() @@ -614,6 +617,8 @@ def read(self, size=-1): n = self.readinto(b) if n is None: return None + if n < 0 or n > len(b): + raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") del b[n:] return bytes(b) @@ -645,8 +650,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): @@ -853,6 +856,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -1563,7 +1570,8 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) @@ -1600,12 +1608,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1780,7 +1787,7 @@ def close(self): if not self.closed: self._stat_atopen = None try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -2689,6 +2696,10 @@ def readline(self, size=None): def newlines(self): return self._decoder.newlines if self._decoder else None + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. diff --git a/Lib/_pyrepl/_minimal_curses.py b/Lib/_pyrepl/_minimal_curses.py deleted file mode 100644 index d884f880f50ac7..00000000000000 --- a/Lib/_pyrepl/_minimal_curses.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Minimal '_curses' module, the low-level interface for curses module -which is not meant to be used directly. - -Based on ctypes. It's too incomplete to be really called '_curses', so -to use it, you have to import it and stick it in sys.modules['_curses'] -manually. - -Note that there is also a built-in module _minimal_curses which will -hide this one if compiled in. -""" - -import ctypes -import ctypes.util - - -class error(Exception): - pass - - -def _find_clib() -> str: - trylibs = ["ncursesw", "ncurses", "curses"] - - for lib in trylibs: - path = ctypes.util.find_library(lib) - if path: - return path - raise ModuleNotFoundError("curses library not found", name="_pyrepl._minimal_curses") - - -_clibpath = _find_clib() -clib = ctypes.cdll.LoadLibrary(_clibpath) - -clib.setupterm.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.POINTER(ctypes.c_int)] -clib.setupterm.restype = ctypes.c_int - -clib.tigetstr.argtypes = [ctypes.c_char_p] -clib.tigetstr.restype = ctypes.c_ssize_t - -clib.tparm.argtypes = [ctypes.c_char_p] + 9 * [ctypes.c_int] # type: ignore[operator] -clib.tparm.restype = ctypes.c_char_p - -OK = 0 -ERR = -1 - -# ____________________________________________________________ - - -def setupterm(termstr, fd): - err = ctypes.c_int(0) - result = clib.setupterm(termstr, fd, ctypes.byref(err)) - if result == ERR: - raise error("setupterm() failed (err=%d)" % err.value) - - -def tigetstr(cap): - if not isinstance(cap, bytes): - cap = cap.encode("ascii") - result = clib.tigetstr(cap) - if result == ERR: - return None - return ctypes.cast(result, ctypes.c_char_p).value - - -def tparm(str, i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0, i8=0, i9=0): - result = clib.tparm(str, i1, i2, i3, i4, i5, i6, i7, i8, i9) - if result is None: - raise error("tparm() returned NULL") - return result diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py index 347f05607c75c5..cf59e007f4df80 100644 --- a/Lib/_pyrepl/_module_completer.py +++ b/Lib/_pyrepl/_module_completer.py @@ -1,9 +1,12 @@ from __future__ import annotations +import importlib +import os import pkgutil import sys import token import tokenize +from importlib.machinery import FileFinder from io import StringIO from contextlib import contextmanager from dataclasses import dataclass @@ -16,9 +19,18 @@ from typing import Any, Iterable, Iterator, Mapping +HARDCODED_SUBMODULES = { + # Standard library submodules that are not detected by pkgutil.iter_modules + # but can be imported, so should be proposed in completion + "collections": ["abc"], + "os": ["path"], + "xml.parsers.expat": ["errors", "model"], +} + + def make_default_module_completer() -> ModuleCompleter: - # Inside pyrepl, __package__ is set to '_pyrepl' - return ModuleCompleter(namespace={'__package__': '_pyrepl'}) + # Inside pyrepl, __package__ is set to None by default + return ModuleCompleter(namespace={'__package__': None}) class ModuleCompleter: @@ -41,12 +53,13 @@ def __init__(self, namespace: Mapping[str, Any] | None = None) -> None: self.namespace = namespace or {} self._global_cache: list[pkgutil.ModuleInfo] = [] self._curr_sys_path: list[str] = sys.path[:] + self._stdlib_path = os.path.dirname(importlib.__path__[0]) - def get_completions(self, line: str) -> list[str]: + def get_completions(self, line: str) -> list[str] | None: """Return the next possible import completions for 'line'.""" result = ImportParser(line).parse() if not result: - return [] + return None try: return self.complete(*result) except Exception: @@ -81,8 +94,11 @@ def find_modules(self, path: str, prefix: str) -> list[str]: def _find_modules(self, path: str, prefix: str) -> list[str]: if not path: # Top-level import (e.g. `import foo`` or `from foo`)` - return [name for _, name, _ in self.global_cache - if name.startswith(prefix)] + builtin_modules = [name for name in sys.builtin_module_names + if self.is_suggestion_match(name, prefix)] + third_party_modules = [module.name for module in self.global_cache + if self.is_suggestion_match(module.name, prefix)] + return sorted(builtin_modules + third_party_modules) if path.startswith('.'): # Convert relative path to absolute path @@ -92,12 +108,33 @@ def _find_modules(self, path: str, prefix: str) -> list[str]: return [] modules: Iterable[pkgutil.ModuleInfo] = self.global_cache + is_stdlib_import: bool | None = None for segment in path.split('.'): modules = [mod_info for mod_info in modules if mod_info.ispkg and mod_info.name == segment] + if is_stdlib_import is None: + # Top-level import decide if we import from stdlib or not + is_stdlib_import = all( + self._is_stdlib_module(mod_info) for mod_info in modules + ) modules = self.iter_submodules(modules) - return [module.name for module in modules - if module.name.startswith(prefix)] + + module_names = [module.name for module in modules] + if is_stdlib_import: + module_names.extend(HARDCODED_SUBMODULES.get(path, ())) + return [module_name for module_name in module_names + if self.is_suggestion_match(module_name, prefix)] + + def _is_stdlib_module(self, module_info: pkgutil.ModuleInfo) -> bool: + return (isinstance(module_info.module_finder, FileFinder) + and module_info.module_finder.path == self._stdlib_path) + + def is_suggestion_match(self, module_name: str, prefix: str) -> bool: + if prefix: + return module_name.startswith(prefix) + # For consistency with attribute completion, which + # does not suggest private attributes unless requested. + return not module_name.startswith("_") def iter_submodules(self, parent_modules: list[pkgutil.ModuleInfo]) -> Iterator[pkgutil.ModuleInfo]: """Iterate over all submodules of the given parent modules.""" diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py index 842599bd1877fb..0589a0f437ec7c 100644 --- a/Lib/_pyrepl/base_eventqueue.py +++ b/Lib/_pyrepl/base_eventqueue.py @@ -87,7 +87,7 @@ def push(self, char: int | bytes) -> None: if isinstance(k, dict): self.keymap = k else: - self.insert(Event('key', k, self.flush_buf())) + self.insert(Event('key', k, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap elif self.buf and self.buf[0] == 27: # escape @@ -96,7 +96,7 @@ def push(self, char: int | bytes) -> None: # the docstring in keymap.py trace('unrecognized escape sequence, propagating...') self.keymap = self.compiled_keymap - self.insert(Event('key', '\033', bytearray(b'\033'))) + self.insert(Event('key', '\033', b'\033')) for _c in self.flush_buf()[1:]: self.push(_c) @@ -106,5 +106,5 @@ def push(self, char: int | bytes) -> None: except UnicodeError: return else: - self.insert(Event('key', decoded, self.flush_buf())) + self.insert(Event('key', decoded, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 2354fbb2ec2c1e..10127e58897a58 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -370,6 +370,13 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) + if r.paste_mode: + data = "" + ev = r.console.getpending() + data += ev.data + if data: + r.insert(data) + r.last_refresh_cache.invalidated = True class insert_nl(EditCommand): @@ -413,14 +420,17 @@ class delete(EditCommand): def do(self) -> None: r = self.reader b = r.buffer - if ( - r.pos == 0 - and len(b) == 0 # this is something of a hack - and self.event[-1] == "\004" - ): - r.update_screen() - r.console.finish() - raise EOFError + if self.event[-1] == "\004": + if b and b[-1].endswith("\n"): + self.finish = True + elif ( + r.pos == 0 + and len(b) == 0 # this is something of a hack + ): + r.update_screen() + r.console.finish() + raise EOFError + for i in range(r.get_arg()): if r.pos != len(b): del b[r.pos] @@ -484,7 +494,6 @@ def do(self) -> None: data = "" start = time.time() while done not in data: - self.reader.console.wait(100) ev = self.reader.console.getpending() data += ev.data trace( diff --git a/Lib/_pyrepl/curses.py b/Lib/_pyrepl/curses.py deleted file mode 100644 index 3a624d9f6835d1..00000000000000 --- a/Lib/_pyrepl/curses.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2000-2010 Michael Hudson-Doyle -# Armin Rigo -# -# All Rights Reserved -# -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose is hereby granted without fee, -# provided that the above copyright notice appear in all copies and -# that both that copyright notice and this permission notice appear in -# supporting documentation. -# -# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO -# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, -# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER -# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF -# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - -try: - import _curses -except ImportError: - try: - import curses as _curses # type: ignore[no-redef] - except ImportError: - from . import _minimal_curses as _curses # type: ignore[no-redef] - -setupterm = _curses.setupterm -tigetstr = _curses.tigetstr -tparm = _curses.tparm -error = _curses.error diff --git a/Lib/_pyrepl/fancy_termios.py b/Lib/_pyrepl/fancy_termios.py index 0468b9a2670267..8d5bd183f21339 100644 --- a/Lib/_pyrepl/fancy_termios.py +++ b/Lib/_pyrepl/fancy_termios.py @@ -20,19 +20,25 @@ import termios +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import cast +else: + cast = lambda typ, val: val + + class TermState: - def __init__(self, tuples): - ( - self.iflag, - self.oflag, - self.cflag, - self.lflag, - self.ispeed, - self.ospeed, - self.cc, - ) = tuples + def __init__(self, attrs: list[int | list[bytes]]) -> None: + self.iflag = cast(int, attrs[0]) + self.oflag = cast(int, attrs[1]) + self.cflag = cast(int, attrs[2]) + self.lflag = cast(int, attrs[3]) + self.ispeed = cast(int, attrs[4]) + self.ospeed = cast(int, attrs[5]) + self.cc = cast(list[bytes], attrs[6]) - def as_list(self): + def as_list(self) -> list[int | list[bytes]]: return [ self.iflag, self.oflag, @@ -45,32 +51,32 @@ def as_list(self): self.cc[:], ] - def copy(self): + def copy(self) -> "TermState": return self.__class__(self.as_list()) -def tcgetattr(fd): +def tcgetattr(fd: int) -> TermState: return TermState(termios.tcgetattr(fd)) -def tcsetattr(fd, when, attrs): +def tcsetattr(fd: int, when: int, attrs: TermState) -> None: termios.tcsetattr(fd, when, attrs.as_list()) class Term(TermState): TS__init__ = TermState.__init__ - def __init__(self, fd=0): + def __init__(self, fd: int = 0) -> None: self.TS__init__(termios.tcgetattr(fd)) self.fd = fd - self.stack = [] + self.stack: list[list[int | list[bytes]]] = [] - def save(self): + def save(self) -> None: self.stack.append(self.as_list()) - def set(self, when=termios.TCSANOW): + def set(self, when: int = termios.TCSANOW) -> None: termios.tcsetattr(self.fd, when, self.as_list()) - def restore(self): + def restore(self) -> None: self.TS__init__(self.stack.pop()) self.set() diff --git a/Lib/_pyrepl/main.py b/Lib/_pyrepl/main.py index a6f824dcc4ad14..447eb1e551e774 100644 --- a/Lib/_pyrepl/main.py +++ b/Lib/_pyrepl/main.py @@ -1,6 +1,7 @@ import errno import os import sys +import types CAN_USE_PYREPL: bool @@ -29,12 +30,10 @@ def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): print(FAIL_REASON, file=sys.stderr) return sys._baserepl() - if mainmodule: - namespace = mainmodule.__dict__ - else: - import __main__ - namespace = __main__.__dict__ - namespace.pop("__pyrepl_interactive_console", None) + if not mainmodule: + mainmodule = types.ModuleType("__main__") + + namespace = mainmodule.__dict__ # sys._baserepl() above does this internally, we do it here startup_path = os.getenv("PYTHONSTARTUP") diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 560a9db192169e..23b8fa6b9c7625 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -43,10 +43,11 @@ Console: type[ConsoleType] _error: tuple[type[Exception], ...] | type[Exception] -try: - from .unix_console import UnixConsole as Console, _error -except ImportError: + +if os.name == "nt": from .windows_console import WindowsConsole as Console, _error +else: + from .unix_console import UnixConsole as Console, _error ENCODING = sys.getdefaultencoding() or "latin1" @@ -134,7 +135,8 @@ def get_stem(self) -> str: return "".join(b[p + 1 : self.pos]) def get_completions(self, stem: str) -> list[str]: - if module_completions := self.get_module_completions(): + module_completions = self.get_module_completions() + if module_completions is not None: return module_completions if len(stem) == 0 and self.more_lines is not None: b = self.buffer @@ -165,7 +167,7 @@ def get_completions(self, stem: str) -> list[str]: result.sort() return result - def get_module_completions(self) -> list[str]: + def get_module_completions(self) -> list[str] | None: line = self.get_line() return self.config.module_completer.get_completions(line) @@ -606,6 +608,7 @@ def _setup(namespace: Mapping[str, Any]) -> None: # set up namespace in rlcompleter, which requires it to be a bona fide dict if not isinstance(namespace, dict): namespace = dict(namespace) + _wrapper.config.module_completer = ModuleCompleter(namespace) _wrapper.config.readline_completer = RLCompleter(namespace).complete # this is not really what readline.c does. Better than nothing I guess diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index b3848833e14208..ff1bdab9fea078 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -31,6 +31,7 @@ import sys import code import warnings +import errno from .readline import _get_reader, multiline_input, append_history_file @@ -110,6 +111,10 @@ def run_multiline_interactive_console( more_lines = functools.partial(_more_lines, console) input_n = 0 + _is_x_showrefcount_set = sys._xoptions.get("showrefcount") + _is_pydebug_build = hasattr(sys, "gettotalrefcount") + show_ref_count = _is_x_showrefcount_set and _is_pydebug_build + def maybe_run_command(statement: str) -> bool: statement = statement.strip() if statement in console.locals or statement not in REPL_COMMANDS: @@ -149,9 +154,11 @@ def maybe_run_command(statement: str) -> bool: append_history_file() except (FileNotFoundError, PermissionError, OSError) as e: warnings.warn(f"failed to open the history file for writing: {e}") + input_n += 1 except KeyboardInterrupt: r = _get_reader() + r.cmpltn_reset() if r.input_trans is r.isearch_trans: r.do_cmd(("isearch-end", [""])) r.pos = len(r.get_unicode()) @@ -167,3 +174,8 @@ def maybe_run_command(statement: str) -> bool: except: console.showtraceback() console.resetbuffer() + if show_ref_count: + console.write( + f"[{sys.gettotalrefcount()} refs," + f" {sys.getallocatedblocks()} blocks]\n" + ) diff --git a/Lib/_pyrepl/terminfo.py b/Lib/_pyrepl/terminfo.py new file mode 100644 index 00000000000000..d02ef69cce0bd8 --- /dev/null +++ b/Lib/_pyrepl/terminfo.py @@ -0,0 +1,488 @@ +"""Pure Python curses-like terminal capability queries.""" + +from dataclasses import dataclass, field +import errno +import os +from pathlib import Path +import re +import struct + + +# Terminfo constants +MAGIC16 = 0o432 # Magic number for 16-bit terminfo format +MAGIC32 = 0o1036 # Magic number for 32-bit terminfo format + +# Special values for absent/cancelled capabilities +ABSENT_BOOLEAN = -1 +ABSENT_NUMERIC = -1 +CANCELLED_NUMERIC = -2 +ABSENT_STRING = None +CANCELLED_STRING = None + + +# Standard string capability names from ncurses Caps file +# This matches the order used by ncurses when compiling terminfo +# fmt: off +_STRING_NAMES: tuple[str, ...] = ( + "cbt", "bel", "cr", "csr", "tbc", "clear", "el", "ed", "hpa", "cmdch", + "cup", "cud1", "home", "civis", "cub1", "mrcup", "cnorm", "cuf1", "ll", + "cuu1", "cvvis", "dch1", "dl1", "dsl", "hd", "smacs", "blink", "bold", + "smcup", "smdc", "dim", "smir", "invis", "prot", "rev", "smso", "smul", + "ech", "rmacs", "sgr0", "rmcup", "rmdc", "rmir", "rmso", "rmul", "flash", + "ff", "fsl", "is1", "is2", "is3", "if", "ich1", "il1", "ip", "kbs", "ktbc", + "kclr", "kctab", "kdch1", "kdl1", "kcud1", "krmir", "kel", "ked", "kf0", + "kf1", "kf10", "kf2", "kf3", "kf4", "kf5", "kf6", "kf7", "kf8", "kf9", + "khome", "kich1", "kil1", "kcub1", "kll", "knp", "kpp", "kcuf1", "kind", + "kri", "khts", "kcuu1", "rmkx", "smkx", "lf0", "lf1", "lf10", "lf2", "lf3", + "lf4", "lf5", "lf6", "lf7", "lf8", "lf9", "rmm", "smm", "nel", "pad", "dch", + "dl", "cud", "ich", "indn", "il", "cub", "cuf", "rin", "cuu", "pfkey", + "pfloc", "pfx", "mc0", "mc4", "mc5", "rep", "rs1", "rs2", "rs3", "rf", "rc", + "vpa", "sc", "ind", "ri", "sgr", "hts", "wind", "ht", "tsl", "uc", "hu", + "iprog", "ka1", "ka3", "kb2", "kc1", "kc3", "mc5p", "rmp", "acsc", "pln", + "kcbt", "smxon", "rmxon", "smam", "rmam", "xonc", "xoffc", "enacs", "smln", + "rmln", "kbeg", "kcan", "kclo", "kcmd", "kcpy", "kcrt", "kend", "kent", + "kext", "kfnd", "khlp", "kmrk", "kmsg", "kmov", "knxt", "kopn", "kopt", + "kprv", "kprt", "krdo", "kref", "krfr", "krpl", "krst", "kres", "ksav", + "kspd", "kund", "kBEG", "kCAN", "kCMD", "kCPY", "kCRT", "kDC", "kDL", + "kslt", "kEND", "kEOL", "kEXT", "kFND", "kHLP", "kHOM", "kIC", "kLFT", + "kMSG", "kMOV", "kNXT", "kOPT", "kPRV", "kPRT", "kRDO", "kRPL", "kRIT", + "kRES", "kSAV", "kSPD", "kUND", "rfi", "kf11", "kf12", "kf13", "kf14", + "kf15", "kf16", "kf17", "kf18", "kf19", "kf20", "kf21", "kf22", "kf23", + "kf24", "kf25", "kf26", "kf27", "kf28", "kf29", "kf30", "kf31", "kf32", + "kf33", "kf34", "kf35", "kf36", "kf37", "kf38", "kf39", "kf40", "kf41", + "kf42", "kf43", "kf44", "kf45", "kf46", "kf47", "kf48", "kf49", "kf50", + "kf51", "kf52", "kf53", "kf54", "kf55", "kf56", "kf57", "kf58", "kf59", + "kf60", "kf61", "kf62", "kf63", "el1", "mgc", "smgl", "smgr", "fln", "sclk", + "dclk", "rmclk", "cwin", "wingo", "hup","dial", "qdial", "tone", "pulse", + "hook", "pause", "wait", "u0", "u1", "u2", "u3", "u4", "u5", "u6", "u7", + "u8", "u9", "op", "oc", "initc", "initp", "scp", "setf", "setb", "cpi", + "lpi", "chr", "cvr", "defc", "swidm", "sdrfq", "sitm", "slm", "smicm", + "snlq", "snrmq", "sshm", "ssubm", "ssupm", "sum", "rwidm", "ritm", "rlm", + "rmicm", "rshm", "rsubm", "rsupm", "rum", "mhpa", "mcud1", "mcub1", "mcuf1", + "mvpa", "mcuu1", "porder", "mcud", "mcub", "mcuf", "mcuu", "scs", "smgb", + "smgbp", "smglp", "smgrp", "smgt", "smgtp", "sbim", "scsd", "rbim", "rcsd", + "subcs", "supcs", "docr", "zerom", "csnm", "kmous", "minfo", "reqmp", + "getm", "setaf", "setab", "pfxl", "devt", "csin", "s0ds", "s1ds", "s2ds", + "s3ds", "smglr", "smgtb", "birep", "binel", "bicr", "colornm", "defbi", + "endbi", "setcolor", "slines", "dispc", "smpch", "rmpch", "smsc", "rmsc", + "pctrm", "scesc", "scesa", "ehhlm", "elhlm", "elohlm", "erhlm", "ethlm", + "evhlm", "sgr1", "slength", "OTi2", "OTrs", "OTnl", "OTbc", "OTko", "OTma", + "OTG2", "OTG3", "OTG1", "OTG4", "OTGR", "OTGL", "OTGU", "OTGD", "OTGH", + "OTGV", "OTGC","meml", "memu", "box1" +) +# fmt: on + + +def _get_terminfo_dirs() -> list[Path]: + """Get list of directories to search for terminfo files. + + Based on ncurses behavior in: + - ncurses/tinfo/db_iterator.c:_nc_next_db() + - ncurses/tinfo/read_entry.c:_nc_read_entry() + """ + dirs = [] + + terminfo = os.environ.get("TERMINFO") + if terminfo: + dirs.append(terminfo) + + try: + home = Path.home() + dirs.append(str(home / ".terminfo")) + except RuntimeError: + pass + + # Check TERMINFO_DIRS + terminfo_dirs = os.environ.get("TERMINFO_DIRS", "") + if terminfo_dirs: + for d in terminfo_dirs.split(":"): + if d: + dirs.append(d) + + dirs.extend( + [ + "/etc/terminfo", + "/lib/terminfo", + "/usr/lib/terminfo", + "/usr/share/terminfo", + "/usr/share/lib/terminfo", + "/usr/share/misc/terminfo", + "/usr/local/lib/terminfo", + "/usr/local/share/terminfo", + ] + ) + + return [Path(d) for d in dirs if Path(d).is_dir()] + + +def _validate_terminal_name_or_raise(terminal_name: str) -> None: + if not isinstance(terminal_name, str): + raise TypeError("`terminal_name` must be a string") + + if not terminal_name: + raise ValueError("`terminal_name` cannot be empty") + + if "\x00" in terminal_name: + raise ValueError("NUL character found in `terminal_name`") + + t = Path(terminal_name) + if len(t.parts) > 1: + raise ValueError("`terminal_name` cannot contain path separators") + + +def _read_terminfo_file(terminal_name: str) -> bytes: + """Find and read terminfo file for given terminal name. + + Terminfo files are stored in directories using the first character + of the terminal name as a subdirectory. + """ + _validate_terminal_name_or_raise(terminal_name) + first_char = terminal_name[0].lower() + filename = terminal_name + + for directory in _get_terminfo_dirs(): + path = directory / first_char / filename + if path.is_file(): + return path.read_bytes() + + # Try with hex encoding of first char (for special chars) + hex_dir = "%02x" % ord(first_char) + path = directory / hex_dir / filename + if path.is_file(): + return path.read_bytes() + + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename) + + +# Hard-coded terminal capabilities for common terminals +# This is a minimal subset needed by PyREPL +_TERMINAL_CAPABILITIES = { + # ANSI/xterm-compatible terminals + "ansi": { + # Bell + "bel": b"\x07", + # Cursor movement + "cub": b"\x1b[%p1%dD", # Move cursor left N columns + "cud": b"\x1b[%p1%dB", # Move cursor down N rows + "cuf": b"\x1b[%p1%dC", # Move cursor right N columns + "cuu": b"\x1b[%p1%dA", # Move cursor up N rows + "cub1": b"\x08", # Move cursor left 1 column + "cud1": b"\n", # Move cursor down 1 row + "cuf1": b"\x1b[C", # Move cursor right 1 column + "cuu1": b"\x1b[A", # Move cursor up 1 row + "cup": b"\x1b[%i%p1%d;%p2%dH", # Move cursor to row, column + "hpa": b"\x1b[%i%p1%dG", # Move cursor to column + # Clear operations + "clear": b"\x1b[H\x1b[2J", # Clear screen and home cursor + "el": b"\x1b[K", # Clear to end of line + # Insert/delete + "dch": b"\x1b[%p1%dP", # Delete N characters + "dch1": b"\x1b[P", # Delete 1 character + "ich": b"\x1b[%p1%d@", # Insert N characters + "ich1": b"", # Insert 1 character + # Cursor visibility + "civis": b"\x1b[?25l", # Make cursor invisible + "cnorm": b"\x1b[?12l\x1b[?25h", # Make cursor normal (visible) + # Scrolling + "ind": b"\n", # Scroll up one line + "ri": b"\x1bM", # Scroll down one line + # Keypad mode + "smkx": b"\x1b[?1h\x1b=", # Enable keypad mode + "rmkx": b"\x1b[?1l\x1b>", # Disable keypad mode + # Padding (not used in modern terminals) + "pad": b"", + # Function keys and special keys + "kdch1": b"\x1b[3~", # Delete key + "kcud1": b"\x1bOB", # Down arrow + "kend": b"\x1bOF", # End key + "kent": b"\x1bOM", # Enter key + "khome": b"\x1bOH", # Home key + "kich1": b"\x1b[2~", # Insert key + "kcub1": b"\x1bOD", # Left arrow + "knp": b"\x1b[6~", # Page down + "kpp": b"\x1b[5~", # Page up + "kcuf1": b"\x1bOC", # Right arrow + "kcuu1": b"\x1bOA", # Up arrow + # Function keys F1-F20 + "kf1": b"\x1bOP", + "kf2": b"\x1bOQ", + "kf3": b"\x1bOR", + "kf4": b"\x1bOS", + "kf5": b"\x1b[15~", + "kf6": b"\x1b[17~", + "kf7": b"\x1b[18~", + "kf8": b"\x1b[19~", + "kf9": b"\x1b[20~", + "kf10": b"\x1b[21~", + "kf11": b"\x1b[23~", + "kf12": b"\x1b[24~", + "kf13": b"\x1b[1;2P", + "kf14": b"\x1b[1;2Q", + "kf15": b"\x1b[1;2R", + "kf16": b"\x1b[1;2S", + "kf17": b"\x1b[15;2~", + "kf18": b"\x1b[17;2~", + "kf19": b"\x1b[18;2~", + "kf20": b"\x1b[19;2~", + }, + # Dumb terminal - minimal capabilities + "dumb": { + "bel": b"\x07", # Bell + "cud1": b"\n", # Move down 1 row (newline) + "ind": b"\n", # Scroll up one line (newline) + }, + # Linux console + "linux": { + # Bell + "bel": b"\x07", + # Cursor movement + "cub": b"\x1b[%p1%dD", # Move cursor left N columns + "cud": b"\x1b[%p1%dB", # Move cursor down N rows + "cuf": b"\x1b[%p1%dC", # Move cursor right N columns + "cuu": b"\x1b[%p1%dA", # Move cursor up N rows + "cub1": b"\x08", # Move cursor left 1 column (backspace) + "cud1": b"\n", # Move cursor down 1 row (newline) + "cuf1": b"\x1b[C", # Move cursor right 1 column + "cuu1": b"\x1b[A", # Move cursor up 1 row + "cup": b"\x1b[%i%p1%d;%p2%dH", # Move cursor to row, column + "hpa": b"\x1b[%i%p1%dG", # Move cursor to column + # Clear operations + "clear": b"\x1b[H\x1b[J", # Clear screen and home cursor (different from ansi!) + "el": b"\x1b[K", # Clear to end of line + # Insert/delete + "dch": b"\x1b[%p1%dP", # Delete N characters + "dch1": b"\x1b[P", # Delete 1 character + "ich": b"\x1b[%p1%d@", # Insert N characters + "ich1": b"\x1b[@", # Insert 1 character + # Cursor visibility + "civis": b"\x1b[?25l\x1b[?1c", # Make cursor invisible + "cnorm": b"\x1b[?25h\x1b[?0c", # Make cursor normal + # Scrolling + "ind": b"\n", # Scroll up one line + "ri": b"\x1bM", # Scroll down one line + # Keypad mode + "smkx": b"\x1b[?1h\x1b=", # Enable keypad mode + "rmkx": b"\x1b[?1l\x1b>", # Disable keypad mode + # Function keys and special keys + "kdch1": b"\x1b[3~", # Delete key + "kcud1": b"\x1b[B", # Down arrow + "kend": b"\x1b[4~", # End key (different from ansi!) + "khome": b"\x1b[1~", # Home key (different from ansi!) + "kich1": b"\x1b[2~", # Insert key + "kcub1": b"\x1b[D", # Left arrow + "knp": b"\x1b[6~", # Page down + "kpp": b"\x1b[5~", # Page up + "kcuf1": b"\x1b[C", # Right arrow + "kcuu1": b"\x1b[A", # Up arrow + # Function keys + "kf1": b"\x1b[[A", + "kf2": b"\x1b[[B", + "kf3": b"\x1b[[C", + "kf4": b"\x1b[[D", + "kf5": b"\x1b[[E", + "kf6": b"\x1b[17~", + "kf7": b"\x1b[18~", + "kf8": b"\x1b[19~", + "kf9": b"\x1b[20~", + "kf10": b"\x1b[21~", + "kf11": b"\x1b[23~", + "kf12": b"\x1b[24~", + "kf13": b"\x1b[25~", + "kf14": b"\x1b[26~", + "kf15": b"\x1b[28~", + "kf16": b"\x1b[29~", + "kf17": b"\x1b[31~", + "kf18": b"\x1b[32~", + "kf19": b"\x1b[33~", + "kf20": b"\x1b[34~", + }, +} + +# Map common TERM values to capability sets +_TERM_ALIASES = { + "xterm": "ansi", + "xterm-color": "ansi", + "xterm-256color": "ansi", + "screen": "ansi", + "screen-256color": "ansi", + "tmux": "ansi", + "tmux-256color": "ansi", + "vt100": "ansi", + "vt220": "ansi", + "rxvt": "ansi", + "rxvt-unicode": "ansi", + "rxvt-unicode-256color": "ansi", + "unknown": "dumb", +} + + +@dataclass +class TermInfo: + terminal_name: str | bytes | None + fallback: bool = True + + _capabilities: dict[str, bytes] = field(default_factory=dict) + + def __post_init__(self) -> None: + """Initialize terminal capabilities for the given terminal type. + + Based on ncurses implementation in: + - ncurses/tinfo/lib_setup.c:setupterm() and _nc_setupterm() + - ncurses/tinfo/lib_setup.c:TINFO_SETUP_TERM() + + This version first attempts to read terminfo database files like ncurses, + then, if `fallback` is True, falls back to hardcoded capabilities for + common terminal types. + """ + # If termstr is None or empty, try to get from environment + if not self.terminal_name: + self.terminal_name = os.environ.get("TERM") or "ANSI" + + if isinstance(self.terminal_name, bytes): + self.terminal_name = self.terminal_name.decode("ascii") + + try: + self._parse_terminfo_file(self.terminal_name) + except (OSError, ValueError): + if not self.fallback: + raise + + term_type = _TERM_ALIASES.get( + self.terminal_name, self.terminal_name + ) + if term_type not in _TERMINAL_CAPABILITIES: + term_type = "dumb" + self._capabilities = _TERMINAL_CAPABILITIES[term_type].copy() + + def _parse_terminfo_file(self, terminal_name: str) -> None: + """Parse a terminfo file. + + Populate the _capabilities dict for easy retrieval + + Based on ncurses implementation in: + - ncurses/tinfo/read_entry.c:_nc_read_termtype() + - ncurses/tinfo/read_entry.c:_nc_read_file_entry() + - ncurses/tinfo/lib_ti.c:tigetstr() + """ + data = _read_terminfo_file(terminal_name) + too_short = f"TermInfo file for {terminal_name!r} too short" + offset = 12 + if len(data) < offset: + raise ValueError(too_short) + + magic, name_size, bool_count, num_count, str_count, str_size = ( + struct.unpack(" len(data): + raise ValueError(too_short) + + # Read string offsets + end_offset = offset + 2 * str_count + if offset > len(data): + raise ValueError(too_short) + string_offset_data = data[offset:end_offset] + string_offsets = [ + off for [off] in struct.iter_unpack(" len(data): + raise ValueError(too_short) + string_table = data[offset : offset + str_size] + + # Extract strings from string table + capabilities = {} + for cap, off in zip(_STRING_NAMES, string_offsets): + if off < 0: + # CANCELLED_STRING; we do not store those + continue + elif off < len(string_table): + # Find null terminator + end = string_table.find(0, off) + if end >= 0: + capabilities[cap] = string_table[off:end] + # in other cases this is ABSENT_STRING; we don't store those. + + # Note: we don't support extended capabilities since PyREPL doesn't + # need them. + + self._capabilities = capabilities + + def get(self, cap: str) -> bytes | None: + """Get terminal capability string by name. + """ + if not isinstance(cap, str): + raise TypeError(f"`cap` must be a string, not {type(cap)}") + + return self._capabilities.get(cap) + + +def tparm(cap_bytes: bytes, *params: int) -> bytes: + """Parameterize a terminal capability string. + + Based on ncurses implementation in: + - ncurses/tinfo/lib_tparm.c:tparm() + - ncurses/tinfo/lib_tparm.c:tparam_internal() + + The ncurses version implements a full stack-based interpreter for + terminfo parameter strings. This pure Python version implements only + the subset of parameter substitution operations needed by PyREPL: + - %i (increment parameters for 1-based indexing) + - %p[1-9]%d (parameter substitution) + - %p[1-9]%{n}%+%d (parameter plus constant) + """ + if not isinstance(cap_bytes, bytes): + raise TypeError(f"`cap` must be bytes, not {type(cap_bytes)}") + + result = cap_bytes + + # %i - increment parameters (1-based instead of 0-based) + increment = b"%i" in result + if increment: + result = result.replace(b"%i", b"") + + # Replace %p1%d, %p2%d, etc. with actual parameter values + for i in range(len(params)): + pattern = b"%%p%d%%d" % (i + 1) + if pattern in result: + value = params[i] + if increment: + value += 1 + result = result.replace(pattern, str(value).encode("ascii")) + + # Handle %p1%{1}%+%d (parameter plus constant) + # Used in some cursor positioning sequences + pattern_re = re.compile(rb"%p(\d)%\{(\d+)\}%\+%d") + matches = list(pattern_re.finditer(result)) + for match in reversed(matches): # reversed to maintain positions + param_idx = int(match.group(1)) + constant = int(match.group(2)) + value = params[param_idx] + constant + result = ( + result[: match.start()] + + str(value).encode("ascii") + + result[match.end() :] + ) + + return result diff --git a/Lib/_pyrepl/trace.py b/Lib/_pyrepl/trace.py index a8eb2433cd3cce..943ee12f964b29 100644 --- a/Lib/_pyrepl/trace.py +++ b/Lib/_pyrepl/trace.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import sys # types if False: @@ -12,10 +13,22 @@ trace_file = open(trace_filename, "a") -def trace(line: str, *k: object, **kw: object) -> None: - if trace_file is None: - return - if k or kw: - line = line.format(*k, **kw) - trace_file.write(line + "\n") - trace_file.flush() + +if sys.platform == "emscripten": + from posix import _emscripten_log + + def trace(line: str, *k: object, **kw: object) -> None: + if "PYREPL_TRACE" not in os.environ: + return + if k or kw: + line = line.format(*k, **kw) + _emscripten_log(line) + +else: + def trace(line: str, *k: object, **kw: object) -> None: + if trace_file is None: + return + if k or kw: + line = line.format(*k, **kw) + trace_file.write(line + "\n") + trace_file.flush() diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index d21cdd9b076d86..09247de748ee3b 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -33,9 +33,9 @@ import platform from fcntl import ioctl -from . import curses +from . import terminfo from .console import Console, Event -from .fancy_termios import tcgetattr, tcsetattr +from .fancy_termios import tcgetattr, tcsetattr, TermState from .trace import trace from .unix_eventqueue import EventQueue from .utils import wlen @@ -51,16 +51,19 @@ # types if TYPE_CHECKING: - from typing import IO, Literal, overload + from typing import AbstractSet, IO, Literal, overload, cast else: overload = lambda func: None + cast = lambda typ, val: val class InvalidTerminal(RuntimeError): - pass + def __init__(self, message: str) -> None: + super().__init__(errno.EIO, message) -_error = (termios.error, curses.error, InvalidTerminal) +_error = (termios.error, InvalidTerminal) +_error_codes_to_ignore = frozenset([errno.EIO, errno.ENXIO, errno.EPERM]) SIGWINCH_EVENT = "repaint" @@ -125,12 +128,13 @@ def __init__(self): def register(self, fd, flag): self.fd = fd + # note: The 'timeout' argument is received as *milliseconds* def poll(self, timeout: float | None = None) -> list[int]: if timeout is None: r, w, e = select.select([self.fd], [], []) else: - r, w, e = select.select([self.fd], [], [], timeout/1000) + r, w, e = select.select([self.fd], [], [], timeout / 1000) return r poll = MinimalPoll # type: ignore[assignment] @@ -157,17 +161,28 @@ def __init__( self.pollob = poll() self.pollob.register(self.input_fd, select.POLLIN) - curses.setupterm(term or None, self.output_fd) + self.terminfo = terminfo.TermInfo(term or None) self.term = term + self.is_apple_terminal = ( + platform.system() == "Darwin" + and os.getenv("TERM_PROGRAM") == "Apple_Terminal" + ) + + try: + self.__input_fd_set(tcgetattr(self.input_fd), ignore=frozenset()) + except _error as e: + raise RuntimeError(f"termios failure ({e.args[1]})") @overload - def _my_getstr(cap: str, optional: Literal[False] = False) -> bytes: ... + def _my_getstr( + cap: str, optional: Literal[False] = False + ) -> bytes: ... @overload def _my_getstr(cap: str, optional: bool) -> bytes | None: ... def _my_getstr(cap: str, optional: bool = False) -> bytes | None: - r = curses.tigetstr(cap) + r = self.terminfo.get(cap) if not optional and r is None: raise InvalidTerminal( f"terminal doesn't have the required {cap} capability" @@ -201,7 +216,9 @@ def _my_getstr(cap: str, optional: bool = False) -> bytes | None: self.__setup_movement() - self.event_queue = EventQueue(self.input_fd, self.encoding) + self.event_queue = EventQueue( + self.input_fd, self.encoding, self.terminfo + ) self.cursor_visible = 1 signal.signal(signal.SIGCONT, self._sigcont_handler) @@ -213,7 +230,6 @@ def _sigcont_handler(self, signum, frame): def __read(self, n: int) -> bytes: return os.read(self.input_fd, n) - def change_encoding(self, encoding: str) -> None: """ Change the encoding used for I/O operations. @@ -325,6 +341,8 @@ def prepare(self): """ Prepare the console for input/output operations. """ + self.__buffer = [] + self.__svtermstate = tcgetattr(self.input_fd) raw = self.__svtermstate.copy() raw.iflag &= ~(termios.INPCK | termios.ISTRIP | termios.IXON) @@ -336,17 +354,15 @@ def prepare(self): raw.lflag |= termios.ISIG raw.cc[termios.VMIN] = 1 raw.cc[termios.VTIME] = 0 - tcsetattr(self.input_fd, termios.TCSADRAIN, raw) + self.__input_fd_set(raw) # In macOS terminal we need to deactivate line wrap via ANSI escape code - if platform.system() == "Darwin" and os.getenv("TERM_PROGRAM") == "Apple_Terminal": + if self.is_apple_terminal: os.write(self.output_fd, b"\033[?7l") self.screen = [] self.height, self.width = self.getheightwidth() - self.__buffer = [] - self.posxy = 0, 0 self.__gone_tall = 0 self.__move = self.__move_short @@ -368,13 +384,18 @@ def restore(self): self.__disable_bracketed_paste() self.__maybe_write_code(self._rmkx) self.flushoutput() - tcsetattr(self.input_fd, termios.TCSADRAIN, self.__svtermstate) + self.__input_fd_set(self.__svtermstate) - if platform.system() == "Darwin" and os.getenv("TERM_PROGRAM") == "Apple_Terminal": + if self.is_apple_terminal: os.write(self.output_fd, b"\033[?7h") if hasattr(self, "old_sigwinch"): - signal.signal(signal.SIGWINCH, self.old_sigwinch) + try: + signal.signal(signal.SIGWINCH, self.old_sigwinch) + except ValueError as e: + import threading + if threading.current_thread() is threading.main_thread(): + raise e del self.old_sigwinch def push_char(self, char: int | bytes) -> None: @@ -407,6 +428,8 @@ def get_event(self, block: bool = True) -> Event | None: return self.event_queue.get() else: continue + elif err.errno == errno.EIO: + raise SystemExit(errno.EIO) else: raise else: @@ -597,14 +620,14 @@ def __setup_movement(self): if self._dch1: self.dch1 = self._dch1 elif self._dch: - self.dch1 = curses.tparm(self._dch, 1) + self.dch1 = terminfo.tparm(self._dch, 1) else: self.dch1 = None if self._ich1: self.ich1 = self._ich1 elif self._ich: - self.ich1 = curses.tparm(self._ich, 1) + self.ich1 = terminfo.tparm(self._ich, 1) else: self.ich1 = None @@ -701,7 +724,7 @@ def __write(self, text): self.__buffer.append((text, 0)) def __write_code(self, fmt, *args): - self.__buffer.append((curses.tparm(fmt, *args), 1)) + self.__buffer.append((terminfo.tparm(fmt, *args), 1)) def __maybe_write_code(self, fmt, *args): if fmt: @@ -803,3 +826,17 @@ def __tputs(self, fmt, prog=delayprog): os.write(self.output_fd, self._pad * nchars) else: time.sleep(float(delay) / 1000.0) + + def __input_fd_set( + self, + state: TermState, + ignore: AbstractSet[int] = _error_codes_to_ignore, + ) -> bool: + try: + tcsetattr(self.input_fd, termios.TCSADRAIN, state) + except termios.error as te: + if te.args[0] not in ignore: + raise + return False + else: + return True diff --git a/Lib/_pyrepl/unix_eventqueue.py b/Lib/_pyrepl/unix_eventqueue.py index 29b3e9dd5efd07..2a9cca59e7477f 100644 --- a/Lib/_pyrepl/unix_eventqueue.py +++ b/Lib/_pyrepl/unix_eventqueue.py @@ -18,7 +18,7 @@ # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -from . import curses +from .terminfo import TermInfo from .trace import trace from .base_eventqueue import BaseEventQueue from termios import tcgetattr, VERASE @@ -54,22 +54,23 @@ b'\033Oc': 'ctrl right', } -def get_terminal_keycodes() -> dict[bytes, str]: +def get_terminal_keycodes(ti: TermInfo) -> dict[bytes, str]: """ Generates a dictionary mapping terminal keycodes to human-readable names. """ keycodes = {} for key, terminal_code in TERMINAL_KEYNAMES.items(): - keycode = curses.tigetstr(terminal_code) + keycode = ti.get(terminal_code) trace('key {key} tiname {terminal_code} keycode {keycode!r}', **locals()) if keycode: keycodes[keycode] = key keycodes.update(CTRL_ARROW_KEYCODES) return keycodes + class EventQueue(BaseEventQueue): - def __init__(self, fd: int, encoding: str) -> None: - keycodes = get_terminal_keycodes() + def __init__(self, fd: int, encoding: str, ti: TermInfo) -> None: + keycodes = get_terminal_keycodes(ti) if os.isatty(fd): backspace = tcgetattr(fd)[6][VERASE] keycodes[backspace] = "backspace" diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 38cf6b5a08e892..06cddef851bb40 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -20,6 +20,7 @@ ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) IDENTIFIERS_AFTER = {"def", "class"} +KEYWORD_CONSTANTS = {"True", "False", "None"} BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} @@ -41,9 +42,15 @@ def from_re(cls, m: Match[str], group: int | str) -> Self: @classmethod def from_token(cls, token: TI, line_len: list[int]) -> Self: + end_offset = -1 + if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} + and token.string.endswith(("{", "}"))): + # gh-134158: a visible trailing brace comes from a double brace in input + end_offset += 1 + return cls( line_len[token.start[0] - 1] + token.start[1], - line_len[token.end[0] - 1] + token.end[1] - 1, + line_len[token.end[0] - 1] + token.end[1] + end_offset, ) @@ -56,6 +63,12 @@ class ColorSpan(NamedTuple): def str_width(c: str) -> int: if ord(c) < 128: return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 w = unicodedata.east_asian_width(c) if w in ("N", "Na", "H", "A"): return 1 @@ -102,6 +115,8 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: for color in gen_colors_from_token_stream(gen, line_lengths): yield color last_emitted = color + except SyntaxError: + return except tokenize.TokenError as te: yield from recover_unterminated_string( te, line_lengths, last_emitted, buffer @@ -189,8 +204,11 @@ def gen_colors_from_token_stream( span = Span.from_token(token, line_lengths) yield ColorSpan(span, "definition") elif keyword.iskeyword(token.string): + span_cls = "keyword" + if token.string in KEYWORD_CONSTANTS: + span_cls = "keyword_constant" span = Span.from_token(token, line_lengths) - yield ColorSpan(span, "keyword") + yield ColorSpan(span, span_cls) if token.string in IDENTIFIERS_AFTER: is_def_name = True elif ( @@ -200,7 +218,10 @@ def gen_colors_from_token_stream( ): span = Span.from_token(token, line_lengths) yield ColorSpan(span, "soft_keyword") - elif token.string in BUILTINS: + elif ( + token.string in BUILTINS + and not (prev_token and prev_token.exact_type == T.DOT) + ): span = Span.from_token(token, line_lengths) yield ColorSpan(span, "builtin") @@ -233,14 +254,14 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: return s in keyword_first_sets_match return True case ( - None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), TI(string="case"), TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) | TI(T.OP, string="(" | "*" | "-" | "[" | "{") ): return True case ( - None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), TI(string="case"), TI(T.NAME, string=s) ): @@ -249,6 +270,12 @@ def is_soft_keyword_used(*tokens: TI | None) -> bool: return True case (TI(string="case"), TI(string="_"), TI(string=":")): return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="type"), + TI(T.NAME, string=s) + ): + return not keyword.iskeyword(s) case _: return False diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 95749198b3b2f9..c56dcd6d7dd434 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -419,10 +419,7 @@ def _getscrollbacksize(self) -> int: return info.srWindow.Bottom # type: ignore[no-any-return] - def _read_input(self, block: bool = True) -> INPUT_RECORD | None: - if not block and not self.wait(timeout=0): - return None - + def _read_input(self) -> INPUT_RECORD | None: rec = INPUT_RECORD() read = DWORD() if not ReadConsoleInput(InHandle, rec, 1, read): @@ -431,14 +428,10 @@ def _read_input(self, block: bool = True) -> INPUT_RECORD | None: return rec def _read_input_bulk( - self, block: bool, n: int + self, n: int ) -> tuple[ctypes.Array[INPUT_RECORD], int]: rec = (n * INPUT_RECORD)() read = DWORD() - - if not block and not self.wait(timeout=0): - return rec, 0 - if not ReadConsoleInput(InHandle, rec, n, read): raise WinError(GetLastError()) @@ -449,8 +442,11 @@ def get_event(self, block: bool = True) -> Event | None: and there is no event pending, otherwise waits for the completion of an event.""" + if not block and not self.wait(timeout=0): + return None + while self.event_queue.empty(): - rec = self._read_input(block) + rec = self._read_input() if rec is None: return None @@ -551,12 +547,20 @@ def getpending(self) -> Event: if e2: e.data += e2.data - recs, rec_count = self._read_input_bulk(False, 1024) + recs, rec_count = self._read_input_bulk(1024) for i in range(rec_count): rec = recs[i] + # In case of a legacy console, we do not only receive a keydown + # event, but also a keyup event - and for uppercase letters + # an additional SHIFT_PRESSED event. if rec and rec.EventType == KEY_EVENT: key_event = rec.Event.KeyEvent + if not key_event.bKeyDown: + continue ch = key_event.uChar.UnicodeChar + if ch == "\x00": + # ignore SHIFT_PRESSED and special keys + continue if ch == "\r": ch += "\n" e.data += ch diff --git a/Lib/_strptime.py b/Lib/_strptime.py index aa63933a49d16d..fc7e369c3d1e26 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -14,6 +14,7 @@ import time import locale import calendar +import re from re import compile as re_compile from re import sub as re_sub from re import IGNORECASE @@ -41,6 +42,29 @@ def _findall(haystack, needle): yield i i += len(needle) +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -84,6 +108,7 @@ def __init__(self): self.__calc_weekday() self.__calc_month() self.__calc_am_pm() + self.__calc_alt_digits() self.__calc_timezone() self.__calc_date_time() if _getlang() != self.lang: @@ -119,9 +144,43 @@ def __calc_am_pm(self): am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. + self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. + if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of # overloaded numbers is minimized. The order in which searches for @@ -129,26 +188,32 @@ def __calc_date_time(self): # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) - replacement_pairs = [ + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I'), - # Non-ASCII digits - ('\u0661\u0669\u0669\u0669', '%Y'), - ('\u0669\u0669', '%Oy'), - ('\u0662\u0662', '%OH'), - ('\u0664\u0664', '%OM'), - ('\u0665\u0665', '%OS'), - ('\u0661\u0667', '%Od'), - ('\u0660\u0663', '%Om'), - ('\u0663', '%Om'), - ('\u0662', '%Ow'), - ('\u0661\u0660', '%OI'), ] + date_time = [] - for directive in ('%c', '%x', '%X'): + for directive in ('%c', '%x', '%X', '%r'): current_format = time.strftime(directive, time_tuple).lower() current_format = current_format.replace('%', '%%') # The month and the day of the week formats are treated specially @@ -172,9 +237,10 @@ def __calc_date_time(self): if tz: current_format = current_format.replace(tz, "%Z") # Transform all non-ASCII digits to digits in range U+0660 to U+0669. - current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'H': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'k': r"(?P2[0-3]|[0-1]\d|\d| \d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", + 'l': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -305,23 +374,56 @@ def __init__(self, locale_time=None): 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'), + 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), 'Z'), '%': '%'} - for d in 'dmyHIMS': - mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d - mapping['Ow'] = r'(?P\d)' + if self.locale_time.LC_alt_digits is None: + for d in 'dmyCHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + else: + mapping.update({ + 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd', + '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'), + 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm', + '1[0-2]|0[1-9]|[1-9]'), + 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w', + '[0-6]'), + 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y', + '[0-9][0-9]'), + 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C', + '[0-9][0-9]'), + 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H', + '2[0-3]|[0-1][0-9]|[0-9]'), + 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I', + '1[0-2]|0[1-9]|[1-9]'), + 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M', + '[0-5][0-9]|[0-9]'), + 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S', + '6[0-1]|[0-5][0-9]|[0-9]'), + }) + mapping.update({ + 'e': mapping['d'], + 'Oe': mapping['Od'], + 'P': mapping['p'], + 'Op': mapping['p'], + 'W': mapping['U'].replace('U', 'W'), + }) mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) + base.__setitem__('T', self.pattern('%H:%M:%S')) + base.__setitem__('R', self.pattern('%H:%M')) + base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) base.__setitem__('x', self.pattern(self.locale_time.LC_date)) base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - def __seqToRE(self, to_convert, directive): + def __seqToRE(self, to_convert, directive, altregex=None): """Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This @@ -337,8 +439,9 @@ def __seqToRE(self, to_convert, directive): else: return '' regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex + if altregex is not None: + regex += '|' + altregex + return '(?P<%s>%s)' % (directive, regex) def pattern(self, format): """Return regex pattern for the format string. @@ -365,7 +468,7 @@ def repl(m): nonlocal day_of_month_in_format day_of_month_in_format = True return self[format_char] - format = re_sub(r'%([OE]?\\?.?)', repl, format) + format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format) if day_of_month_in_format and not year_in_format: import warnings warnings.warn("""\ @@ -467,6 +570,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # values weekday = julian = None found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + for group_key in found_dict.keys(): # Directives not explicitly handled below: # c, x, X @@ -474,30 +586,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 else: - year += 1900 + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 elif group_key == 'Y': year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) elif group_key == 'm': - month = int(found_dict['m']) + month = parse_int(found_dict['m']) elif group_key == 'B': month = locale_time.f_month.index(found_dict['B'].lower()) elif group_key == 'b': month = locale_time.a_month.index(found_dict['b'].lower()) elif group_key == 'd': - day = int(found_dict['d']) + day = parse_int(found_dict['d']) elif group_key == 'H': - hour = int(found_dict['H']) + hour = parse_int(found_dict['H']) elif group_key == 'I': - hour = int(found_dict['I']) + hour = parse_int(found_dict['I']) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -513,9 +629,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): if hour != 12: hour += 12 elif group_key == 'M': - minute = int(found_dict['M']) + minute = parse_int(found_dict['M']) elif group_key == 'S': - second = int(found_dict['S']) + second = parse_int(found_dict['S']) elif group_key == 'f': s = found_dict['f'] # Pad to always return microseconds. diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index c0b1d4395d14ed..a5788cdbfae3f5 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -27,6 +27,9 @@ class Format(enum.IntEnum): _sentinel = object() +# Following `NAME_ERROR_MSG` in `ceval_macros.h`: +_NAME_ERROR_MSG = "name '{name:.200}' is not defined" + # Slots shared by ForwardRef and _Stringifier. The __forward__ names must be # preserved for compatibility with the old typing.ForwardRef class. The remaining @@ -82,6 +85,9 @@ def __init__( # These are always set to None here but may be non-None if a ForwardRef # is created through __class__ assignment on a _Stringifier object. self.__globals__ = None + # This may be either a cell object (for a ForwardRef referring to a single name) + # or a dict mapping cell names to cell objects (for a ForwardRef containing references + # to multiple names). self.__cell__ = None self.__extra_names__ = None # These are initially None but serve as a cache and may be set to a non-None @@ -114,7 +120,7 @@ def evaluate( is_forwardref_format = True case _: raise NotImplementedError(format) - if self.__cell__ is not None: + if isinstance(self.__cell__, types.CellType): try: return self.__cell__.cell_contents except ValueError: @@ -144,34 +150,42 @@ def evaluate( if globals is None: globals = {} + if type_params is None and owner is not None: + type_params = getattr(owner, "__type_params__", None) + if locals is None: locals = {} if isinstance(owner, type): locals.update(vars(owner)) + elif ( + type_params is not None + or isinstance(self.__cell__, dict) + or self.__extra_names__ + ): + # Create a new locals dict if necessary, + # to avoid mutating the argument. + locals = dict(locals) - if type_params is None and owner is not None: - # "Inject" type parameters into the local namespace - # (unless they are shadowed by assignments *in* the local namespace), - # as a way of emulating annotation scopes when calling `eval()` - type_params = getattr(owner, "__type_params__", None) - - # type parameters require some special handling, - # as they exist in their own scope - # but `eval()` does not have a dedicated parameter for that scope. - # For classes, names in type parameter scopes should override - # names in the global scope (which here are called `localns`!), - # but should in turn be overridden by names in the class scope - # (which here are called `globalns`!) + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` if type_params is not None: - globals = dict(globals) - locals = dict(locals) for param in type_params: - param_name = param.__name__ - if not self.__forward_is_class__ or param_name not in globals: - globals[param_name] = param - locals.pop(param_name, None) + locals.setdefault(param.__name__, param) + + # Similar logic can be used for nonlocals, which should not + # override locals. + if isinstance(self.__cell__, dict): + for cell_name, cell in self.__cell__.items(): + try: + cell_value = cell.cell_contents + except ValueError: + pass + else: + locals.setdefault(cell_name, cell_value) + if self.__extra_names__: - locals = {**locals, **self.__extra_names__} + locals.update(self.__extra_names__) arg = self.__forward_arg__ if arg.isidentifier() and not keyword.iskeyword(arg): @@ -184,7 +198,7 @@ def evaluate( elif is_forwardref_format: return self else: - raise NameError(arg) + raise NameError(_NAME_ERROR_MSG.format(name=arg), name=arg) else: code = self.__forward_code__ try: @@ -192,8 +206,11 @@ def evaluate( except Exception: if not is_forwardref_format: raise + + # All variables, in scoping order, should be checked before + # triggering __missing__ to create a _Stringifier. new_locals = _StringifierDict( - {**builtins.__dict__, **locals}, + {**builtins.__dict__, **globals, **locals}, globals=globals, owner=owner, is_class=self.__forward_is_class__, @@ -204,7 +221,7 @@ def evaluate( except Exception: return self else: - new_locals.transmogrify() + new_locals.transmogrify(self.__cell__) return result def _evaluate(self, globalns, localns, type_params=_sentinel, *, recursive_guard): @@ -246,15 +263,8 @@ def __forward_code__(self): if self.__code__ is not None: return self.__code__ arg = self.__forward_arg__ - # If we do `def f(*args: *Ts)`, then we'll have `arg = '*Ts'`. - # Unfortunately, this isn't a valid expression on its own, so we - # do the unpacking manually. - if arg.startswith("*"): - arg_to_compile = f"({arg},)[0]" # E.g. (*Ts,)[0] or (*tuple[int, int],)[0] - else: - arg_to_compile = arg try: - self.__code__ = compile(arg_to_compile, "", "eval") + self.__code__ = compile(_rewrite_star_unpack(arg), "", "eval") except SyntaxError: raise SyntaxError(f"Forward reference must be an expression -- got {arg!r}") return self.__code__ @@ -283,7 +293,7 @@ def __hash__(self): self.__forward_module__, id(self.__globals__), # dictionaries are not hashable, so hash by identity self.__forward_is_class__, - self.__cell__, + tuple(sorted(self.__cell__.items())) if isinstance(self.__cell__, dict) else self.__cell__, self.__owner__, tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None, )) @@ -305,6 +315,9 @@ def __repr__(self): return f"ForwardRef({self.__forward_arg__!r}{''.join(extra)})" +_Template = type(t"") + + class _Stringifier: # Must match the slots on ForwardRef, so we can turn an instance of one into an # instance of the other in place. @@ -341,6 +354,8 @@ def __convert_to_ast(self, other): if isinstance(other.__ast_node__, str): return ast.Name(id=other.__ast_node__), other.__extra_names__ return other.__ast_node__, other.__extra_names__ + elif type(other) is _Template: + return _template_to_ast(other), None elif ( # In STRING format we don't bother with the create_unique_name() dance; # it's better to emit the repr() of the object instead of an opaque name. @@ -560,6 +575,70 @@ def unary_op(self): del _make_unary_op +def _template_to_ast_constructor(template): + """Convert a `template` instance to a non-literal AST.""" + args = [] + for part in template: + match part: + case str(): + args.append(ast.Constant(value=part)) + case _: + interp = ast.Call( + func=ast.Name(id="Interpolation"), + args=[ + ast.Constant(value=part.value), + ast.Constant(value=part.expression), + ast.Constant(value=part.conversion), + ast.Constant(value=part.format_spec), + ] + ) + args.append(interp) + return ast.Call(func=ast.Name(id="Template"), args=args, keywords=[]) + + +def _template_to_ast_literal(template, parsed): + """Convert a `template` instance to a t-string literal AST.""" + values = [] + interp_count = 0 + for part in template: + match part: + case str(): + values.append(ast.Constant(value=part)) + case _: + interp = ast.Interpolation( + str=part.expression, + value=parsed[interp_count], + conversion=ord(part.conversion) if part.conversion else -1, + format_spec=ast.Constant(value=part.format_spec) + if part.format_spec + else None, + ) + values.append(interp) + interp_count += 1 + return ast.TemplateStr(values=values) + + +def _template_to_ast(template): + """Make a best-effort conversion of a `template` instance to an AST.""" + # gh-138558: Not all Template instances can be represented as t-string + # literals. Return the most accurate AST we can. See issue for details. + + # If any expr is empty or whitespace only, we cannot convert to a literal. + if any(part.expression.strip() == "" for part in template.interpolations): + return _template_to_ast_constructor(template) + + try: + # Wrap in parens to allow whitespace inside interpolation curly braces + parsed = tuple( + ast.parse(f"({part.expression})", mode="eval").body + for part in template.interpolations + ) + except SyntaxError: + return _template_to_ast_constructor(template) + + return _template_to_ast_literal(template, parsed) + + class _StringifierDict(dict): def __init__(self, namespace, *, globals=None, owner=None, is_class=False, format): super().__init__(namespace) @@ -582,13 +661,15 @@ def __missing__(self, key): self.stringifiers.append(fwdref) return fwdref - def transmogrify(self): + def transmogrify(self, cell_dict): for obj in self.stringifiers: obj.__class__ = ForwardRef obj.__stringifier_dict__ = None # not needed for ForwardRef if isinstance(obj.__ast_node__, str): obj.__arg__ = obj.__ast_node__ obj.__ast_node__ = None + if cell_dict is not None and obj.__cell__ is None: + obj.__cell__ = cell_dict def create_unique_name(self): name = f"__annotationlib_name_{self.next_id}__" @@ -638,9 +719,21 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): # possibly constants if the annotate function uses them directly). We then # convert each of those into a string to get an approximation of the # original source. + + # Attempt to call with VALUE_WITH_FAKE_GLOBALS to check if it is implemented + # See: https://github.com/python/cpython/issues/138764 + # Only fail on NotImplementedError + try: + annotate(Format.VALUE_WITH_FAKE_GLOBALS) + except NotImplementedError: + # Both STRING and VALUE_WITH_FAKE_GLOBALS are not implemented: fallback to VALUE + return annotations_to_string(annotate(Format.VALUE)) + except Exception: + pass + globals = _StringifierDict({}, format=format) is_class = isinstance(owner, type) - closure = _build_closure( + closure, _ = _build_closure( annotate, owner, is_class, globals, allow_evaluation=False ) func = types.FunctionType( @@ -684,7 +777,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): is_class=is_class, format=format, ) - closure = _build_closure( + closure, cell_dict = _build_closure( annotate, owner, is_class, globals, allow_evaluation=True ) func = types.FunctionType( @@ -696,10 +789,13 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): ) try: result = func(Format.VALUE_WITH_FAKE_GLOBALS) + except NotImplementedError: + # FORWARDREF and VALUE_WITH_FAKE_GLOBALS not supported, fall back to VALUE + return annotate(Format.VALUE) except Exception: pass else: - globals.transmogrify() + globals.transmogrify(cell_dict) return result # Try again, but do not provide any globals. This allows us to return @@ -711,7 +807,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): is_class=is_class, format=format, ) - closure = _build_closure( + closure, cell_dict = _build_closure( annotate, owner, is_class, globals, allow_evaluation=False ) func = types.FunctionType( @@ -722,7 +818,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): kwdefaults=annotate.__kwdefaults__, ) result = func(Format.VALUE_WITH_FAKE_GLOBALS) - globals.transmogrify() + globals.transmogrify(cell_dict) if _is_evaluate: if isinstance(result, ForwardRef): return result.evaluate(format=Format.FORWARDREF) @@ -747,14 +843,11 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation): if not annotate.__closure__: - return None - freevars = annotate.__code__.co_freevars + return None, None new_closure = [] - for i, cell in enumerate(annotate.__closure__): - if i < len(freevars): - name = freevars[i] - else: - name = "__cell__" + cell_dict = {} + for name, cell in zip(annotate.__code__.co_freevars, annotate.__closure__, strict=True): + cell_dict[name] = cell new_cell = None if allow_evaluation: try: @@ -775,7 +868,7 @@ def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluat stringifier_dict.stringifiers.append(fwdref) new_cell = types.CellType(fwdref) new_closure.append(new_cell) - return tuple(new_closure) + return tuple(new_closure), cell_dict def _stringify_single(anno): @@ -784,6 +877,8 @@ def _stringify_single(anno): # We have to handle str specially to support PEP 563 stringified annotations. elif isinstance(anno, str): return anno + elif isinstance(anno, _Template): + return ast.unparse(_template_to_ast(anno)) else: return repr(anno) @@ -906,48 +1001,49 @@ def get_annotations( if not eval_str: return dict(ann) - if isinstance(obj, type): - # class - obj_globals = None - module_name = getattr(obj, "__module__", None) - if module_name: - module = sys.modules.get(module_name, None) - if module: - obj_globals = getattr(module, "__dict__", None) - obj_locals = dict(vars(obj)) - unwrap = obj - elif isinstance(obj, types.ModuleType): - # module - obj_globals = getattr(obj, "__dict__") - obj_locals = None - unwrap = None - elif callable(obj): - # this includes types.Function, types.BuiltinFunctionType, - # types.BuiltinMethodType, functools.partial, functools.singledispatch, - # "class funclike" from Lib/test/test_inspect... on and on it goes. - obj_globals = getattr(obj, "__globals__", None) - obj_locals = None - unwrap = obj - else: - obj_globals = obj_locals = unwrap = None - - if unwrap is not None: - while True: - if hasattr(unwrap, "__wrapped__"): - unwrap = unwrap.__wrapped__ - continue - if functools := sys.modules.get("functools"): - if isinstance(unwrap, functools.partial): - unwrap = unwrap.func + if globals is None or locals is None: + if isinstance(obj, type): + # class + obj_globals = None + module_name = getattr(obj, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + obj_globals = getattr(module, "__dict__", None) + obj_locals = dict(vars(obj)) + unwrap = obj + elif isinstance(obj, types.ModuleType): + # module + obj_globals = getattr(obj, "__dict__") + obj_locals = None + unwrap = None + elif callable(obj): + # this includes types.Function, types.BuiltinFunctionType, + # types.BuiltinMethodType, functools.partial, functools.singledispatch, + # "class funclike" from Lib/test/test_inspect... on and on it goes. + obj_globals = getattr(obj, "__globals__", None) + obj_locals = None + unwrap = obj + else: + obj_globals = obj_locals = unwrap = None + + if unwrap is not None: + while True: + if hasattr(unwrap, "__wrapped__"): + unwrap = unwrap.__wrapped__ continue - break - if hasattr(unwrap, "__globals__"): - obj_globals = unwrap.__globals__ + if functools := sys.modules.get("functools"): + if isinstance(unwrap, functools.partial): + unwrap = unwrap.func + continue + break + if hasattr(unwrap, "__globals__"): + obj_globals = unwrap.__globals__ - if globals is None: - globals = obj_globals - if locals is None: - locals = obj_locals + if globals is None: + globals = obj_globals + if locals is None: + locals = obj_locals # "Inject" type parameters into the local namespace # (unless they are shadowed by assignments *in* the local namespace), @@ -958,7 +1054,8 @@ def get_annotations( locals = {param.__name__: param for param in type_params} | locals return_value = { - key: value if not isinstance(value, str) else eval(value, globals, locals) + key: value if not isinstance(value, str) + else eval(_rewrite_star_unpack(value), globals, locals) for key, value in ann.items() } return return_value @@ -976,6 +1073,9 @@ def type_repr(value): if value.__module__ == "builtins": return value.__qualname__ return f"{value.__module__}.{value.__qualname__}" + elif isinstance(value, _Template): + tree = _template_to_ast(value) + return ast.unparse(tree) if value is ...: return "..." return repr(value) @@ -992,6 +1092,16 @@ def annotations_to_string(annotations): } +def _rewrite_star_unpack(arg): + """If the given argument annotation expression is a star unpack e.g. `'*Ts'` + rewrite it to a valid expression. + """ + if arg.startswith("*"): + return f"({arg},)[0]" # E.g. (*Ts,)[0] or (*tuple[int, int],)[0] + else: + return arg + + def _get_and_call_annotate(obj, format): """Get the __annotate__ function and call it. @@ -1006,14 +1116,27 @@ def _get_and_call_annotate(obj, format): return None +_BASE_GET_ANNOTATIONS = type.__dict__["__annotations__"].__get__ + + def _get_dunder_annotations(obj): """Return the annotations for an object, checking that it is a dictionary. Does not return a fresh dictionary. """ - ann = getattr(obj, "__annotations__", None) - if ann is None: - return None + # This special case is needed to support types defined under + # from __future__ import annotations, where accessing the __annotations__ + # attribute directly might return annotations for the wrong class. + if isinstance(obj, type): + try: + ann = _BASE_GET_ANNOTATIONS(obj) + except AttributeError: + # For static types, the descriptor raises AttributeError. + return None + else: + ann = getattr(obj, "__annotations__", None) + if ann is None: + return None if not isinstance(ann, dict): raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None") diff --git a/Lib/argparse.py b/Lib/argparse.py index f13ac82dbc50b3..1d7d34f9924326 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -167,8 +167,7 @@ def __init__( indent_increment=2, max_help_position=24, width=None, - prefix_chars='-', - color=False, + color=True, ): # default setting for width if width is None: @@ -176,16 +175,7 @@ def __init__( width = shutil.get_terminal_size().columns width -= 2 - from _colorize import can_colorize, decolor, get_theme - - if color and can_colorize(): - self._theme = get_theme(force_color=True).argparse - self._decolor = decolor - else: - self._theme = get_theme(force_no_color=True).argparse - self._decolor = lambda text: text - - self._prefix_chars = prefix_chars + self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -202,9 +192,20 @@ def __init__( self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') + def _set_color(self, color): + from _colorize import can_colorize, decolor, get_theme + + if color and can_colorize(): + self._theme = get_theme(force_color=True).argparse + self._decolor = decolor + else: + self._theme = get_theme(force_no_color=True).argparse + self._decolor = lambda text: text + # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -256,6 +257,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -279,7 +281,7 @@ def add_argument(self, action): if action.help is not SUPPRESS: # find all invocations - get_invocation = self._format_action_invocation + get_invocation = lambda x: self._decolor(self._format_action_invocation(x)) invocation_lengths = [len(get_invocation(action)) + self._current_indent] for subaction in self._iter_indented_subactions(action): invocation_lengths.append(len(get_invocation(subaction)) + self._current_indent) @@ -299,6 +301,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -334,27 +337,17 @@ def _format_usage(self, usage, actions, groups, prefix): elif usage is None: prog = '%(prog)s' % dict(prog=self._prog) - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - + parts, pos_start = self._get_actions_usage_parts(actions, groups) # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) + usage = ' '.join(filter(None, [prog, *parts])) # wrap the usage parts if it's too long text_width = self._width - self._current_indent if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts - opt_parts = self._get_actions_usage_parts(optionals, groups) - pos_parts = self._get_actions_usage_parts(positionals, groups) + opt_parts = parts[:pos_start] + pos_parts = parts[pos_start:] # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -411,120 +404,114 @@ def get_lines(parts, indent, prefix=None): # prefix with 'usage:' return f'{t.usage}{prefix}{t.reset}{usage}\n\n' - def _format_actions_usage(self, actions, groups): - return ' '.join(self._get_actions_usage_parts(actions, groups)) - def _is_long_option(self, string): - return len(string) >= 2 and string[1] in self._prefix_chars - - def _is_short_option(self, string): - return ( - not self._is_long_option(string) - and len(string) >= 1 - and string[0] in self._prefix_chars - ) + return len(string) > 2 def _get_actions_usage_parts(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} - for group in groups: - if not group._group_actions: - raise ValueError(f'empty group {group}') - - if all(action.help is SUPPRESS for action in group._group_actions): - continue + """Get usage parts with split index for optionals/positionals. - try: - start = min(actions.index(item) for item in group._group_actions) - except ValueError: - continue - else: - end = start + len(group._group_actions) - if set(actions[start:end]) == set(group._group_actions): - group_actions.update(group._group_actions) - inserts[start, end] = group + Returns (parts, pos_start) where pos_start is the index in parts + where positionals begin. + This preserves mutually exclusive group formatting across the + optionals/positionals boundary (gh-75949). + """ + actions = [action for action in actions if action.help is not SUPPRESS] + # group actions by mutually exclusive groups + action_groups = dict.fromkeys(actions) + for group in groups: + for action in group._group_actions: + if action in action_groups: + action_groups[action] = group + # positional arguments keep their position + positionals = [] + for action in actions: + if not action.option_strings: + group = action_groups.pop(action) + if group: + group_actions = [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + [action] + positionals.append((group.required, group_actions)) + else: + positionals.append((None, [action])) + # the remaining optional arguments are sorted by the position of + # the first option in the group + optionals = [] + for action in actions: + if action.option_strings and action in action_groups: + group = action_groups.pop(action) + if group: + group_actions = [action] + [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + optionals.append((group.required, group_actions)) + else: + optionals.append((None, [action])) # collect all actions format strings parts = [] t = self._theme - for action in actions: - - # suppressed arguments are marked with None - if action.help is SUPPRESS: - part = None - - # produce all arg strings - elif not action.option_strings: - default = self._get_default_metavar_for_positional(action) - part = ( - t.summary_action - + self._format_args(action, default) - + t.reset - ) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] - - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = action.format_usage() - if self._is_long_option(part): - part = f"{t.summary_long_option}{part}{t.reset}" - elif self._is_short_option(part): - part = f"{t.summary_short_option}{part}{t.reset}" - - # if the Optional takes a value, format is: - # -s ARGS or --long ARGS + pos_start = None + for i, (required, group) in enumerate(optionals + positionals): + start = len(parts) + if i == len(optionals): + pos_start = start + in_group = len(group) > 1 + for action in group: + # produce all arg strings + if not action.option_strings: + default = self._get_default_metavar_for_positional(action) + part = self._format_args(action, default) + # if it's in a group, strip the outer [] + if in_group: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + part = t.summary_action + part + t.reset + + # produce the first way to invoke the option in brackets else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) + option_string = action.option_strings[0] if self._is_long_option(option_string): option_color = t.summary_long_option - elif self._is_short_option(option_string): + else: option_color = t.summary_short_option - part = ( - f"{option_color}{option_string} " - f"{t.summary_label}{args_string}{t.reset}" - ) - - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part - - # add the action string to the list - parts.append(part) - - # group mutually exclusive actions - inserted_separators_indices = set() - for start, end in sorted(inserts, reverse=True): - group = inserts[start, end] - group_parts = [item for item in parts[start:end] if item is not None] - group_size = len(group_parts) - if group.required: - open, close = "()" if group_size > 1 else ("", "") - else: - open, close = "[]" - group_parts[0] = open + group_parts[0] - group_parts[-1] = group_parts[-1] + close - for i, part in enumerate(group_parts[:-1], start=start): - # insert a separator if not already done in a nested group - if i not in inserted_separators_indices: - parts[i] = part + ' |' - inserted_separators_indices.add(i) - parts[start + group_size - 1] = group_parts[-1] - for i in range(start + group_size, end): - parts[i] = None - - # return the usage parts - return [item for item in parts if item is not None] + + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = action.format_usage() + part = f"{option_color}{part}{t.reset}" + + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + part = ( + f"{option_color}{option_string} " + f"{t.summary_label}{args_string}{t.reset}" + ) + + # make it look optional if it's not required or in a group + if not (action.required or required or in_group): + part = '[%s]' % part + + # add the action string to the list + parts.append(part) + + if in_group: + parts[start] = ('(' if required else '[') + parts[start] + for i in range(start, len(parts) - 1): + parts[i] += ' |' + parts[-1] += ')' if required else ']' + + if pos_start is None: + pos_start = len(parts) + return parts, pos_start def _format_text(self, text): if '%(prog)' in text: @@ -606,10 +593,8 @@ def color_option_strings(strings): for s in strings: if self._is_long_option(s): parts.append(f"{t.long_option}{s}{t.reset}") - elif self._is_short_option(s): - parts.append(f"{t.short_option}{s}{t.reset}") else: - parts.append(s) + parts.append(f"{t.short_option}{s}{t.reset}") return parts # if the Optional doesn't take a value, format is: @@ -754,11 +739,14 @@ def _get_help_string(self, action): if help is None: help = '' - if '%(default)' not in help: - if action.default is not SUPPRESS: - defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] - if action.option_strings or action.nargs in defaulting_nargs: - help += _(' (default: %(default)s)') + if ( + '%(default)' not in help + and action.default is not SUPPRESS + and not action.required + ): + defaulting_nargs = (OPTIONAL, ZERO_OR_MORE) + if action.option_strings or action.nargs in defaulting_nargs: + help += _(' (default: %(default)s)') return help @@ -1240,7 +1228,7 @@ def __init__(self, self._name_parser_map = {} self._choices_actions = [] self._deprecated = set() - self._color = False + self._color = True super(_SubParsersAction, self).__init__( option_strings=option_strings, @@ -1479,6 +1467,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1489,6 +1478,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1508,6 +1498,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) @@ -1540,7 +1531,7 @@ def add_argument(self, *args, **kwargs): action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): - raise ValueError('unknown action {action_class!r}') + raise ValueError(f'unknown action {action_class!r}') action = action_class(**kwargs) # raise an error if action for positional argument does not @@ -1558,8 +1549,8 @@ def add_argument(self, *args, **kwargs): f'instance of it must be passed') # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): - formatter = self._get_formatter() + if hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: formatter._format_args(action, None) except TypeError: @@ -1747,8 +1738,8 @@ def _handle_conflict_resolve(self, action, conflicting_actions): action.container._remove_action(action) def _check_help(self, action): - if action.help and hasattr(self, "_get_formatter"): - formatter = self._get_formatter() + if action.help and hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: formatter._expand_help(action) except (ValueError, TypeError, KeyError) as exc: @@ -1884,7 +1875,7 @@ def __init__(self, exit_on_error=True, *, suggest_on_error=False, - color=False, + color=True, ): superinit = super(ArgumentParser, self).__init__ superinit(description=description, @@ -1903,6 +1894,9 @@ def __init__(self, self.suggest_on_error = suggest_on_error self.color = color + # Cached formatter for validation (avoids repeated _set_color calls) + self._cached_formatter = None + add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) self._optionals = add_group(_('options')) @@ -1933,6 +1927,7 @@ def identity(string): # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1947,6 +1942,7 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: raise ValueError('cannot have multiple subparser arguments') @@ -1964,7 +1960,9 @@ def add_subparsers(self, **kwargs): # prog defaults to the usage message of this parser, skipping # optional arguments and with no "usage:" prefix if kwargs.get('prog') is None: - formatter = self._get_formatter() + # Create formatter without color to avoid storing ANSI codes in prog + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(False) positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups formatter.add_usage(None, positionals, groups, '') @@ -2000,6 +1998,7 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: @@ -2594,6 +2593,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # ======================== # Value conversion methods # ======================== + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: @@ -2693,6 +2693,7 @@ def _check_value(self, action, value): # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2723,20 +2724,21 @@ def format_help(self): return formatter.format_help() def _get_formatter(self): - if isinstance(self.formatter_class, type) and issubclass( - self.formatter_class, HelpFormatter - ): - return self.formatter_class( - prog=self.prog, - prefix_chars=self.prefix_chars, - color=self.color, - ) - else: - return self.formatter_class(prog=self.prog) + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(self.color) + return formatter + + def _get_validation_formatter(self): + # Return cached formatter for read-only validation operations + # (_expand_help and _format_args). Avoids repeated slow _set_color calls. + if self._cached_formatter is None: + self._cached_formatter = self._get_formatter() + return self._cached_formatter # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2758,6 +2760,7 @@ def _print_message(self, message, file=None): # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) diff --git a/Lib/ast.py b/Lib/ast.py index b9791bf52d3e08..2f11683ecf7c68 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -147,18 +147,16 @@ def _format(node, level=0): if value is None and getattr(cls, name, ...) is None: keywords = True continue - if ( - not show_empty - and (value is None or value == []) - # Special cases: - # `Constant(value=None)` and `MatchSingleton(value=None)` - and not isinstance(node, (Constant, MatchSingleton)) - ): - args_buffer.append(repr(value)) - continue - elif not keywords: - args.extend(args_buffer) - args_buffer = [] + if not show_empty: + if value == []: + field_type = cls._field_types.get(name, object) + if getattr(field_type, '__origin__', ...) is list: + if not keywords: + args_buffer.append(repr(value)) + continue + if not keywords: + args.extend(args_buffer) + args_buffer = [] value, simple = _format(value, level) allsimple = allsimple and simple if keywords: diff --git a/Lib/asyncio/__init__.py b/Lib/asyncio/__init__.py index 4be7112fa017d4..32a5dbae03af21 100644 --- a/Lib/asyncio/__init__.py +++ b/Lib/asyncio/__init__.py @@ -51,15 +51,24 @@ def __getattr__(name: str): import warnings - deprecated = { - "AbstractEventLoopPolicy", - "DefaultEventLoopPolicy", - "WindowsSelectorEventLoopPolicy", - "WindowsProactorEventLoopPolicy", - } - if name in deprecated: - warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) - # deprecated things have underscores in front of them - return globals()["_" + name] + match name: + case "AbstractEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return events._AbstractEventLoopPolicy + case "DefaultEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + if sys.platform == 'win32': + return windows_events._DefaultEventLoopPolicy + return unix_events._DefaultEventLoopPolicy + case "WindowsSelectorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsSelectorEventLoopPolicy + # Else fall through to the AttributeError below. + case "WindowsProactorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsProactorEventLoopPolicy + # Else fall through to the AttributeError below. raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/asyncio/__main__.py b/Lib/asyncio/__main__.py index 21ca5c5f62ae83..e07dd52a2a5efc 100644 --- a/Lib/asyncio/__main__.py +++ b/Lib/asyncio/__main__.py @@ -64,7 +64,7 @@ def callback(): except BaseException as exc: future.set_exception(exc) - loop.call_soon_threadsafe(callback, context=self.context) + self.loop.call_soon_threadsafe(callback, context=self.context) try: return future.result() @@ -74,7 +74,8 @@ def callback(): return except BaseException: if keyboard_interrupted: - self.write("\nKeyboardInterrupt\n") + if not CAN_USE_PYREPL: + self.write("\nKeyboardInterrupt\n") else: self.showtraceback() return self.STATEMENT_FAILED diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 04fb961e9985e4..8cbb71f708537f 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -636,7 +636,7 @@ def _check_running(self): def _run_forever_setup(self): """Prepare the run loop to process events. - This method exists so that custom custom event loop subclasses (e.g., event loops + This method exists so that custom event loop subclasses (e.g., event loops that integrate a GUI event loop with Python's event loop) have access to all the loop setup logic. """ @@ -656,7 +656,7 @@ def _run_forever_setup(self): def _run_forever_cleanup(self): """Clean up after an event loop finishes the looping over events. - This method exists so that custom custom event loop subclasses (e.g., event loops + This method exists so that custom event loop subclasses (e.g., event loops that integrate a GUI event loop with Python's event loop) have access to all the loop cleanup logic. """ @@ -1016,38 +1016,43 @@ async def _connect_sock(self, exceptions, addr_info, local_addr_infos=None): family, type_, proto, _, address = addr_info sock = None try: - sock = socket.socket(family=family, type=type_, proto=proto) - sock.setblocking(False) - if local_addr_infos is not None: - for lfamily, _, _, _, laddr in local_addr_infos: - # skip local addresses of different family - if lfamily != family: - continue - try: - sock.bind(laddr) - break - except OSError as exc: - msg = ( - f'error while attempting to bind on ' - f'address {laddr!r}: {str(exc).lower()}' - ) - exc = OSError(exc.errno, msg) - my_exceptions.append(exc) - else: # all bind attempts failed - if my_exceptions: - raise my_exceptions.pop() - else: - raise OSError(f"no matching local address with {family=} found") - await self.sock_connect(sock, address) - return sock - except OSError as exc: - my_exceptions.append(exc) - if sock is not None: - sock.close() - raise + try: + sock = socket.socket(family=family, type=type_, proto=proto) + sock.setblocking(False) + if local_addr_infos is not None: + for lfamily, _, _, _, laddr in local_addr_infos: + # skip local addresses of different family + if lfamily != family: + continue + try: + sock.bind(laddr) + break + except OSError as exc: + msg = ( + f'error while attempting to bind on ' + f'address {laddr!r}: {str(exc).lower()}' + ) + exc = OSError(exc.errno, msg) + my_exceptions.append(exc) + else: # all bind attempts failed + if my_exceptions: + raise my_exceptions.pop() + else: + raise OSError(f"no matching local address with {family=} found") + await self.sock_connect(sock, address) + return sock + except OSError as exc: + my_exceptions.append(exc) + raise except: if sock is not None: - sock.close() + try: + sock.close() + except OSError: + # An error when closing a newly created socket is + # not important, but it can overwrite more important + # non-OSError error. So ignore it. + pass raise finally: exceptions = my_exceptions = None @@ -1161,7 +1166,7 @@ async def create_connection( raise ExceptionGroup("create_connection failed", exceptions) if len(exceptions) == 1: raise exceptions[0] - else: + elif exceptions: # If they all have the same str(), raise one. model = str(exceptions[0]) if all(str(exc) == model for exc in exceptions): @@ -1170,6 +1175,9 @@ async def create_connection( # the various error messages. raise OSError('Multiple exceptions: {}'.format( ', '.join(str(exc) for exc in exceptions))) + else: + # No exceptions were collected, raise a timeout error + raise TimeoutError('create_connection failed') finally: exceptions = None diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index 9c2ba679ce2bf1..321a4e5d5d18fb 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -26,6 +26,7 @@ def __init__(self, loop, protocol, args, shell, self._pending_calls = collections.deque() self._pipes = {} self._finished = False + self._pipes_connected = False if stdin == subprocess.PIPE: self._pipes[0] = None @@ -104,7 +105,12 @@ def close(self): for proto in self._pipes.values(): if proto is None: continue - proto.pipe.close() + # See gh-114177 + # skip closing the pipe if loop is already closed + # this can happen e.g. when loop is closed immediately after + # process is killed + if self._loop and not self._loop.is_closed(): + proto.pipe.close() if (self._proc is not None and # has the child process finished? @@ -208,6 +214,7 @@ async def _connect_pipes(self, waiter): else: if waiter is not None and not waiter.cancelled(): waiter.set_result(None) + self._pipes_connected = True def _call(self, cb, *data): if self._pending_calls is not None: @@ -251,6 +258,15 @@ def _try_finish(self): assert not self._finished if self._returncode is None: return + if not self._pipes_connected: + # self._pipes_connected can be False if not all pipes were connected + # because either the process failed to start or the self._connect_pipes task + # got cancelled. In this broken state we consider all pipes disconnected and + # to avoid hanging forever in self._wait as otherwise _exit_waiters + # would never be woken up, we wake them up here. + for waiter in self._exit_waiters: + if not waiter.cancelled(): + waiter.set_result(self._returncode) if all(p is not None and p.disconnected for p in self._pipes.values()): self._finished = True diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index 2913f901dca65f..a7fb55982abe9c 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -5,14 +5,11 @@ # SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. http://magic.io __all__ = ( - "_AbstractEventLoopPolicy", "AbstractEventLoop", "AbstractServer", "Handle", "TimerHandle", - "_get_event_loop_policy", "get_event_loop_policy", - "_set_event_loop_policy", "set_event_loop_policy", "get_event_loop", "set_event_loop", @@ -791,7 +788,10 @@ def _init_event_loop_policy(): global _event_loop_policy with _lock: if _event_loop_policy is None: # pragma: no branch - from . import _DefaultEventLoopPolicy + if sys.platform == 'win32': + from .windows_events import _DefaultEventLoopPolicy + else: + from .unix_events import _DefaultEventLoopPolicy _event_loop_policy = _DefaultEventLoopPolicy() diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index d1df6707302277..e8a99556b1885e 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -392,7 +392,7 @@ def _set_state(future, other): def _call_check_cancel(destination): if destination.cancelled(): - if source_loop is None or source_loop is dest_loop: + if source_loop is None or source_loop is events._get_running_loop(): source.cancel() else: source_loop.call_soon_threadsafe(source.cancel) @@ -401,7 +401,7 @@ def _call_set_state(source): if (destination.cancelled() and dest_loop is not None and dest_loop.is_closed()): return - if dest_loop is None or dest_loop is source_loop: + if dest_loop is None or dest_loop is events._get_running_loop(): _set_state(destination, source) else: if dest_loop.is_closed(): diff --git a/Lib/asyncio/graph.py b/Lib/asyncio/graph.py index d8df7c9919abbf..b5bfeb1630a159 100644 --- a/Lib/asyncio/graph.py +++ b/Lib/asyncio/graph.py @@ -1,6 +1,7 @@ """Introspection utils for tasks call graphs.""" import dataclasses +import io import sys import types @@ -16,9 +17,6 @@ 'FutureCallGraph', ) -if False: # for type checkers - from typing import TextIO - # Sadly, we can't re-use the traceback module's datastructures as those # are tailored for error reporting, whereas we need to represent an # async call graph. @@ -270,7 +268,7 @@ def print_call_graph( future: futures.Future | None = None, /, *, - file: TextIO | None = None, + file: io.Writer[str] | None = None, depth: int = 1, limit: int | None = None, ) -> None: diff --git a/Lib/asyncio/proactor_events.py b/Lib/asyncio/proactor_events.py index 7eb55bd63ddb73..f404273c3ae5c1 100644 --- a/Lib/asyncio/proactor_events.py +++ b/Lib/asyncio/proactor_events.py @@ -460,6 +460,8 @@ def _pipe_closed(self, fut): class _ProactorDatagramTransport(_ProactorBasePipeTransport, transports.DatagramTransport): max_size = 256 * 1024 + _header_size = 8 + def __init__(self, loop, sock, protocol, address=None, waiter=None, extra=None): self._address = address @@ -499,7 +501,7 @@ def sendto(self, data, addr=None): # Ensure that what we buffer is immutable. self._buffer.append((bytes(data), addr)) - self._buffer_size += len(data) + 8 # include header bytes + self._buffer_size += len(data) + self._header_size if self._write_fut is None: # No current write operations are active, kick one off @@ -526,7 +528,7 @@ def _loop_writing(self, fut=None): return data, addr = self._buffer.popleft() - self._buffer_size -= len(data) + self._buffer_size -= len(data) + self._header_size if self._address is not None: self._write_fut = self._loop._proactor.send(self._sock, data) diff --git a/Lib/asyncio/queues.py b/Lib/asyncio/queues.py index 2f3865114a84f9..084fccaaff2ff7 100644 --- a/Lib/asyncio/queues.py +++ b/Lib/asyncio/queues.py @@ -227,9 +227,6 @@ def task_done(self): been processed (meaning that a task_done() call was received for every item that had been put() into the queue). - shutdown(immediate=True) calls task_done() for each remaining item in - the queue. - Raises ValueError if called more times than there were items placed in the queue. """ @@ -256,9 +253,11 @@ def shutdown(self, immediate=False): By default, gets will only raise once the queue is empty. Set 'immediate' to True to make gets raise immediately instead. - All blocked callers of put() and get() will be unblocked. If - 'immediate', a task is marked as done for each item remaining in - the queue, which may unblock callers of join(). + All blocked callers of put() and get() will be unblocked. + + If 'immediate', the queue is drained and unfinished tasks + is reduced by the number of drained tasks. If unfinished tasks + is reduced to zero, callers of Queue.join are unblocked. """ self._is_shutdown = True if immediate: diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index 22147451fa7ebd..ff7e16df3c6273 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -173,7 +173,7 @@ def _accept_connection( # listening socket has triggered an EVENT_READ. There may be multiple # connections waiting for an .accept() so it is called in a loop. # See https://bugs.python.org/issue27906 for more details. - for _ in range(backlog): + for _ in range(backlog + 1): try: conn, addr = sock.accept() if self._debug: @@ -1050,8 +1050,8 @@ def _read_ready__on_eof(self): def write(self, data): if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError(f'data argument must be a bytes-like object, ' - f'not {type(data).__name__!r}') + raise TypeError(f'data argument must be a bytes, bytearray, or memoryview ' + f'object, not {type(data).__name__!r}') if self._eof: raise RuntimeError('Cannot call write() after write_eof()') if self._empty_waiter is not None: @@ -1174,6 +1174,13 @@ def writelines(self, list_of_data): raise RuntimeError('unable to writelines; sendfile is in progress') if not list_of_data: return + + if self._conn_lost: + if self._conn_lost >= constants.LOG_THRESHOLD_FOR_CONNLOST_WRITES: + logger.warning('socket.send() raised exception.') + self._conn_lost += 1 + return + self._buffer.extend([memoryview(data) for data in list_of_data]) self._write_ready() # If the entire buffer couldn't be written, register a write handler @@ -1212,6 +1219,7 @@ def close(self): class _SelectorDatagramTransport(_SelectorTransport, transports.DatagramTransport): _buffer_factory = collections.deque + _header_size = 8 def __init__(self, loop, sock, protocol, address=None, waiter=None, extra=None): @@ -1285,13 +1293,13 @@ def sendto(self, data, addr=None): # Ensure that what we buffer is immutable. self._buffer.append((bytes(data), addr)) - self._buffer_size += len(data) + 8 # include header bytes + self._buffer_size += len(data) + self._header_size self._maybe_pause_protocol() def _sendto_ready(self): while self._buffer: data, addr = self._buffer.popleft() - self._buffer_size -= len(data) + self._buffer_size -= len(data) + self._header_size try: if self._extra['peername']: self._sock.send(data) @@ -1299,7 +1307,7 @@ def _sendto_ready(self): self._sock.sendto(data, addr) except (BlockingIOError, InterruptedError): self._buffer.appendleft((data, addr)) # Try again later. - self._buffer_size += len(data) + self._buffer_size += len(data) + self._header_size break except OSError as exc: self._protocol.error_received(exc) diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 888615f8e5e1b3..fbd5c39a7c56ac 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -908,6 +908,25 @@ def _done_callback(fut, cur_task=cur_task): return outer +def _log_on_exception(fut): + if fut.cancelled(): + return + + exc = fut.exception() + if exc is None: + return + + context = { + 'message': + f'{exc.__class__.__name__} exception in shielded future', + 'exception': exc, + 'future': fut, + } + if fut._source_traceback: + context['source_traceback'] = fut._source_traceback + fut._loop.call_exception_handler(context) + + def shield(arg): """Wait for a future, shielding it from cancellation. @@ -953,14 +972,11 @@ def shield(arg): else: cur_task = None - def _inner_done_callback(inner, cur_task=cur_task): - if cur_task is not None: - futures.future_discard_from_awaited_by(inner, cur_task) + def _clear_awaited_by_callback(inner): + futures.future_discard_from_awaited_by(inner, cur_task) + def _inner_done_callback(inner): if outer.cancelled(): - if not inner.cancelled(): - # Mark inner's result as retrieved. - inner.exception() return if inner.cancelled(): @@ -972,10 +988,16 @@ def _inner_done_callback(inner, cur_task=cur_task): else: outer.set_result(inner.result()) - def _outer_done_callback(outer): if not inner.done(): inner.remove_done_callback(_inner_done_callback) + # Keep only one callback to log on cancel + inner.remove_done_callback(_log_on_exception) + inner.add_done_callback(_log_on_exception) + + if cur_task is not None: + inner.add_done_callback(_clear_awaited_by_callback) + inner.add_done_callback(_inner_done_callback) outer.add_done_callback(_outer_done_callback) diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index bf1cb5e64cbfbe..f39e11fdd513b4 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,71 +1,99 @@ """Tools to analyze tasks running in asyncio programs.""" -from dataclasses import dataclass -from collections import defaultdict +from collections import defaultdict, namedtuple from itertools import count from enum import Enum import sys -from _remote_debugging import get_all_awaited_by - +from _remote_debugging import RemoteUnwinder, FrameInfo class NodeType(Enum): COROUTINE = 1 TASK = 2 -@dataclass(frozen=True) class CycleFoundException(Exception): """Raised when there is a cycle when drawing the call tree.""" - cycles: list[list[int]] - id2name: dict[int, str] + def __init__( + self, + cycles: list[list[int]], + id2name: dict[int, str], + ) -> None: + super().__init__(cycles, id2name) + self.cycles = cycles + self.id2name = id2name -# ─── indexing helpers ─────────────────────────────────────────── -def _format_stack_entry(elem: tuple[str, str, int] | str) -> str: - if isinstance(elem, tuple): - fqname, path, line_no = elem - return f"{fqname} {path}:{line_no}" +# ─── indexing helpers ─────────────────────────────────────────── +def _format_stack_entry(elem: str|FrameInfo) -> str: + if not isinstance(elem, str): + if elem.lineno == 0 and elem.filename == "": + return f"{elem.funcname}" + else: + return f"{elem.funcname} {elem.filename}:{elem.lineno}" return elem def _index(result): - id2name, awaits = {}, [] - for _thr_id, tasks in result: - for tid, tname, awaited in tasks: - id2name[tid] = tname - for stack, parent_id in awaited: - stack = [_format_stack_entry(elem) for elem in stack] - awaits.append((parent_id, stack, tid)) - return id2name, awaits - - -def _build_tree(id2name, awaits): + id2name, awaits, task_stacks = {}, [], {} + for awaited_info in result: + for task_info in awaited_info.awaited_by: + task_id = task_info.task_id + task_name = task_info.task_name + id2name[task_id] = task_name + + # Store the internal coroutine stack for this task + if task_info.coroutine_stack: + for coro_info in task_info.coroutine_stack: + call_stack = coro_info.call_stack + internal_stack = [_format_stack_entry(frame) for frame in call_stack] + task_stacks[task_id] = internal_stack + + # Add the awaited_by relationships (external dependencies) + if task_info.awaited_by: + for coro_info in task_info.awaited_by: + call_stack = coro_info.call_stack + parent_task_id = coro_info.task_name + stack = [_format_stack_entry(frame) for frame in call_stack] + awaits.append((parent_task_id, stack, task_id)) + return id2name, awaits, task_stacks + + +def _build_tree(id2name, awaits, task_stacks): id2label = {(NodeType.TASK, tid): name for tid, name in id2name.items()} children = defaultdict(list) - cor_names = defaultdict(dict) # (parent) -> {frame: node} - cor_id_seq = count(1) - - def _cor_node(parent_key, frame_name): - """Return an existing or new (NodeType.COROUTINE, …) node under *parent_key*.""" - bucket = cor_names[parent_key] - if frame_name in bucket: - return bucket[frame_name] - node_key = (NodeType.COROUTINE, f"c{next(cor_id_seq)}") - id2label[node_key] = frame_name - children[parent_key].append(node_key) - bucket[frame_name] = node_key + cor_nodes = defaultdict(dict) # Maps parent -> {frame_name: node_key} + next_cor_id = count(1) + + def get_or_create_cor_node(parent, frame): + """Get existing coroutine node or create new one under parent""" + if frame in cor_nodes[parent]: + return cor_nodes[parent][frame] + + node_key = (NodeType.COROUTINE, f"c{next(next_cor_id)}") + id2label[node_key] = frame + children[parent].append(node_key) + cor_nodes[parent][frame] = node_key return node_key - # lay down parent ➜ …frames… ➜ child paths + # Build task dependency tree with coroutine frames for parent_id, stack, child_id in awaits: cur = (NodeType.TASK, parent_id) - for frame in reversed(stack): # outer-most → inner-most - cur = _cor_node(cur, frame) + for frame in reversed(stack): + cur = get_or_create_cor_node(cur, frame) + child_key = (NodeType.TASK, child_id) if child_key not in children[cur]: children[cur].append(child_key) + # Add coroutine stacks for leaf tasks + awaiting_tasks = {parent_id for parent_id, _, _ in awaits} + for task_id in id2name: + if task_id not in awaiting_tasks and task_id in task_stacks: + cur = (NodeType.TASK, task_id) + for frame in reversed(task_stacks[task_id]): + cur = get_or_create_cor_node(cur, frame) + return id2label, children @@ -112,6 +140,11 @@ def dfs(v): # ─── PRINT TREE FUNCTION ─────────────────────────────────────── +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + def build_async_tree(result, task_emoji="(T)", cor_emoji=""): """ Build a list of strings for pretty-print an async call tree. @@ -119,12 +152,12 @@ def build_async_tree(result, task_emoji="(T)", cor_emoji=""): The call tree is produced by `get_all_async_stacks()`, prefixing tasks with `task_emoji` and coroutine frames with `cor_emoji`. """ - id2name, awaits = _index(result) + id2name, awaits, task_stacks = _index(result) g = _task_graph(awaits) cycles = _find_cycles(g) if cycles: raise CycleFoundException(cycles, id2name) - labels, children = _build_tree(id2name, awaits) + labels, children = _build_tree(id2name, awaits, task_stacks) def pretty(node): flag = task_emoji if node[0] == NodeType.TASK else cor_emoji @@ -144,35 +177,40 @@ def render(node, prefix="", last=True, buf=None): def build_task_table(result): - id2name, awaits = _index(result) + id2name, _, _ = _index(result) table = [] - for tid, tasks in result: - for task_id, task_name, awaited in tasks: - if not awaited: - table.append( - [ - tid, - hex(task_id), - task_name, - "", - "", - "0x0" - ] - ) - for stack, awaiter_id in awaited: - stack = [elem[0] if isinstance(elem, tuple) else elem for elem in stack] - coroutine_chain = " -> ".join(stack) - awaiter_name = id2name.get(awaiter_id, "Unknown") - table.append( - [ - tid, - hex(task_id), - task_name, - coroutine_chain, - awaiter_name, - hex(awaiter_id), - ] - ) + + for awaited_info in result: + thread_id = awaited_info.thread_id + for task_info in awaited_info.awaited_by: + # Get task info + task_id = task_info.task_id + task_name = task_info.task_name + + # Build coroutine stack string + frames = [frame for coro in task_info.coroutine_stack + for frame in coro.call_stack] + coro_stack = " -> ".join(_format_stack_entry(x).split(" ")[0] + for x in frames) + + # Handle tasks with no awaiters + if not task_info.awaited_by: + table.append([thread_id, hex(task_id), task_name, coro_stack, + "", "", "0x0"]) + continue + + # Handle tasks with awaiters + for coro_info in task_info.awaited_by: + parent_id = coro_info.task_name + awaiter_frames = [_format_stack_entry(x).split(" ")[0] + for x in coro_info.call_stack] + awaiter_chain = " -> ".join(awaiter_frames) + awaiter_name = id2name.get(parent_id, "Unknown") + parent_id_str = (hex(parent_id) if isinstance(parent_id, int) + else str(parent_id)) + + table.append([thread_id, hex(task_id), task_name, coro_stack, + awaiter_chain, awaiter_name, parent_id_str]) return table @@ -184,6 +222,20 @@ def _print_cycle_exception(exception: CycleFoundException): print(f"cycle: {inames}", file=sys.stderr) +def exit_with_permission_help_text(): + """ + Prints a message pointing to platform-specific permission help text and exits the program. + This function is called when a PermissionError is encountered while trying + to attach to a process. + """ + print( + "Error: The specified process cannot be attached to due to insufficient permissions.\n" + "See the Python documentation for details on required privileges and troubleshooting:\n" + "https://docs.python.org/3.14/howto/remote_debugging.html#permission-requirements\n" + ) + sys.exit(1) + + def _get_awaited_by_tasks(pid: int) -> list: try: return get_all_awaited_by(pid) @@ -192,6 +244,8 @@ def _get_awaited_by_tasks(pid: int) -> list: e = e.__context__ print(f"Error retrieving tasks: {e}") sys.exit(1) + except PermissionError as e: + exit_with_permission_help_text() def display_awaited_by_tasks_table(pid: int) -> None: @@ -201,11 +255,11 @@ def display_awaited_by_tasks_table(pid: int) -> None: table = build_task_table(tasks) # Print the table in a simple tabular format print( - f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine chain':<50} {'awaiter name':<20} {'awaiter id':<15}" + f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine stack':<50} {'awaiter chain':<50} {'awaiter name':<15} {'awaiter id':<15}" ) - print("-" * 135) + print("-" * 180) for row in table: - print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<20} {row[5]:<15}") + print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<50} {row[5]:<15} {row[6]:<15}") def display_awaited_by_tasks_tree(pid: int) -> None: diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py index f69c6a64c39ae6..1c1458127db5ac 100644 --- a/Lib/asyncio/unix_events.py +++ b/Lib/asyncio/unix_events.py @@ -28,7 +28,6 @@ __all__ = ( 'SelectorEventLoop', - '_DefaultEventLoopPolicy', 'EventLoop', ) diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40cd3..f95132a4274051 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -462,9 +462,12 @@ def b85decode(b): # Delay the initialization of tables to not waste memory # if the function is never called if _b85dec is None: - _b85dec = [None] * 256 + # we don't assign to _b85dec directly to avoid issues when + # multiple threads call this function simultaneously + b85dec_tmp = [None] * 256 for i, c in enumerate(_b85alphabet): - _b85dec[c] = i + b85dec_tmp[c] = i + _b85dec = b85dec_tmp b = _bytes_from_decode_data(b) padding = (-len(b)) % 5 @@ -601,7 +604,14 @@ def main(): with open(args[0], 'rb') as f: func(f, sys.stdout.buffer) else: - func(sys.stdin.buffer, sys.stdout.buffer) + if sys.stdin.isatty(): + # gh-138775: read terminal input data all at once to detect EOF + import io + data = sys.stdin.buffer.read() + buffer = io.BytesIO(data) + else: + buffer = sys.stdin.buffer + func(buffer, sys.stdout.buffer) if __name__ == '__main__': diff --git a/Lib/bdb.py b/Lib/bdb.py index 4290ef22302a42..79da4bab9c9034 100644 --- a/Lib/bdb.py +++ b/Lib/bdb.py @@ -199,6 +199,8 @@ def __init__(self, skip=None, backend='settrace'): self.frame_returning = None self.trace_opcodes = False self.enterframe = None + self.cmdframe = None + self.cmdlineno = None self.code_linenos = weakref.WeakKeyDictionary() self.backend = backend if backend == 'monitoring': @@ -306,7 +308,12 @@ def dispatch_line(self, frame): self.user_line(). Raise BdbQuit if self.quitting is set. Return self.trace_dispatch to continue tracing in this scope. """ - if self.stop_here(frame) or self.break_here(frame): + # GH-136057 + # For line events, we don't want to stop at the same line where + # the latest next/step command was issued. + if (self.stop_here(frame) or self.break_here(frame)) and not ( + self.cmdframe == frame and self.cmdlineno == frame.f_lineno + ): self.user_line(frame) self.restart_events() if self.quitting: raise BdbQuit @@ -535,7 +542,8 @@ def _set_trace_opcodes(self, trace_opcodes): if self.monitoring_tracer: self.monitoring_tracer.update_local_events() - def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False): + def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False, + cmdframe=None, cmdlineno=None): """Set the attributes for stopping. If stoplineno is greater than or equal to 0, then stop at line @@ -548,6 +556,10 @@ def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False): # stoplineno >= 0 means: stop at line >= the stoplineno # stoplineno -1 means: don't stop at all self.stoplineno = stoplineno + # cmdframe/cmdlineno is the frame/line number when the user issued + # step/next commands. + self.cmdframe = cmdframe + self.cmdlineno = cmdlineno self._set_trace_opcodes(opcode) def _set_caller_tracefunc(self, current_frame): @@ -573,7 +585,9 @@ def set_until(self, frame, lineno=None): def set_step(self): """Stop after one line of code.""" - self._set_stopinfo(None, None) + # set_step() could be called from signal handler so enterframe might be None + self._set_stopinfo(None, None, cmdframe=self.enterframe, + cmdlineno=getattr(self.enterframe, 'f_lineno', None)) def set_stepinstr(self): """Stop before the next instruction.""" @@ -581,7 +595,7 @@ def set_stepinstr(self): def set_next(self, frame): """Stop on the next line in or below the given frame.""" - self._set_stopinfo(frame, None) + self._set_stopinfo(frame, None, cmdframe=frame, cmdlineno=frame.f_lineno) def set_return(self, frame): """Stop when returning from the given frame.""" diff --git a/Lib/codecs.py b/Lib/codecs.py index fc38e922257644..e4a8010aba90a5 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -618,7 +618,7 @@ def readlines(self, sizehint=None, keepends=True): method and are included in the list entries. sizehint, if given, is ignored since there is no efficient - way to finding the true end-of-line. + way of finding the true end-of-line. """ data = self.read() @@ -709,13 +709,13 @@ def read(self, size=-1): return self.reader.read(size) - def readline(self, size=None): + def readline(self, size=None, keepends=True): - return self.reader.readline(size) + return self.reader.readline(size, keepends) - def readlines(self, sizehint=None): + def readlines(self, sizehint=None, keepends=True): - return self.reader.readlines(sizehint) + return self.reader.readlines(sizehint, keepends) def __next__(self): diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index d2ddc1cd9ec2ea..803de0c6792d64 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1495,6 +1495,8 @@ def format_map(self, mapping): return self.data.format_map(mapping) def index(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.index(sub, start, end) def isalpha(self): @@ -1563,6 +1565,8 @@ def rfind(self, sub, start=0, end=_sys.maxsize): return self.data.rfind(sub, start, end) def rindex(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.rindex(sub, start, end) def rjust(self, width, *args): diff --git a/Lib/compression/bz2/__init__.py b/Lib/compression/bz2.py similarity index 100% rename from Lib/compression/bz2/__init__.py rename to Lib/compression/bz2.py diff --git a/Lib/compression/gzip/__init__.py b/Lib/compression/gzip.py similarity index 100% rename from Lib/compression/gzip/__init__.py rename to Lib/compression/gzip.py diff --git a/Lib/compression/lzma/__init__.py b/Lib/compression/lzma.py similarity index 100% rename from Lib/compression/lzma/__init__.py rename to Lib/compression/lzma.py diff --git a/Lib/compression/zlib/__init__.py b/Lib/compression/zlib.py similarity index 100% rename from Lib/compression/zlib/__init__.py rename to Lib/compression/zlib.py diff --git a/Lib/compression/zstd/__init__.py b/Lib/compression/zstd/__init__.py index 4f734eb07b00e3..84b25914b0aa93 100644 --- a/Lib/compression/zstd/__init__.py +++ b/Lib/compression/zstd/__init__.py @@ -2,41 +2,48 @@ __all__ = ( # compression.zstd - "COMPRESSION_LEVEL_DEFAULT", - "compress", - "CompressionParameter", - "decompress", - "DecompressionParameter", - "finalize_dict", - "get_frame_info", - "Strategy", - "train_dict", + 'COMPRESSION_LEVEL_DEFAULT', + 'compress', + 'CompressionParameter', + 'decompress', + 'DecompressionParameter', + 'finalize_dict', + 'get_frame_info', + 'Strategy', + 'train_dict', # compression.zstd._zstdfile - "open", - "ZstdFile", + 'open', + 'ZstdFile', # _zstd - "get_frame_size", - "zstd_version", - "zstd_version_info", - "ZstdCompressor", - "ZstdDecompressor", - "ZstdDict", - "ZstdError", + 'get_frame_size', + 'zstd_version', + 'zstd_version_info', + 'ZstdCompressor', + 'ZstdDecompressor', + 'ZstdDict', + 'ZstdError', ) import _zstd import enum -from _zstd import * +from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, + get_frame_size, zstd_version) from compression.zstd._zstdfile import ZstdFile, open, _nbytes -COMPRESSION_LEVEL_DEFAULT = _zstd._compressionLevel_values[0] +# zstd_version_number is (MAJOR * 100 * 100 + MINOR * 100 + RELEASE) +zstd_version_info = (*divmod(_zstd.zstd_version_number // 100, 100), + _zstd.zstd_version_number % 100) +"""Version number of the runtime zstd library as a tuple of integers.""" + +COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT """The default compression level for Zstandard, currently '3'.""" class FrameInfo: """Information about a Zstandard frame.""" + __slots__ = 'decompressed_size', 'dictionary_id' def __init__(self, decompressed_size, dictionary_id): @@ -65,7 +72,7 @@ def get_frame_info(frame_buffer): the frame may or may not need a dictionary to be decoded, and the ID of such a dictionary is not specified. """ - return FrameInfo(*_zstd._get_frame_info(frame_buffer)) + return FrameInfo(*_zstd.get_frame_info(frame_buffer)) def train_dict(samples, dict_size): @@ -85,7 +92,7 @@ def train_dict(samples, dict_size): chunk_sizes = tuple(_nbytes(sample) for sample in samples) if not chunks: raise ValueError("samples contained no data; can't train dictionary.") - dict_content = _zstd._train_dict(chunks, chunk_sizes, dict_size) + dict_content = _zstd.train_dict(chunks, chunk_sizes, dict_size) return ZstdDict(dict_content) @@ -119,13 +126,13 @@ def finalize_dict(zstd_dict, /, samples, dict_size, level): chunks = b''.join(samples) chunk_sizes = tuple(_nbytes(sample) for sample in samples) if not chunks: - raise ValueError("The samples are empty content, can't finalize the" + raise ValueError("The samples are empty content, can't finalize the " "dictionary.") - dict_content = _zstd._finalize_dict(zstd_dict.dict_content, - chunks, chunk_sizes, - dict_size, level) + dict_content = _zstd.finalize_dict(zstd_dict.dict_content, chunks, + chunk_sizes, dict_size, level) return ZstdDict(dict_content) + def compress(data, level=None, options=None, zstd_dict=None): """Return Zstandard compressed *data* as bytes. @@ -141,6 +148,7 @@ def compress(data, level=None, options=None, zstd_dict=None): comp = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict) return comp.compress(data, mode=ZstdCompressor.FLUSH_FRAME) + def decompress(data, zstd_dict=None, options=None): """Decompress one or more frames of Zstandard compressed *data*. @@ -156,59 +164,59 @@ def decompress(data, zstd_dict=None, options=None): decomp = ZstdDecompressor(options=options, zstd_dict=zstd_dict) results.append(decomp.decompress(data)) if not decomp.eof: - raise ZstdError("Compressed data ended before the " - "end-of-stream marker was reached") + raise ZstdError('Compressed data ended before the ' + 'end-of-stream marker was reached') data = decomp.unused_data if not data: break - return b"".join(results) + return b''.join(results) class CompressionParameter(enum.IntEnum): """Compression parameters.""" - compression_level = _zstd._ZSTD_c_compressionLevel - window_log = _zstd._ZSTD_c_windowLog - hash_log = _zstd._ZSTD_c_hashLog - chain_log = _zstd._ZSTD_c_chainLog - search_log = _zstd._ZSTD_c_searchLog - min_match = _zstd._ZSTD_c_minMatch - target_length = _zstd._ZSTD_c_targetLength - strategy = _zstd._ZSTD_c_strategy - - enable_long_distance_matching = _zstd._ZSTD_c_enableLongDistanceMatching - ldm_hash_log = _zstd._ZSTD_c_ldmHashLog - ldm_min_match = _zstd._ZSTD_c_ldmMinMatch - ldm_bucket_size_log = _zstd._ZSTD_c_ldmBucketSizeLog - ldm_hash_rate_log = _zstd._ZSTD_c_ldmHashRateLog - - content_size_flag = _zstd._ZSTD_c_contentSizeFlag - checksum_flag = _zstd._ZSTD_c_checksumFlag - dict_id_flag = _zstd._ZSTD_c_dictIDFlag - - nb_workers = _zstd._ZSTD_c_nbWorkers - job_size = _zstd._ZSTD_c_jobSize - overlap_log = _zstd._ZSTD_c_overlapLog + compression_level = _zstd.ZSTD_c_compressionLevel + window_log = _zstd.ZSTD_c_windowLog + hash_log = _zstd.ZSTD_c_hashLog + chain_log = _zstd.ZSTD_c_chainLog + search_log = _zstd.ZSTD_c_searchLog + min_match = _zstd.ZSTD_c_minMatch + target_length = _zstd.ZSTD_c_targetLength + strategy = _zstd.ZSTD_c_strategy + + enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching + ldm_hash_log = _zstd.ZSTD_c_ldmHashLog + ldm_min_match = _zstd.ZSTD_c_ldmMinMatch + ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog + ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog + + content_size_flag = _zstd.ZSTD_c_contentSizeFlag + checksum_flag = _zstd.ZSTD_c_checksumFlag + dict_id_flag = _zstd.ZSTD_c_dictIDFlag + + nb_workers = _zstd.ZSTD_c_nbWorkers + job_size = _zstd.ZSTD_c_jobSize + overlap_log = _zstd.ZSTD_c_overlapLog def bounds(self): """Return the (lower, upper) int bounds of a compression parameter. Both the lower and upper bounds are inclusive. """ - return _zstd._get_param_bounds(self.value, is_compress=True) + return _zstd.get_param_bounds(self.value, is_compress=True) class DecompressionParameter(enum.IntEnum): """Decompression parameters.""" - window_log_max = _zstd._ZSTD_d_windowLogMax + window_log_max = _zstd.ZSTD_d_windowLogMax def bounds(self): """Return the (lower, upper) int bounds of a decompression parameter. Both the lower and upper bounds are inclusive. """ - return _zstd._get_param_bounds(self.value, is_compress=False) + return _zstd.get_param_bounds(self.value, is_compress=False) class Strategy(enum.IntEnum): @@ -219,16 +227,16 @@ class Strategy(enum.IntEnum): the numeric value might change. """ - fast = _zstd._ZSTD_fast - dfast = _zstd._ZSTD_dfast - greedy = _zstd._ZSTD_greedy - lazy = _zstd._ZSTD_lazy - lazy2 = _zstd._ZSTD_lazy2 - btlazy2 = _zstd._ZSTD_btlazy2 - btopt = _zstd._ZSTD_btopt - btultra = _zstd._ZSTD_btultra - btultra2 = _zstd._ZSTD_btultra2 + fast = _zstd.ZSTD_fast + dfast = _zstd.ZSTD_dfast + greedy = _zstd.ZSTD_greedy + lazy = _zstd.ZSTD_lazy + lazy2 = _zstd.ZSTD_lazy2 + btlazy2 = _zstd.ZSTD_btlazy2 + btopt = _zstd.ZSTD_btopt + btultra = _zstd.ZSTD_btultra + btultra2 = _zstd.ZSTD_btultra2 # Check validity of the CompressionParameter & DecompressionParameter types -_zstd._set_parameter_types(CompressionParameter, DecompressionParameter) +_zstd.set_parameter_types(CompressionParameter, DecompressionParameter) diff --git a/Lib/compression/zstd/_zstdfile.py b/Lib/compression/zstd/_zstdfile.py index fbc9e02a733626..d709f5efc658fa 100644 --- a/Lib/compression/zstd/_zstdfile.py +++ b/Lib/compression/zstd/_zstdfile.py @@ -1,12 +1,9 @@ import io from os import PathLike -from _zstd import (ZstdCompressor, ZstdDecompressor, _ZSTD_DStreamSizes, - ZstdError) +from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize from compression._common import _streams -__all__ = ("ZstdFile", "open") - -_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1] +__all__ = ('ZstdFile', 'open') _MODE_CLOSED = 0 _MODE_READ = 1 @@ -33,15 +30,15 @@ class ZstdFile(_streams.BaseStream): FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME - def __init__(self, file, /, mode="r", *, + def __init__(self, file, /, mode='r', *, level=None, options=None, zstd_dict=None): """Open a Zstandard compressed file in binary mode. *file* can be either an file-like object, or a file name to open. - *mode* can be "r" for reading (default), "w" for (over)writing, "x" for - creating exclusively, or "a" for appending. These can equivalently be - given as "rb", "wb", "xb" and "ab" respectively. + *mode* can be 'r' for reading (default), 'w' for (over)writing, 'x' for + creating exclusively, or 'a' for appending. These can equivalently be + given as 'rb', 'wb', 'xb' and 'ab' respectively. *level* is an optional int specifying the compression level to use, or COMPRESSION_LEVEL_DEFAULT if not given. @@ -59,39 +56,38 @@ def __init__(self, file, /, mode="r", *, self._buffer = None if not isinstance(mode, str): - raise ValueError("mode must be a str") + raise ValueError('mode must be a str') if options is not None and not isinstance(options, dict): - raise TypeError("options must be a dict or None") - mode = mode.removesuffix("b") # handle rb, wb, xb, ab - if mode == "r": + raise TypeError('options must be a dict or None') + mode = mode.removesuffix('b') # handle rb, wb, xb, ab + if mode == 'r': if level is not None: - raise TypeError("level is illegal in read mode") + raise TypeError('level is illegal in read mode') self._mode = _MODE_READ - elif mode in {"w", "a", "x"}: + elif mode in {'w', 'a', 'x'}: if level is not None and not isinstance(level, int): - raise TypeError("level must be int or None") + raise TypeError('level must be int or None') self._mode = _MODE_WRITE self._compressor = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict) self._pos = 0 else: - raise ValueError(f"Invalid mode: {mode!r}") + raise ValueError(f'Invalid mode: {mode!r}') if isinstance(file, (str, bytes, PathLike)): self._fp = io.open(file, f'{mode}b') self._close_fp = True - elif ((mode == 'r' and hasattr(file, "read")) - or (mode != 'r' and hasattr(file, "write"))): + elif ((mode == 'r' and hasattr(file, 'read')) + or (mode != 'r' and hasattr(file, 'write'))): self._fp = file else: - raise TypeError("file must be a file-like object " - "or a str, bytes, or PathLike object") + raise TypeError('file must be a file-like object ' + 'or a str, bytes, or PathLike object') if self._mode == _MODE_READ: raw = _streams.DecompressReader( self._fp, ZstdDecompressor, - trailing_error=ZstdError, zstd_dict=zstd_dict, options=options, ) @@ -154,22 +150,22 @@ def flush(self, mode=FLUSH_BLOCK): return self._check_not_closed() if mode not in {self.FLUSH_BLOCK, self.FLUSH_FRAME}: - raise ValueError("Invalid mode argument, expected either " - "ZstdFile.FLUSH_FRAME or " - "ZstdFile.FLUSH_BLOCK") + raise ValueError('Invalid mode argument, expected either ' + 'ZstdFile.FLUSH_FRAME or ' + 'ZstdFile.FLUSH_BLOCK') if self._compressor.last_mode == mode: return # Flush zstd block/frame, and write. data = self._compressor.flush(mode) self._fp.write(data) - if hasattr(self._fp, "flush"): + if hasattr(self._fp, 'flush'): self._fp.flush() def read(self, size=-1): """Read up to size uncompressed bytes from the file. If size is negative or omitted, read until EOF is reached. - Returns b"" if the file is already at EOF. + Returns b'' if the file is already at EOF. """ if size is None: size = -1 @@ -181,14 +177,14 @@ def read1(self, size=-1): making multiple reads from the underlying stream. Reads up to a buffer's worth of data if size is negative. - Returns b"" if the file is at EOF. + Returns b'' if the file is at EOF. """ self._check_can_read() if size < 0: # Note this should *not* be io.DEFAULT_BUFFER_SIZE. # ZSTD_DStreamOutSize is the minimum amount to read guaranteeing # a full block is read. - size = _ZSTD_DStreamOutSize + size = ZSTD_DStreamOutSize return self._buffer.read1(size) def readinto(self, b): @@ -296,7 +292,7 @@ def writable(self): return self._mode == _MODE_WRITE -def open(file, /, mode="rb", *, level=None, options=None, zstd_dict=None, +def open(file, /, mode='rb', *, level=None, options=None, zstd_dict=None, encoding=None, errors=None, newline=None): """Open a Zstandard compressed file in binary or text mode. @@ -304,8 +300,8 @@ def open(file, /, mode="rb", *, level=None, options=None, zstd_dict=None, in which case the named file is opened, or it can be an existing file object to read from or write to. - The mode parameter can be "r", "rb" (default), "w", "wb", "x", "xb", "a", - "ab" for binary mode, or "rt", "wt", "xt", "at" for text mode. + The mode parameter can be 'r', 'rb' (default), 'w', 'wb', 'x', 'xb', 'a', + 'ab' for binary mode, or 'rt', 'wt', 'xt', 'at' for text mode. The level, options, and zstd_dict parameters specify the settings the same as ZstdFile. @@ -326,19 +322,19 @@ def open(file, /, mode="rb", *, level=None, options=None, zstd_dict=None, behavior, and line ending(s). """ - text_mode = "t" in mode - mode = mode.replace("t", "") + text_mode = 't' in mode + mode = mode.replace('t', '') if text_mode: - if "b" in mode: - raise ValueError(f"Invalid mode: {mode!r}") + if 'b' in mode: + raise ValueError(f'Invalid mode: {mode!r}') else: if encoding is not None: - raise ValueError("Argument 'encoding' not supported in binary mode") + raise ValueError('Argument "encoding" not supported in binary mode') if errors is not None: - raise ValueError("Argument 'errors' not supported in binary mode") + raise ValueError('Argument "errors" not supported in binary mode') if newline is not None: - raise ValueError("Argument 'newline' not supported in binary mode") + raise ValueError('Argument "newline" not supported in binary mode') binary_file = ZstdFile(file, mode, level=level, options=options, zstd_dict=zstd_dict) diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index 7ada7431c1ab8c..d6ac4b3e0b675f 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -17,7 +17,7 @@ wait, as_completed) -__all__ = ( +__all__ = [ 'FIRST_COMPLETED', 'FIRST_EXCEPTION', 'ALL_COMPLETED', @@ -29,36 +29,37 @@ 'Executor', 'wait', 'as_completed', - 'InterpreterPoolExecutor', 'ProcessPoolExecutor', 'ThreadPoolExecutor', -) +] + + +try: + import _interpreters +except ImportError: + _interpreters = None + +if _interpreters: + __all__.append('InterpreterPoolExecutor') def __dir__(): - return __all__ + ('__author__', '__doc__') + return __all__ + ['__author__', '__doc__'] def __getattr__(name): global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor if name == 'ProcessPoolExecutor': - from .process import ProcessPoolExecutor as pe - ProcessPoolExecutor = pe - return pe + from .process import ProcessPoolExecutor + return ProcessPoolExecutor if name == 'ThreadPoolExecutor': - from .thread import ThreadPoolExecutor as te - ThreadPoolExecutor = te - return te + from .thread import ThreadPoolExecutor + return ThreadPoolExecutor - if name == 'InterpreterPoolExecutor': - try: - from .interpreter import InterpreterPoolExecutor as ie - except ModuleNotFoundError: - ie = InterpreterPoolExecutor = None - else: - InterpreterPoolExecutor = ie - return ie + if _interpreters and name == 'InterpreterPoolExecutor': + from .interpreter import InterpreterPoolExecutor + return InterpreterPoolExecutor raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index d17688dc9d7346..85c1da2c722894 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -1,69 +1,38 @@ """Implements InterpreterPoolExecutor.""" -import contextlib -import pickle -import textwrap +from concurrent import interpreters +import sys from . import thread as _thread -import _interpreters -import _interpqueues +import traceback -class ExecutionFailed(_interpreters.InterpreterError): - """An unhandled exception happened during execution.""" - - def __init__(self, excinfo): - msg = excinfo.formatted - if not msg: - if excinfo.type and excinfo.msg: - msg = f'{excinfo.type.__name__}: {excinfo.msg}' - else: - msg = excinfo.type.__name__ or excinfo.msg - super().__init__(msg) - self.excinfo = excinfo - - def __str__(self): +def do_call(results, func, args, kwargs): + try: + return func(*args, **kwargs) + except BaseException as exc: + # Send the captured exception out on the results queue, + # but still leave it unhandled for the interpreter to handle. try: - formatted = self.excinfo.errdisplay - except Exception: - return super().__str__() - else: - return textwrap.dedent(f""" -{super().__str__()} - -Uncaught in the interpreter: - -{formatted} - """.strip()) - - -UNBOUND = 2 # error; this should not happen. + results.put(exc) + except interpreters.NotShareableError: + # The exception is not shareable. + print('exception is not shareable:', file=sys.stderr) + traceback.print_exception(exc) + results.put(None) + raise # re-raise class WorkerContext(_thread.WorkerContext): @classmethod - def prepare(cls, initializer, initargs, shared): + def prepare(cls, initializer, initargs): def resolve_task(fn, args, kwargs): if isinstance(fn, str): # XXX Circle back to this later. raise TypeError('scripts not supported') - if args or kwargs: - raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}') - data = textwrap.dedent(fn) - kind = 'script' - # Make sure the script compiles. - # Ideally we wouldn't throw away the resulting code - # object. However, there isn't much to be done until - # code objects are shareable and/or we do a better job - # of supporting code objects in _interpreters.exec(). - compile(data, '', 'exec') else: - # Functions defined in the __main__ module can't be pickled, - # so they can't be used here. In the future, we could possibly - # borrow from multiprocessing to work around this. - data = pickle.dumps((fn, args, kwargs)) - kind = 'function' - return (data, kind) + task = (fn, args, kwargs) + return task if initializer is not None: try: @@ -75,73 +44,24 @@ def resolve_task(fn, args, kwargs): else: initdata = None def create_context(): - return cls(initdata, shared) + return cls(initdata) return create_context, resolve_task - @classmethod - @contextlib.contextmanager - def _capture_exc(cls, resultsid): - try: - yield - except BaseException as exc: - # Send the captured exception out on the results queue, - # but still leave it unhandled for the interpreter to handle. - err = pickle.dumps(exc) - _interpqueues.put(resultsid, (None, err), 1, UNBOUND) - raise # re-raise - - @classmethod - def _send_script_result(cls, resultsid): - _interpqueues.put(resultsid, (None, None), 0, UNBOUND) - - @classmethod - def _call(cls, func, args, kwargs, resultsid): - with cls._capture_exc(resultsid): - res = func(*args or (), **kwargs or {}) - # Send the result back. - try: - _interpqueues.put(resultsid, (res, None), 0, UNBOUND) - except _interpreters.NotShareableError: - res = pickle.dumps(res) - _interpqueues.put(resultsid, (res, None), 1, UNBOUND) - - @classmethod - def _call_pickled(cls, pickled, resultsid): - with cls._capture_exc(resultsid): - fn, args, kwargs = pickle.loads(pickled) - cls._call(fn, args, kwargs, resultsid) - - def __init__(self, initdata, shared=None): + def __init__(self, initdata): self.initdata = initdata - self.shared = dict(shared) if shared else None - self.interpid = None - self.resultsid = None + self.interp = None + self.results = None def __del__(self): - if self.interpid is not None: + if self.interp is not None: self.finalize() - def _exec(self, script): - assert self.interpid is not None - excinfo = _interpreters.exec(self.interpid, script, restrict=True) - if excinfo is not None: - raise ExecutionFailed(excinfo) - def initialize(self): - assert self.interpid is None, self.interpid - self.interpid = _interpreters.create(reqrefs=True) + assert self.interp is None, self.interp + self.interp = interpreters.create() try: - _interpreters.incref(self.interpid) - maxsize = 0 - fmt = 0 - self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND) - - self._exec(f'from {__name__} import WorkerContext') - - if self.shared: - _interpreters.set___main___attrs( - self.interpid, self.shared, restrict=True) + self.results = interpreters.create_queue(maxsize) if self.initdata: self.run(self.initdata) @@ -150,64 +70,25 @@ def initialize(self): raise # re-raise def finalize(self): - interpid = self.interpid - resultsid = self.resultsid - self.resultsid = None - self.interpid = None - if resultsid is not None: - try: - _interpqueues.destroy(resultsid) - except _interpqueues.QueueNotFoundError: - pass - if interpid is not None: - try: - _interpreters.decref(interpid) - except _interpreters.InterpreterNotFoundError: - pass + interp = self.interp + results = self.results + self.results = None + self.interp = None + if results is not None: + del results + if interp is not None: + interp.close() def run(self, task): - data, kind = task - if kind == 'script': - raise NotImplementedError('script kind disabled') - script = f""" -with WorkerContext._capture_exc({self.resultsid}): -{textwrap.indent(data, ' ')} -WorkerContext._send_script_result({self.resultsid})""" - elif kind == 'function': - script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})' - else: - raise NotImplementedError(kind) - try: - self._exec(script) - except ExecutionFailed as exc: - exc_wrapper = exc - else: - exc_wrapper = None - - # Return the result, or raise the exception. - while True: - try: - obj = _interpqueues.get(self.resultsid) - except _interpqueues.QueueNotFoundError: + return self.interp.call(do_call, self.results, *task) + except interpreters.ExecutionFailed as wrapper: + # Wait for the exception data to show up. + exc = self.results.get() + if exc is None: + # The exception must have been not shareable. raise # re-raise - except _interpqueues.QueueError: - continue - except ModuleNotFoundError: - # interpreters.queues doesn't exist, which means - # QueueEmpty doesn't. Act as though it does. - continue - else: - break - (res, excdata), pickled, unboundop = obj - assert unboundop is None, unboundop - if excdata is not None: - assert res is None, res - assert pickled - assert exc_wrapper is not None - exc = pickle.loads(excdata) - raise exc from exc_wrapper - return pickle.loads(res) if pickled else res + raise exc from wrapper class BrokenInterpreterPool(_thread.BrokenThreadPool): @@ -221,11 +102,11 @@ class InterpreterPoolExecutor(_thread.ThreadPoolExecutor): BROKEN = BrokenInterpreterPool @classmethod - def prepare_context(cls, initializer, initargs, shared): - return WorkerContext.prepare(initializer, initargs, shared) + def prepare_context(cls, initializer, initargs): + return WorkerContext.prepare(initializer, initargs) def __init__(self, max_workers=None, thread_name_prefix='', - initializer=None, initargs=(), shared=None): + initializer=None, initargs=()): """Initializes a new InterpreterPoolExecutor instance. Args: @@ -235,8 +116,8 @@ def __init__(self, max_workers=None, thread_name_prefix='', initializer: A callable or script used to initialize each worker interpreter. initargs: A tuple of arguments to pass to the initializer. - shared: A mapping of shareabled objects to be inserted into - each worker interpreter. """ + thread_name_prefix = (thread_name_prefix or + (f"InterpreterPoolExecutor-{self._counter()}")) super().__init__(max_workers, thread_name_prefix, - initializer, initargs, shared=shared) + initializer, initargs) diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index 76b7b2abe836d8..a14650bf5fa47c 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -755,6 +755,11 @@ def _start_executor_manager_thread(self): self._executor_manager_thread_wakeup def _adjust_process_count(self): + # gh-132969: avoid error when state is reset and executor is still running, + # which will happen when shutdown(wait=False) is called. + if self._processes is None: + return + # if there's an idle process, we don't need to spawn a new one. if self._idle_worker_semaphore.acquire(blocking=False): return diff --git a/Lib/test/support/interpreters/__init__.py b/Lib/concurrent/interpreters/__init__.py similarity index 84% rename from Lib/test/support/interpreters/__init__.py rename to Lib/concurrent/interpreters/__init__.py index e067f259364d2a..ea4147ee9a25da 100644 --- a/Lib/test/support/interpreters/__init__.py +++ b/Lib/concurrent/interpreters/__init__.py @@ -9,6 +9,10 @@ InterpreterError, InterpreterNotFoundError, NotShareableError, is_shareable, ) +from ._queues import ( + create as create_queue, + Queue, QueueEmpty, QueueFull, +) __all__ = [ @@ -20,21 +24,6 @@ ] -_queuemod = None - -def __getattr__(name): - if name in ('Queue', 'QueueEmpty', 'QueueFull', 'create_queue'): - global create_queue, Queue, QueueEmpty, QueueFull - ns = globals() - from .queues import ( - create as create_queue, - Queue, QueueEmpty, QueueFull, - ) - return ns[name] - else: - raise AttributeError(name) - - _EXEC_FAILURE_STR = """ {superstr} @@ -157,19 +146,20 @@ def __del__(self): self._decref() # for pickling: - def __getnewargs__(self): - return (self._id,) - - # for pickling: - def __getstate__(self): - return None - - def _decref(self): + def __reduce__(self): + return (type(self), (self._id,)) + + # gh-135729: Globals might be destroyed by the time this is called, so we + # need to keep references ourself + def _decref(self, *, + InterpreterNotFoundError=InterpreterNotFoundError, + _interp_decref=_interpreters.decref, + ): if not self._ownsref: return self._ownsref = False try: - _interpreters.decref(self._id) + _interp_decref(self._id) except InterpreterNotFoundError: pass @@ -226,33 +216,32 @@ def exec(self, code, /): if excinfo is not None: raise ExecutionFailed(excinfo) - def call(self, callable, /): - """Call the object in the interpreter with given args/kwargs. + def _call(self, callable, args, kwargs): + res, excinfo = _interpreters.call(self._id, callable, args, kwargs, restrict=True) + if excinfo is not None: + raise ExecutionFailed(excinfo) + return res - Only functions that take no arguments and have no closure - are supported. + def call(self, callable, /, *args, **kwargs): + """Call the object in the interpreter with given args/kwargs. - The return value is discarded. + Nearly all callables, args, kwargs, and return values are + supported. All "shareable" objects are supported, as are + "stateless" functions (meaning non-closures that do not use + any globals). This method will fall back to pickle. If the callable raises an exception then the error display - (including full traceback) is send back between the interpreters + (including full traceback) is sent back between the interpreters and an ExecutionFailed exception is raised, much like what happens with Interpreter.exec(). """ - # XXX Support args and kwargs. - # XXX Support arbitrary callables. - # XXX Support returning the return value (e.g. via pickle). - excinfo = _interpreters.call(self._id, callable, restrict=True) - if excinfo is not None: - raise ExecutionFailed(excinfo) + return self._call(callable, args, kwargs) - def call_in_thread(self, callable, /): + def call_in_thread(self, callable, /, *args, **kwargs): """Return a new thread that calls the object in the interpreter. The return value and any raised exception are discarded. """ - def task(): - self.call(callable) - t = threading.Thread(target=task) + t = threading.Thread(target=self._call, args=(callable, args, kwargs)) t.start() return t diff --git a/Lib/test/support/interpreters/_crossinterp.py b/Lib/concurrent/interpreters/_crossinterp.py similarity index 86% rename from Lib/test/support/interpreters/_crossinterp.py rename to Lib/concurrent/interpreters/_crossinterp.py index 544e197ba4c028..a5f46b20fbb4c5 100644 --- a/Lib/test/support/interpreters/_crossinterp.py +++ b/Lib/concurrent/interpreters/_crossinterp.py @@ -40,16 +40,21 @@ class UnboundItem: @classonly def singleton(cls, kind, module, name='UNBOUND'): - doc = cls.__doc__.replace('cross-interpreter container', kind) - doc = doc.replace('cross-interpreter', kind) + doc = cls.__doc__ + if doc: + doc = doc.replace( + 'cross-interpreter container', kind, + ).replace( + 'cross-interpreter', kind, + ) subclass = type( f'Unbound{kind.capitalize()}Item', (cls,), - dict( - _MODULE=module, - _NAME=name, - __doc__=doc, - ), + { + "_MODULE": module, + "_NAME": name, + "__doc__": doc, + }, ) return object.__new__(subclass) @@ -61,7 +66,7 @@ def __new__(cls): def __repr__(self): return f'{self._MODULE}.{self._NAME}' -# return f'interpreters.queues.UNBOUND' +# return f'interpreters._queues.UNBOUND' UNBOUND = object.__new__(UnboundItem) diff --git a/Lib/test/support/interpreters/queues.py b/Lib/concurrent/interpreters/_queues.py similarity index 59% rename from Lib/test/support/interpreters/queues.py rename to Lib/concurrent/interpreters/_queues.py index deb8e8613af731..81ea1098d7f9f3 100644 --- a/Lib/test/support/interpreters/queues.py +++ b/Lib/concurrent/interpreters/_queues.py @@ -63,29 +63,34 @@ def _resolve_unbound(flag): return resolved -def create(maxsize=0, *, syncobj=False, unbounditems=UNBOUND): +def create(maxsize=0, *, unbounditems=UNBOUND): """Return a new cross-interpreter queue. The queue may be used to pass data safely between interpreters. - "syncobj" sets the default for Queue.put() - and Queue.put_nowait(). - - "unbounditems" likewise sets the default. See Queue.put() for + "unbounditems" sets the default for Queue.put(); see that method for supported values. The default value is UNBOUND, which replaces the unbound item. """ - fmt = _SHARED_ONLY if syncobj else _PICKLED unbound = _serialize_unbound(unbounditems) unboundop, = unbound - qid = _queues.create(maxsize, fmt, unboundop) - return Queue(qid, _fmt=fmt, _unbound=unbound) + qid = _queues.create(maxsize, unboundop, -1) + self = Queue(qid) + self._set_unbound(unboundop, unbounditems) + return self def list_all(): """Return a list of all open queues.""" - return [Queue(qid, _fmt=fmt, _unbound=(unboundop,)) - for qid, fmt, unboundop in _queues.list_all()] + queues = [] + for qid, unboundop, _ in _queues.list_all(): + self = Queue(qid) + if not hasattr(self, '_unbound'): + self._set_unbound(unboundop) + else: + assert self._unbound[0] == unboundop + queues.append(self) + return queues _known_queues = weakref.WeakValueDictionary() @@ -93,28 +98,17 @@ def list_all(): class Queue: """A cross-interpreter queue.""" - def __new__(cls, id, /, *, _fmt=None, _unbound=None): + def __new__(cls, id, /): # There is only one instance for any given ID. if isinstance(id, int): id = int(id) else: raise TypeError(f'id must be an int, got {id!r}') - if _fmt is None: - if _unbound is None: - _fmt, op = _queues.get_queue_defaults(id) - _unbound = (op,) - else: - _fmt, _ = _queues.get_queue_defaults(id) - elif _unbound is None: - _, op = _queues.get_queue_defaults(id) - _unbound = (op,) try: self = _known_queues[id] except KeyError: self = super().__new__(cls) self._id = id - self._fmt = _fmt - self._unbound = _unbound _known_queues[id] = self _queues.bind(id) return self @@ -136,17 +130,30 @@ def __hash__(self): return hash(self._id) # for pickling: - def __getnewargs__(self): - return (self._id,) + def __reduce__(self): + return (type(self), (self._id,)) - # for pickling: - def __getstate__(self): - return None + def _set_unbound(self, op, items=None): + assert not hasattr(self, '_unbound') + if items is None: + items = _resolve_unbound(op) + unbound = (op, items) + self._unbound = unbound + return unbound @property def id(self): return self._id + @property + def unbounditems(self): + try: + _, items = self._unbound + except AttributeError: + op, _ = _queues.get_queue_defaults(self._id) + _, items = self._set_unbound(op) + return items + @property def maxsize(self): try: @@ -164,78 +171,59 @@ def full(self): def qsize(self): return _queues.get_count(self._id) - def put(self, obj, timeout=None, *, - syncobj=None, - unbound=None, + def put(self, obj, block=True, timeout=None, *, + unbounditems=None, _delay=10 / 1000, # 10 milliseconds ): """Add the object to the queue. - This blocks while the queue is full. + If "block" is true, this blocks while the queue is full. - If "syncobj" is None (the default) then it uses the - queue's default, set with create_queue(). + For most objects, the object received through Queue.get() will + be a new one, equivalent to the original and not sharing any + actual underlying data. The notable exceptions include + cross-interpreter types (like Queue) and memoryview, where the + underlying data is actually shared. Furthermore, some types + can be sent through a queue more efficiently than others. This + group includes various immutable types like int, str, bytes, and + tuple (if the items are likewise efficiently shareable). See interpreters.is_shareable(). - If "syncobj" is false then all objects are supported, - at the expense of worse performance. - - If "syncobj" is true then the object must be "shareable". - Examples of "shareable" objects include the builtin singletons, - str, and memoryview. One benefit is that such objects are - passed through the queue efficiently. - - The key difference, though, is conceptual: the corresponding - object returned from Queue.get() will be strictly equivalent - to the given obj. In other words, the two objects will be - effectively indistinguishable from each other, even if the - object is mutable. The received object may actually be the - same object, or a copy (immutable values only), or a proxy. - Regardless, the received object should be treated as though - the original has been shared directly, whether or not it - actually is. That's a slightly different and stronger promise - than just (initial) equality, which is all "syncobj=False" - can promise. - - "unbound" controls the behavior of Queue.get() for the given + "unbounditems" controls the behavior of Queue.get() for the given object if the current interpreter (calling put()) is later destroyed. - If "unbound" is None (the default) then it uses the + If "unbounditems" is None (the default) then it uses the queue's default, set with create_queue(), which is usually UNBOUND. - If "unbound" is UNBOUND_ERROR then get() will raise an + If "unbounditems" is UNBOUND_ERROR then get() will raise an ItemInterpreterDestroyed exception if the original interpreter has been destroyed. This does not otherwise affect the queue; the next call to put() will work like normal, returning the next item in the queue. - If "unbound" is UNBOUND_REMOVE then the item will be removed + If "unbounditems" is UNBOUND_REMOVE then the item will be removed from the queue as soon as the original interpreter is destroyed. Be aware that this will introduce an imbalance between put() and get() calls. - If "unbound" is UNBOUND then it is returned by get() in place + If "unbounditems" is UNBOUND then it is returned by get() in place of the unbound item. """ - if syncobj is None: - fmt = self._fmt - else: - fmt = _SHARED_ONLY if syncobj else _PICKLED - if unbound is None: - unboundop, = self._unbound + if not block: + return self.put_nowait(obj, unbounditems=unbounditems) + if unbounditems is None: + unboundop = -1 else: - unboundop, = _serialize_unbound(unbound) + unboundop, = _serialize_unbound(unbounditems) if timeout is not None: timeout = int(timeout) if timeout < 0: raise ValueError(f'timeout value must be non-negative') end = time.time() + timeout - if fmt is _PICKLED: - obj = pickle.dumps(obj) while True: try: - _queues.put(self._id, obj, fmt, unboundop) + _queues.put(self._id, obj, unboundop) except QueueFull as exc: if timeout is not None and time.time() >= end: raise # re-raise @@ -243,30 +231,26 @@ def put(self, obj, timeout=None, *, else: break - def put_nowait(self, obj, *, syncobj=None, unbound=None): - if syncobj is None: - fmt = self._fmt + def put_nowait(self, obj, *, unbounditems=None): + if unbounditems is None: + unboundop = -1 else: - fmt = _SHARED_ONLY if syncobj else _PICKLED - if unbound is None: - unboundop, = self._unbound - else: - unboundop, = _serialize_unbound(unbound) - if fmt is _PICKLED: - obj = pickle.dumps(obj) - _queues.put(self._id, obj, fmt, unboundop) + unboundop, = _serialize_unbound(unbounditems) + _queues.put(self._id, obj, unboundop) - def get(self, timeout=None, *, + def get(self, block=True, timeout=None, *, _delay=10 / 1000, # 10 milliseconds ): """Return the next object from the queue. - This blocks while the queue is empty. + If "block" is true, this blocks while the queue is empty. If the next item's original interpreter has been destroyed then the "next object" is determined by the value of the - "unbound" argument to put(). + "unbounditems" argument to put(). """ + if not block: + return self.get_nowait() if timeout is not None: timeout = int(timeout) if timeout < 0: @@ -274,7 +258,7 @@ def get(self, timeout=None, *, end = time.time() + timeout while True: try: - obj, fmt, unboundop = _queues.get(self._id) + obj, unboundop = _queues.get(self._id) except QueueEmpty as exc: if timeout is not None and time.time() >= end: raise # re-raise @@ -284,10 +268,6 @@ def get(self, timeout=None, *, if unboundop is not None: assert obj is None, repr(obj) return _resolve_unbound(unboundop) - if fmt == _PICKLED: - obj = pickle.loads(obj) - else: - assert fmt == _SHARED_ONLY return obj def get_nowait(self): @@ -297,16 +277,12 @@ def get_nowait(self): is the same as get(). """ try: - obj, fmt, unboundop = _queues.get(self._id) + obj, unboundop = _queues.get(self._id) except QueueEmpty as exc: raise # re-raise if unboundop is not None: assert obj is None, repr(obj) return _resolve_unbound(unboundop) - if fmt == _PICKLED: - obj = pickle.loads(obj) - else: - assert fmt == _SHARED_ONLY return obj diff --git a/Lib/configparser.py b/Lib/configparser.py index 239fda60a02ca0..18af1eadaad111 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1218,11 +1218,14 @@ def _convert_to_boolean(self, value): def _validate_key_contents(self, key): """Raises an InvalidWriteError for any keys containing - delimiters or that match the section header pattern""" + delimiters or that begins with the section header pattern""" if re.match(self.SECTCRE, key): - raise InvalidWriteError("Cannot write keys matching section pattern") - if any(delim in key for delim in self._delimiters): - raise InvalidWriteError("Cannot write key that contains delimiters") + raise InvalidWriteError( + f"Cannot write key {key}; begins with section pattern") + for delim in self._delimiters: + if delim in key: + raise InvalidWriteError( + f"Cannot write key {key}; contains delimiter {delim}") def _validate_value_types(self, *, section="", option="", value=""): """Raises a TypeError for illegal non-string values. diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index 823a3692fd1bbf..04ec0270148e0e 100644 --- a/Lib/ctypes/__init__.py +++ b/Lib/ctypes/__init__.py @@ -1,6 +1,8 @@ """create and manipulate C data types in Python""" -import os as _os, sys as _sys +import os as _os +import sys as _sys +import sysconfig as _sysconfig import types as _types __version__ = "1.1.0" @@ -108,7 +110,7 @@ class CFunctionType(_CFuncPtr): return CFunctionType if _os.name == "nt": - from _ctypes import LoadLibrary as _dlopen + from _ctypes import LoadLibrary as _LoadLibrary from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL _win_functype_cache = {} @@ -416,52 +418,59 @@ def __init__(self, name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None): + class _FuncPtr(_CFuncPtr): + _flags_ = self._func_flags_ + _restype_ = self._func_restype_ + if use_errno: + _flags_ |= _FUNCFLAG_USE_ERRNO + if use_last_error: + _flags_ |= _FUNCFLAG_USE_LASTERROR + + self._FuncPtr = _FuncPtr if name: name = _os.fspath(name) + self._handle = self._load_library(name, mode, handle, winmode) + + if _os.name == "nt": + def _load_library(self, name, mode, handle, winmode): + if winmode is None: + import nt as _nt + winmode = _nt._LOAD_LIBRARY_SEARCH_DEFAULT_DIRS + # WINAPI LoadLibrary searches for a DLL if the given name + # is not fully qualified with an explicit drive. For POSIX + # compatibility, and because the DLL search path no longer + # contains the working directory, begin by fully resolving + # any name that contains a path separator. + if name is not None and ('/' in name or '\\' in name): + name = _nt._getfullpathname(name) + winmode |= _nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR + self._name = name + if handle is not None: + return handle + return _LoadLibrary(self._name, winmode) + + else: + def _load_library(self, name, mode, handle, winmode): # If the filename that has been provided is an iOS/tvOS/watchOS # .fwork file, dereference the location to the true origin of the # binary. - if name.endswith(".fwork"): + if name and name.endswith(".fwork"): with open(name) as f: name = _os.path.join( _os.path.dirname(_sys.executable), f.read().strip() ) - - self._name = name - flags = self._func_flags_ - if use_errno: - flags |= _FUNCFLAG_USE_ERRNO - if use_last_error: - flags |= _FUNCFLAG_USE_LASTERROR - if _sys.platform.startswith("aix"): - """When the name contains ".a(" and ends with ")", - e.g., "libFOO.a(libFOO.so)" - this is taken to be an - archive(member) syntax for dlopen(), and the mode is adjusted. - Otherwise, name is presented to dlopen() as a file argument. - """ - if name and name.endswith(")") and ".a(" in name: - mode |= ( _os.RTLD_MEMBER | _os.RTLD_NOW ) - if _os.name == "nt": - if winmode is not None: - mode = winmode - else: - import nt - mode = nt._LOAD_LIBRARY_SEARCH_DEFAULT_DIRS - if '/' in name or '\\' in name: - self._name = nt._getfullpathname(self._name) - mode |= nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR - - class _FuncPtr(_CFuncPtr): - _flags_ = flags - _restype_ = self._func_restype_ - self._FuncPtr = _FuncPtr - - if handle is None: - self._handle = _dlopen(self._name, mode) - else: - self._handle = handle + if _sys.platform.startswith("aix"): + """When the name contains ".a(" and ends with ")", + e.g., "libFOO.a(libFOO.so)" - this is taken to be an + archive(member) syntax for dlopen(), and the mode is adjusted. + Otherwise, name is presented to dlopen() as a file argument. + """ + if name and name.endswith(")") and ".a(" in name: + mode |= _os.RTLD_MEMBER | _os.RTLD_NOW + self._name = name + return _dlopen(name, mode) def __repr__(self): return "<%s '%s', handle %x at %#x>" % \ @@ -549,10 +558,9 @@ def LoadLibrary(self, name): if _os.name == "nt": pythonapi = PyDLL("python dll", None, _sys.dllhandle) -elif _sys.platform == "android": - pythonapi = PyDLL("libpython%d.%d.so" % _sys.version_info[:2]) -elif _sys.platform == "cygwin": - pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2]) +elif _sys.platform in ["android", "cygwin"]: + # These are Unix-like platforms which use a dynamically-linked libpython. + pythonapi = PyDLL(_sysconfig.get_config_var("LDLIBRARY")) else: pythonapi = PyDLL(None) diff --git a/Lib/ctypes/util.py b/Lib/ctypes/util.py index 99504911a3dbe0..378f12167c6842 100644 --- a/Lib/ctypes/util.py +++ b/Lib/ctypes/util.py @@ -173,6 +173,25 @@ def find_library(name): fname = f"{directory}/lib{name}.so" return fname if os.path.isfile(fname) else None +elif sys.platform == "emscripten": + def _is_wasm(filename): + # Return True if the given file is an WASM module + wasm_header = b"\x00asm" + with open(filename, 'br') as thefile: + return thefile.read(4) == wasm_header + + def find_library(name): + candidates = [f"lib{name}.so", f"lib{name}.wasm"] + paths = os.environ.get("LD_LIBRARY_PATH", "") + for libdir in paths.split(":"): + for name in candidates: + libfile = os.path.join(libdir, name) + + if os.path.isfile(libfile) and _is_wasm(libfile): + return libfile + + return None + elif os.name == "posix": # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump import re, tempfile diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 86d29df0639184..c8dbb247745ab7 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -441,9 +441,11 @@ def __init__(self, globals): self.locals = {} self.overwrite_errors = {} self.unconditional_adds = {} + self.method_annotations = {} def add_fn(self, name, args, body, *, locals=None, return_type=MISSING, - overwrite_error=False, unconditional_add=False, decorator=None): + overwrite_error=False, unconditional_add=False, decorator=None, + annotation_fields=None): if locals is not None: self.locals.update(locals) @@ -464,16 +466,14 @@ def add_fn(self, name, args, body, *, locals=None, return_type=MISSING, self.names.append(name) - if return_type is not MISSING: - self.locals[f'__dataclass_{name}_return_type__'] = return_type - return_annotation = f'->__dataclass_{name}_return_type__' - else: - return_annotation = '' + if annotation_fields is not None: + self.method_annotations[name] = (annotation_fields, return_type) + args = ','.join(args) body = '\n'.join(body) # Compute the text of the entire function, add it to the text we're generating. - self.src.append(f'{f' {decorator}\n' if decorator else ''} def {name}({args}){return_annotation}:\n{body}') + self.src.append(f'{f' {decorator}\n' if decorator else ''} def {name}({args}):\n{body}') def add_fns_to_class(self, cls): # The source to all of the functions we're generating. @@ -509,6 +509,15 @@ def add_fns_to_class(self, cls): # Now that we've generated the functions, assign them into cls. for name, fn in zip(self.names, fns): fn.__qualname__ = f"{cls.__qualname__}.{fn.__name__}" + + try: + annotation_fields, return_type = self.method_annotations[name] + except KeyError: + pass + else: + annotate_fn = _make_annotate_function(cls, name, annotation_fields, return_type) + fn.__annotate__ = annotate_fn + if self.unconditional_adds.get(name, False): setattr(cls, name, fn) else: @@ -524,6 +533,49 @@ def add_fns_to_class(self, cls): raise TypeError(error_msg) +def _make_annotate_function(__class__, method_name, annotation_fields, return_type): + # Create an __annotate__ function for a dataclass + # Try to return annotations in the same format as they would be + # from a regular __init__ function + + def __annotate__(format, /): + Format = annotationlib.Format + match format: + case Format.VALUE | Format.FORWARDREF | Format.STRING: + cls_annotations = {} + for base in reversed(__class__.__mro__): + cls_annotations.update( + annotationlib.get_annotations(base, format=format) + ) + + new_annotations = {} + for k in annotation_fields: + # gh-142214: The annotation may be missing in unusual dynamic cases. + # If so, just skip it. + try: + new_annotations[k] = cls_annotations[k] + except KeyError: + pass + + if return_type is not MISSING: + if format == Format.STRING: + new_annotations["return"] = annotationlib.type_repr(return_type) + else: + new_annotations["return"] = return_type + + return new_annotations + + case _: + raise NotImplementedError(format) + + # This is a flag for _add_slots to know it needs to regenerate this method + # In order to remove references to the original class when it is replaced + __annotate__.__generated_by_dataclasses__ = True + __annotate__.__qualname__ = f"{__class__.__qualname__}.{method_name}.__annotate__" + + return __annotate__ + + def _field_assign(frozen, name, value, self_name): # If we're a frozen class, then assign to our fields in __init__ # via object.__setattr__. Otherwise, just use a simple @@ -612,7 +664,7 @@ def _init_param(f): elif f.default_factory is not MISSING: # There's a factory function. Set a marker. default = '=__dataclass_HAS_DEFAULT_FACTORY__' - return f'{f.name}:__dataclass_type_{f.name}__{default}' + return f'{f.name}{default}' def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, @@ -635,11 +687,10 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, raise TypeError(f'non-default argument {f.name!r} ' f'follows default argument {seen_default.name!r}') - locals = {**{f'__dataclass_type_{f.name}__': f.type for f in fields}, - **{'__dataclass_HAS_DEFAULT_FACTORY__': _HAS_DEFAULT_FACTORY, - '__dataclass_builtins_object__': object, - } - } + annotation_fields = [f.name for f in fields if f.init] + + locals = {'__dataclass_HAS_DEFAULT_FACTORY__': _HAS_DEFAULT_FACTORY, + '__dataclass_builtins_object__': object} body_lines = [] for f in fields: @@ -670,7 +721,8 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, [self_name] + _init_params, body_lines, locals=locals, - return_type=None) + return_type=None, + annotation_fields=annotation_fields) def _frozen_get_del_attr(cls, fields, func_builder): @@ -1265,7 +1317,7 @@ def _create_slots(defined_fields, inherited_slots, field_names, weakref_slot): doc = getattr(defined_fields.get(slot), 'doc', None) if doc is not None: seen_docs = True - slots.update({slot: doc}) + slots[slot] = doc # We only return dict if there's at least one doc member, # otherwise we return tuple, which is the old default format. @@ -1283,6 +1335,10 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): if '__slots__' in cls.__dict__: raise TypeError(f'{cls.__name__} already specifies __slots__') + # gh-102069: Remove existing __weakref__ descriptor. + # gh-135228: Make sure the original class can be garbage collected. + sys._clear_type_descriptors(cls) + # Create a new dict for our new class. cls_dict = dict(cls.__dict__) field_names = tuple(f.name for f in fields(cls)) @@ -1300,12 +1356,6 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): # available in _MARKER. cls_dict.pop(field_name, None) - # Remove __dict__ itself. - cls_dict.pop('__dict__', None) - - # Clear existing `__weakref__` descriptor, it belongs to a previous type: - cls_dict.pop('__weakref__', None) # gh-102069 - # And finally create the class. qualname = getattr(cls, '__qualname__', None) newcls = type(cls)(cls.__name__, cls.__bases__, cls_dict) @@ -1338,6 +1388,26 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): or _update_func_cell_for__class__(member.fdel, cls, newcls)): break + # Get new annotations to remove references to the original class + # in forward references + newcls_ann = annotationlib.get_annotations( + newcls, format=annotationlib.Format.FORWARDREF) + + # Fix references in dataclass Fields + for f in getattr(newcls, _FIELDS).values(): + try: + ann = newcls_ann[f.name] + except KeyError: + pass + else: + f.type = ann + + # Fix the class reference in the __annotate__ method + init = newcls.__init__ + if init_annotate := getattr(init, "__annotate__", None): + if getattr(init_annotate, "__generated_by_dataclasses__", False): + _update_func_cell_for__class__(init_annotate, cls, newcls) + return newcls diff --git a/Lib/dbm/sqlite3.py b/Lib/dbm/sqlite3.py index 7e0ae2a29e3a64..d0eed54e0f84c1 100644 --- a/Lib/dbm/sqlite3.py +++ b/Lib/dbm/sqlite3.py @@ -59,18 +59,22 @@ def __init__(self, path, /, *, flag, mode): # We use the URI format when opening the database. uri = _normalize_uri(path) uri = f"{uri}?mode={flag}" + if flag == "ro": + # Add immutable=1 to allow read-only SQLite access even if wal/shm missing + uri += "&immutable=1" try: self._cx = sqlite3.connect(uri, autocommit=True, uri=True) except sqlite3.Error as exc: raise error(str(exc)) - # This is an optimization only; it's ok if it fails. - with suppress(sqlite3.OperationalError): - self._cx.execute("PRAGMA journal_mode = wal") + if flag != "ro": + # This is an optimization only; it's ok if it fails. + with suppress(sqlite3.OperationalError): + self._cx.execute("PRAGMA journal_mode = wal") - if flag == "rwc": - self._execute(BUILD_TABLE) + if flag == "rwc": + self._execute(BUILD_TABLE) def _execute(self, *args, **kwargs): if not self._cx: diff --git a/Lib/difflib.py b/Lib/difflib.py index f1f4e62514a7bd..ac1ba4a6e4ee94 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -78,8 +78,8 @@ class SequenceMatcher: sequences. As a rule of thumb, a .ratio() value over 0.6 means the sequences are close matches: - >>> print(round(s.ratio(), 3)) - 0.866 + >>> print(round(s.ratio(), 2)) + 0.87 >>> If you're only interested in where the sequences match, diff --git a/Lib/doctest.py b/Lib/doctest.py index 2acb6cb79f394d..a66888d8fc9673 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -94,6 +94,7 @@ def _test(): import __future__ import difflib +import functools import inspect import linecache import os @@ -108,6 +109,8 @@ def _test(): from _colorize import ANSIColors, can_colorize +__unittest = True + class TestResults(namedtuple('TestResults', 'failed attempted')): def __new__(cls, failed, attempted, *, skipped=0): results = super().__new__(cls, failed, attempted) @@ -1140,7 +1143,9 @@ def _find_lineno(self, obj, source_lines): if inspect.ismethod(obj): obj = obj.__func__ if isinstance(obj, property): obj = obj.fget - if inspect.isfunction(obj) and getattr(obj, '__doc__', None): + if isinstance(obj, functools.cached_property): + obj = obj.func + if inspect.isroutine(obj) and getattr(obj, '__doc__', None): # We don't use `docstring` var here, because `obj` can be changed. obj = inspect.unwrap(obj) try: @@ -1395,11 +1400,11 @@ def __run(self, test, compileflags, out): exec(compile(example.source, filename, "single", compileflags, True), test.globs) self.debugger.set_continue() # ==== Example Finished ==== - exception = None + exc_info = None except KeyboardInterrupt: raise - except: - exception = sys.exc_info() + except BaseException as exc: + exc_info = type(exc), exc, exc.__traceback__.tb_next self.debugger.set_continue() # ==== Example Finished ==== got = self._fakeout.getvalue() # the actual output @@ -1408,21 +1413,21 @@ def __run(self, test, compileflags, out): # If the example executed without raising any exceptions, # verify its output. - if exception is None: + if exc_info is None: if check(example.want, got, self.optionflags): outcome = SUCCESS # The example raised an exception: check if it was expected. else: - formatted_ex = traceback.format_exception_only(*exception[:2]) - if issubclass(exception[0], SyntaxError): + formatted_ex = traceback.format_exception_only(*exc_info[:2]) + if issubclass(exc_info[0], SyntaxError): # SyntaxError / IndentationError is special: # we don't care about the carets / suggestions / etc # We only care about the error message and notes. # They start with `SyntaxError:` (or any other class name) exception_line_prefixes = ( - f"{exception[0].__qualname__}:", - f"{exception[0].__module__}.{exception[0].__qualname__}:", + f"{exc_info[0].__qualname__}:", + f"{exc_info[0].__module__}.{exc_info[0].__qualname__}:", ) exc_msg_index = next( index @@ -1433,7 +1438,7 @@ def __run(self, test, compileflags, out): exc_msg = "".join(formatted_ex) if not quiet: - got += _exception_traceback(exception) + got += _exception_traceback(exc_info) # If `example.exc_msg` is None, then we weren't expecting # an exception. @@ -1462,7 +1467,7 @@ def __run(self, test, compileflags, out): elif outcome is BOOM: if not quiet: self.report_unexpected_exception(out, test, example, - exception) + exc_info) failures += 1 else: assert False, ("unknown outcome", outcome) @@ -1989,8 +1994,8 @@ def testmod(m=None, name=None, globs=None, verbose=None, from module m (or the current module if m is not supplied), starting with m.__doc__. - Also test examples reachable from dict m.__test__ if it exists and is - not None. m.__test__ maps names to functions, classes and strings; + Also test examples reachable from dict m.__test__ if it exists. + m.__test__ maps names to functions, classes and strings; function and class docstrings are tested even if the name is private; strings are tested directly, as if they were docstrings. @@ -2324,7 +2329,7 @@ def runTest(self): sys.stdout = old if results.failed: - raise self.failureException(self.format_failure(new.getvalue())) + raise self.failureException(self.format_failure(new.getvalue().rstrip('\n'))) def format_failure(self, err): test = self._dt_test diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 9a51b9437333db..41401f8f8d54da 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1020,6 +1020,8 @@ def _get_ptext_to_endchars(value, endchars): a flag that is True iff there were any quoted printables decoded. """ + if not value: + return '', '', False fragment, *remainder = _wsp_splitter(value, 1) vchars = [] escape = False @@ -1573,7 +1575,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True @@ -1592,9 +1594,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) @@ -2786,6 +2788,9 @@ def _steal_trailing_WSP_if_exists(lines): if lines and lines[-1] and lines[-1][-1] in WSP: wsp = lines[-1][-1] lines[-1] = lines[-1][:-1] + # gh-142006: if the line is now empty, remove it entirely. + if not lines[-1]: + lines.pop() return wsp def _refold_parse_tree(parse_tree, *, policy): diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 84917038874ba1..6a7c5fa06d20b6 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -146,8 +146,9 @@ def _parsedate_tz(data): return None # Check for a yy specified in two-digit format, then convert it to the # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already + # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues + # this requirement. For more information, see the documentation for # the time module. if yy < 100: # The year is between 1969 and 1999 (inclusive). @@ -233,9 +234,11 @@ def __init__(self, field): self.CR = '\r\n' self.FWS = self.LWS + self.CR self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies '.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. + # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle + # existing practice (periods in display names), even though it was not + # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822) + # continues this requirement. We must recognize obsolete syntax, so + # allow dots in phrases. self.phraseends = self.atomends.replace('.', '') self.field = field self.commentlist = [] diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 95e79b8938bb4c..e23843df44881f 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -380,7 +380,7 @@ def _fold(self, name, value, sanitize): h = value if h is not None: # The Header class interprets a value of None for maxlinelen as the - # default value of 78, as recommended by RFC 2822. + # default value of 78, as recommended by RFC 5322 section 2.1.1. maxlinelen = 0 if self.max_line_length is not None: maxlinelen = self.max_line_length diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py index b4f5830beada4a..11d1536db27d79 100644 --- a/Lib/email/contentmanager.py +++ b/Lib/email/contentmanager.py @@ -2,6 +2,7 @@ import email.charset import email.message import email.errors +import sys from email import quoprimime class ContentManager: @@ -142,13 +143,15 @@ def _encode_base64(data, max_line_length): def _encode_text(string, charset, cte, policy): + # If max_line_length is 0 or None, there is no limit. + maxlen = policy.max_line_length or sys.maxsize lines = string.encode(charset).splitlines() linesep = policy.linesep.encode('ascii') def embedded_body(lines): return linesep.join(lines) + linesep def normal_body(lines): return b'\n'.join(lines) + b'\n' if cte is None: # Use heuristics to decide on the "best" encoding. - if max((len(x) for x in lines), default=0) <= policy.max_line_length: + if max(map(len, lines), default=0) <= maxlen: try: return '7bit', normal_body(lines).decode('ascii') except UnicodeDecodeError: @@ -156,8 +159,7 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n' if policy.cte_type == '8bit': return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') sniff = embedded_body(lines[:10]) - sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), - policy.max_line_length) + sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen) sniff_base64 = binascii.b2a_base64(sniff) # This is a little unfair to qp; it includes lineseps, base64 doesn't. if len(sniff_qp) > len(sniff_base64): @@ -172,9 +174,9 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n' data = normal_body(lines).decode('ascii', 'surrogateescape') elif cte == 'quoted-printable': data = quoprimime.body_encode(normal_body(lines).decode('latin-1'), - policy.max_line_length) + maxlen) elif cte == 'base64': - data = _encode_base64(embedded_body(lines), policy.max_line_length) + data = _encode_base64(embedded_body(lines), maxlen) else: raise ValueError("Unknown content transfer encoding {}".format(cte)) return cte, data diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 9d80a5822af48d..ae8ef32792b3e9 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -32,7 +32,7 @@ NLCRE_bol = re.compile(r'(\r\n|\r|\n)') NLCRE_eol = re.compile(r'(\r\n|\r|\n)\z') NLCRE_crack = re.compile(r'(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character +# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' @@ -294,7 +294,7 @@ def _parsegen(self): return if self._cur.get_content_maintype() == 'message': # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. + # another RFC 5322 message. for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData @@ -504,10 +504,9 @@ def _parse_headers(self, lines): self._input.unreadline(line) return else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. + # Weirdly placed unix-from line. defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue # Split the line on the colon separating field name from value. # There will always be a colon, because if there wasn't the part of @@ -519,7 +518,7 @@ def _parse_headers(self, lines): # message. Track the error but keep going. if i == 0: defect = errors.InvalidHeaderDefect("Missing header name.") - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue assert i>0, "_parse_headers fed line with no : and no leading WS" diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ab5bd0653e440c..03524c96559153 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -50,7 +50,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *, expanded to 8 spaces) than maxheaderlen, the header will split as defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. + by RFC 5322 section 2.1.1. The policy keyword specifies a policy object that controls a number of aspects of the generator's operation. If no policy is specified, diff --git a/Lib/email/header.py b/Lib/email/header.py index 113a81f41314ec..220a84a7454b21 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -59,16 +59,22 @@ def decode_header(header): """Decode a message header value without converting charset. - Returns a list of (string, charset) pairs containing each of the decoded - parts of the header. Charset is None for non-encoded parts of the header, - otherwise a lower-case string containing the name of the character set - specified in the encoded string. + For historical reasons, this function may return either: + + 1. A list of length 1 containing a pair (str, None). + 2. A list of (bytes, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). + + This function exists for backwards compatibility only. For new code, we + recommend using email.headerregistry.HeaderRegistry instead. """ # If it is a Header object, we can just return the encoded chunks. if hasattr(header, '_chunks'): @@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, This function takes one of those sequence of pairs and returns a Header instance. Optional maxlinelen, header_name, and continuation_ws are as in the Header constructor. + + This function exists for backwards compatibility only, and is not + recommended for use in new code. """ h = Header(maxlinelen=maxlinelen, header_name=header_name, continuation_ws=continuation_ws) diff --git a/Lib/email/message.py b/Lib/email/message.py index 87fcab68868d46..641fb2e944d431 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -74,19 +74,25 @@ def _parseparam(s): # RDM This might be a Header, so for now stringify it. s = ';' + str(s) plist = [] - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) + start = 0 + while s.find(';', start) == start: + start += 1 + end = s.find(';', start) + ind, diff = start, 0 + while end > 0: + diff += s.count('"', ind, end) - s.count('\\"', ind, end) + if diff % 2 == 0: + break + end, ind = ind, s.find(';', end + 1) if end < 0: end = len(s) - f = s[:end] - if '=' in f: - i = f.index('=') - f = f[:i].strip().lower() + '=' + f[i+1:].strip() + i = s.find('=', start, end) + if i == -1: + f = s[start:end] + else: + f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip() plist.append(f.strip()) - s = s[end:] + start = end return plist @@ -135,7 +141,7 @@ def _decode_uu(encoded): class Message: """Basic message object. - A message object is defined as something that has a bunch of RFC 2822 + A message object is defined as something that has a bunch of RFC 5322 headers and a payload. It may optionally have an envelope header (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a multipart or a message/rfc822), then the payload is a list of Message @@ -313,6 +319,8 @@ def get_payload(self, i=None, decode=False): # If it does happen, turn the string into bytes in a way # guaranteed not to fail. bpayload = payload.encode('raw-unicode-escape') + else: + bpayload = payload if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': @@ -564,7 +572,7 @@ def add_header(self, _name, _value, **_params): msg.add_header('content-disposition', 'attachment', filename='bud.gif') msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', Fußballer.ppt')) + filename=('utf-8', '', 'Fußballer.ppt')) msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 039f03cba74fa0..c6a51dd8e377b8 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -2,7 +2,7 @@ # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter # Contact: email-sig@python.org -"""A parser of RFC 2822 and MIME email messages.""" +"""A parser of RFC 5322 and MIME email messages.""" __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser', 'FeedParser', 'BytesFeedParser'] @@ -15,13 +15,13 @@ class Parser: def __init__(self, _class=None, *, policy=compat32): - """Parser of RFC 2822 and MIME email messages. + """Parser of RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The string must be formatted as a block of RFC 2822 headers and header + The string must be formatted as a block of RFC 5322 headers and header continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the string or by a blank line. @@ -75,13 +75,13 @@ def parsestr(self, text, headersonly=True): class BytesParser: def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. + """Parser of binary RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The input must be formatted as a block of RFC 2822 headers and header + The input must be formatted as a block of RFC 5322 headers and header continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the input or by a blank line. diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 7eab74dc0db9df..3de1f0d24a15b0 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -417,8 +417,14 @@ def decode_params(params): for name, continuations in rfc2231_params.items(): value = [] extended = False - # Sort by number - continuations.sort() + # Sort by number, treating None as 0 if there is no 0, + # and ignore it if there is already a 0. + has_zero = any(x[0] == 0 for x in continuations) + if has_zero: + continuations = [x for x in continuations if x[0] is not None] + else: + continuations = [(x[0] or 0, x[1], x[2]) for x in continuations] + continuations.sort(key=lambda x: x[0]) # And now append all values in numerical order, converting # %-encodings for the encoded segments. If any of the # continuation names ends in a *, then the entire string, after diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index a94bb270671e37..4ecb6b6e297a13 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -405,6 +405,8 @@ 'iso_8859_8' : 'iso8859_8', 'iso_8859_8_1988' : 'iso8859_8', 'iso_ir_138' : 'iso8859_8', + 'iso_8859_8_i' : 'iso8859_8', + 'iso_8859_8_e' : 'iso8859_8', # iso8859_9 codec 'csisolatin5' : 'iso8859_9', diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 60a8d5eb227f82..0c90b4c9fe18fa 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -316,7 +316,7 @@ def _buffer_encode(self, input, errors, final): class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): if errors != 'strict': - raise UnicodeError("Unsupported error handling: {errors}") + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return ("", 0) diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py index c506d654523496..df164ca5b9549c 100644 --- a/Lib/encodings/palmos.py +++ b/Lib/encodings/palmos.py @@ -201,7 +201,7 @@ def getregentry(): '\u02dc' # 0x98 -> SMALL TILDE '\u2122' # 0x99 -> TRADE MARK SIGN '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - '\x9b' # 0x9B -> + '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE '\x9d' # 0x9D -> '\x9e' # 0x9E -> diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index aa641e94a8b336..21bbfad0fe6b3e 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -10,7 +10,7 @@ __all__ = ["version", "bootstrap"] -_PIP_VERSION = "25.1.1" +_PIP_VERSION = "25.3" # Directory of system wheel packages. Some Linux distribution packaging # policies recommend against bundling dependencies. For example, Fedora diff --git a/Lib/ensurepip/_bundled/pip-25.1.1-py3-none-any.whl b/Lib/ensurepip/_bundled/pip-25.3-py3-none-any.whl similarity index 57% rename from Lib/ensurepip/_bundled/pip-25.1.1-py3-none-any.whl rename to Lib/ensurepip/_bundled/pip-25.3-py3-none-any.whl index 2fdcfbf9ff8139..755e1aa0c3dc5a 100644 Binary files a/Lib/ensurepip/_bundled/pip-25.1.1-py3-none-any.whl and b/Lib/ensurepip/_bundled/pip-25.3-py3-none-any.whl differ diff --git a/Lib/enum.py b/Lib/enum.py index 01fecca3e5aac0..8a72c409b94a8c 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -1965,7 +1965,7 @@ def __call__(self, enumeration): if 2**i not in values: missing.append(2**i) elif enum_type == 'enum': - # check for powers of one + # check for missing consecutive integers for i in range(low+1, high): if i not in values: missing.append(i) diff --git a/Lib/fractions.py b/Lib/fractions.py index 8163e3bb594f6b..a497ee19935345 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -168,9 +168,13 @@ def _round_to_figures(n, d, figures): # A '0' that's *not* followed by another digit is parsed as a minimum width # rather than a zeropad flag. (?P0(?=[0-9]))? - (?P0|[1-9][0-9]*)? + (?P[0-9]+)? (?P[,_])? - (?:\.(?P0|[1-9][0-9]*))? + (?:\. + (?=[,_0-9]) # lookahead for digit or separator + (?P[0-9]+)? + (?P[,_])? + )? (?P[eEfFgG%]) """, re.DOTALL | re.VERBOSE).fullmatch @@ -499,11 +503,15 @@ def _format_float_style(self, match): minimumwidth = int(match["minimumwidth"] or "0") thousands_sep = match["thousands_sep"] precision = int(match["precision"] or "6") + frac_sep = match["frac_separators"] or "" presentation_type = match["presentation_type"] trim_zeros = presentation_type in "gG" and not alternate_form trim_point = not alternate_form exponent_indicator = "E" if presentation_type in "EFG" else "e" + if align == '=' and fill == '0': + zeropad = True + # Round to get the digits we need, figure out where to place the point, # and decide whether to use scientific notation. 'point_pos' is the # relative to the _end_ of the digit string: that is, it's the number @@ -552,6 +560,9 @@ def _format_float_style(self, match): if trim_zeros: frac_part = frac_part.rstrip("0") separator = "" if trim_point and not frac_part else "." + if frac_sep: + frac_part = frac_sep.join(frac_part[pos:pos + 3] + for pos in range(0, len(frac_part), 3)) trailing = separator + frac_part + suffix # Do zero padding if required. diff --git a/Lib/functools.py b/Lib/functools.py index 714070c6ac9460..df4660eef3fe82 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -323,6 +323,9 @@ def _partial_new(cls, func, /, *args, **keywords): "or a descriptor") if args and args[-1] is Placeholder: raise TypeError("trailing Placeholders are not allowed") + for value in keywords.values(): + if value is Placeholder: + raise TypeError("Placeholder cannot be passed as a keyword argument") if isinstance(func, base_cls): pto_phcount = func._phcount tot_args = func.args @@ -992,8 +995,7 @@ def wrapper(*args, **kw): class singledispatchmethod: """Single-dispatch generic method descriptor. - Supports wrapping existing descriptors and handles non-descriptor - callables as instance methods. + Supports wrapping existing descriptors. """ def __init__(self, func): diff --git a/Lib/genericpath.py b/Lib/genericpath.py index ba7b0a13c7f81d..9363f564aab7a6 100644 --- a/Lib/genericpath.py +++ b/Lib/genericpath.py @@ -8,7 +8,7 @@ __all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', 'getsize', 'isdevdrive', 'isdir', 'isfile', 'isjunction', 'islink', - 'lexists', 'samefile', 'sameopenfile', 'samestat'] + 'lexists', 'samefile', 'sameopenfile', 'samestat', 'ALLOW_MISSING'] # Does a path exist? @@ -189,3 +189,12 @@ def _check_arg_types(funcname, *args): f'os.PathLike object, not {s.__class__.__name__!r}') from None if hasstr and hasbytes: raise TypeError("Can't mix strings and bytes in path components") from None + +# A singleton with a true boolean value. +@object.__new__ +class ALLOW_MISSING: + """Special value for use in realpath().""" + def __repr__(self): + return 'os.path.ALLOW_MISSING' + def __reduce__(self): + return self.__class__.__name__ diff --git a/Lib/getpass.py b/Lib/getpass.py index f571425e54178a..3d9bb1f0d146a4 100644 --- a/Lib/getpass.py +++ b/Lib/getpass.py @@ -33,8 +33,8 @@ def unix_getpass(prompt='Password: ', stream=None, *, echo_char=None): prompt: Written on stream to ask for the input. Default: 'Password: ' stream: A writable file object to display the prompt. Defaults to the tty. If no tty is available defaults to sys.stderr. - echo_char: A string used to mask input (e.g., '*'). If None, input is - hidden. + echo_char: A single ASCII character to mask input (e.g., '*'). + If None, input is hidden. Returns: The seKr3t input. Raises: @@ -119,9 +119,9 @@ def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): raise KeyboardInterrupt if c == '\b': if echo_char and pw: - msvcrt.putch('\b') - msvcrt.putch(' ') - msvcrt.putch('\b') + msvcrt.putwch('\b') + msvcrt.putwch(' ') + msvcrt.putwch('\b') pw = pw[:-1] else: pw = pw + c @@ -132,21 +132,31 @@ def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): return pw -def fallback_getpass(prompt='Password: ', stream=None): +def fallback_getpass(prompt='Password: ', stream=None, *, echo_char=None): + _check_echo_char(echo_char) import warnings warnings.warn("Can not control echo on the terminal.", GetPassWarning, stacklevel=2) if not stream: stream = sys.stderr print("Warning: Password input may be echoed.", file=stream) - return _raw_input(prompt, stream) + return _raw_input(prompt, stream, echo_char=echo_char) def _check_echo_char(echo_char): - # ASCII excluding control characters - if echo_char and not (echo_char.isprintable() and echo_char.isascii()): - raise ValueError("'echo_char' must be a printable ASCII string, " - f"got: {echo_char!r}") + # Single-character ASCII excluding control characters + if echo_char is None: + return + if not isinstance(echo_char, str): + raise TypeError("'echo_char' must be a str or None, not " + f"{type(echo_char).__name__}") + if not ( + len(echo_char) == 1 + and echo_char.isprintable() + and echo_char.isascii() + ): + raise ValueError("'echo_char' must be a single printable ASCII " + f"character, got: {echo_char!r}") def _raw_input(prompt="", stream=None, input=None, echo_char=None): diff --git a/Lib/glob.py b/Lib/glob.py index 341524282ba675..f1a87c82fc5585 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -22,6 +22,9 @@ def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, dot are special cases that are not matched by '*' and '?' patterns by default. + The order of the returned list is undefined. Sort it if you need a + particular order. + If `include_hidden` is true, the patterns '*', '?', '**' will match hidden directories. @@ -40,6 +43,9 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, dot are special cases that are not matched by '*' and '?' patterns. + The order of the returned paths is undefined. Sort them if you need a + particular order. + If recursive is true, the pattern '**' will match any files and zero or more directories and subdirectories. """ diff --git a/Lib/hashlib.py b/Lib/hashlib.py index abacac22ea0106..0e9bd98aa1fc31 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -141,29 +141,29 @@ def __get_openssl_constructor(name): return __get_builtin_constructor(name) -def __py_new(name, data=b'', **kwargs): +def __py_new(name, *args, **kwargs): """new(name, data=b'', **kwargs) - Return a new hashing object using the named algorithm; optionally initialized with data (which must be a bytes-like object). """ - return __get_builtin_constructor(name)(data, **kwargs) + return __get_builtin_constructor(name)(*args, **kwargs) -def __hash_new(name, data=b'', **kwargs): +def __hash_new(name, *args, **kwargs): """new(name, data=b'') - Return a new hashing object using the named algorithm; optionally initialized with data (which must be a bytes-like object). """ if name in __block_openssl_constructor: # Prefer our builtin blake2 implementation. - return __get_builtin_constructor(name)(data, **kwargs) + return __get_builtin_constructor(name)(*args, **kwargs) try: - return _hashlib.new(name, data, **kwargs) + return _hashlib.new(name, *args, **kwargs) except ValueError: # If the _hashlib module (OpenSSL) doesn't support the named # hash, try using our builtin implementations. # This allows for SHA224/256 and SHA384/512 support even though # the OpenSSL library prior to 0.9.8 doesn't provide them. - return __get_builtin_constructor(name)(data) + return __get_builtin_constructor(name)(*args, **kwargs) try: diff --git a/Lib/heapq.py b/Lib/heapq.py index 6ceb211f1ca2ae..17f62dd2d5839b 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -126,8 +126,9 @@ From all times, sorting has always been a Great Art! :-) """ -__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge', - 'nlargest', 'nsmallest', 'heappushpop'] +__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'heappushpop', + 'heappush_max', 'heappop_max', 'heapify_max', 'heapreplace_max', + 'heappushpop_max', 'nlargest', 'nsmallest', 'merge'] def heappush(heap, item): """Push item onto heap, maintaining the heap invariant.""" diff --git a/Lib/hmac.py b/Lib/hmac.py index 3683a4aa653a0a..16022c9ceb5439 100644 --- a/Lib/hmac.py +++ b/Lib/hmac.py @@ -229,6 +229,14 @@ def digest(key, msg, digest): if _hashopenssl and isinstance(digest, (str, _functype)): try: return _hashopenssl.hmac_digest(key, msg, digest) + except OverflowError: + # OpenSSL's HMAC limits the size of the key to INT_MAX. + # Instead of falling back to HACL* implementation which + # may still not be supported due to a too large key, we + # directly switch to the pure Python fallback instead + # even if we could have used streaming HMAC for small keys + # but large messages. + return _compute_digest_fallback(key, msg, digest) except _hashopenssl.UnsupportedDigestmodError: pass @@ -236,6 +244,10 @@ def digest(key, msg, digest): try: return _hmac.compute_digest(key, msg, digest) except (OverflowError, _hmac.UnknownHashError): + # HACL* HMAC limits the size of the key to UINT32_MAX + # so we fallback to the pure Python implementation even + # if streaming HMAC may have been used for small keys + # and large messages. pass return _compute_digest_fallback(key, msg, digest) diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 13c95c34e505c8..80fb8c3f929f6b 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -12,6 +12,7 @@ import _markupbase from html import unescape +from html.entities import html5 as html5_entities __all__ = ['HTMLParser'] @@ -23,20 +24,52 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +incomplete_charref = re.compile('&#(?:[0-9]|[xX][0-9a-fA-F])') +attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') +endtagopen = re.compile('') -commentclose = re.compile(r'--\s*>') +commentclose = re.compile(r'--!?>') +commentabruptclose = re.compile(r'-?>') # Note: -# 1) if you change tagfind/attrfind remember to update locatestarttagend too; -# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will +# 1) if you change tagfind/attrfind remember to update locatetagend too; +# 2) if you change tagfind/attrfind and/or locatetagend the parser will # explode, so don't do it. -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') +# see the HTML5 specs section "13.2.5.6 Tag open state", +# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state". +# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state +# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state +# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state +tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*') +attrfind_tolerant = re.compile(r""" + ( + (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + ) + ([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator + ('[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space +""", re.VERBOSE) +locatetagend = re.compile(r""" + [a-zA-Z][^\t\n\r\f />]* # tag name + [\t\n\r\f /]* # optional whitespace before attribute name + (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + (?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + [\t\n\r\f /]* # possibly followed by a space + )* + >? +""", re.VERBOSE) +# The following variables are not used, but are temporarily left for +# backward compatibility. locatestarttagend_tolerant = re.compile(r""" <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name @@ -53,10 +86,24 @@ \s* # trailing whitespace """, re.VERBOSE) endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# ') +# Character reference processing logic specific to attribute values +# See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state +def _replace_attr_charref(match): + ref = match.group(0) + # Numeric / hex char refs must always be unescaped + if ref.startswith('&#'): + return unescape(ref) + # Named character / entity references must only be unescaped + # if they are an exact match, and they are not followed by an equals sign + if not ref.endswith('=') and ref[1:] in html5_entities: + return unescape(ref) + # Otherwise do not unescape + return ref + +def _unescape_attrvalue(s): + return attr_charref.sub(_replace_attr_charref, s) class HTMLParser(_markupbase.ParserBase): @@ -81,16 +128,25 @@ class HTMLParser(_markupbase.ParserBase): argument. """ - CDATA_CONTENT_ELEMENTS = ("script", "style") + # See the HTML5 specs section "13.4 Parsing HTML fragments". + # https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments + # CDATA_CONTENT_ELEMENTS are parsed in RAWTEXT mode + CDATA_CONTENT_ELEMENTS = ("script", "style", "xmp", "iframe", "noembed", "noframes") + RCDATA_CONTENT_ELEMENTS = ("textarea", "title") - def __init__(self, *, convert_charrefs=True): + def __init__(self, *, convert_charrefs=True, scripting=False): """Initialize and reset this instance. - If convert_charrefs is True (the default), all character references + If convert_charrefs is true (the default), all character references are automatically converted to the corresponding Unicode characters. + + If *scripting* is false (the default), the content of the + ``noscript`` element is parsed normally; if it's true, + it's returned as is without being parsed. """ super().__init__() self.convert_charrefs = convert_charrefs + self.scripting = scripting self.reset() def reset(self): @@ -99,6 +155,8 @@ def reset(self): self.lasttag = '???' self.interesting = interesting_normal self.cdata_elem = None + self._support_cdata = True + self._escapable = True super().reset() def feed(self, data): @@ -120,13 +178,35 @@ def get_starttag_text(self): """Return full source of start tag: '<...>'.""" return self.__starttag_text - def set_cdata_mode(self, elem): + def set_cdata_mode(self, elem, *, escapable=False): self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) + self._escapable = escapable + if self.cdata_elem == 'plaintext': + self.interesting = re.compile(r'\z') + elif escapable and not self.convert_charrefs: + self.interesting = re.compile(r'&|])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) + else: + self.interesting = re.compile(r'])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) def clear_cdata_mode(self): self.interesting = interesting_normal self.cdata_elem = None + self._escapable = True + + def _set_support_cdata(self, flag=True): + """Enable or disable support of the CDATA sections. + If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>". + If disabled, "<[CDATA[" starts a bogus comments which ends with ">". + + This method is not called by default. Its purpose is to be called + in custom handle_starttag() and handle_endtag() methods, with + value that depends on the adjusted current node. + See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state + for details. + """ + self._support_cdata = flag # Internal -- handle data as far as reasonable. May leave state # and data to be processed by a subsequent call. If 'end' is @@ -147,7 +227,7 @@ def goahead(self, end): # & near the end and see if it's followed by a space or ;. amppos = rawdata.rfind('&', max(i, n-34)) if (amppos >= 0 and - not re.compile(r'[\s;]').search(rawdata, amppos)): + not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)): break # wait till we get all the text j = n else: @@ -159,7 +239,7 @@ def goahead(self, end): break j = n if i < j: - if self.convert_charrefs and not self.cdata_elem: + if self.convert_charrefs and self._escapable: self.handle_data(unescape(rawdata[i:j])) else: self.handle_data(rawdata[i:j]) @@ -177,7 +257,7 @@ def goahead(self, end): k = self.parse_pi(i) elif startswith("', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 - else: - k += 1 - if self.convert_charrefs and not self.cdata_elem: - self.handle_data(unescape(rawdata[i:k])) + if starttagopen.match(rawdata, i): # < + letter + pass + elif startswith("', i+9) + if j < 0: + return -1 + self.unknown_decl(rawdata[i+3: j]) + return j + 3 elif rawdata[i:i+9].lower() == ' gtpos = rawdata.find('>', i+9) @@ -272,12 +382,27 @@ def parse_html_declaration(self, i): else: return self.parse_bogus_comment(i) + # Internal -- parse comment, return length or -1 if not terminated + # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state + def parse_comment(self, i, report=True): + rawdata = self.rawdata + assert rawdata.startswith('' + "" + '' + '' + '' + '\u2603' +) + +SAMPLE_RAWTEXT = SAMPLE_RCDATA + '&☺' class EventCollector(html.parser.HTMLParser): - def __init__(self, *args, **kw): + def __init__(self, *args, autocdata=False, **kw): + self.autocdata = autocdata self.events = [] self.append = self.events.append html.parser.HTMLParser.__init__(self, *args, **kw) + if autocdata: + self._set_support_cdata(False) def get_events(self): # Normalize the list of events so that buffer artefacts don't @@ -33,12 +49,16 @@ def get_events(self): def handle_starttag(self, tag, attrs): self.append(("starttag", tag, attrs)) + if self.autocdata and tag == 'svg': + self._set_support_cdata(True) def handle_startendtag(self, tag, attrs): self.append(("startendtag", tag, attrs)) def handle_endtag(self, tag): self.append(("endtag", tag)) + if self.autocdata and tag == 'svg': + self._set_support_cdata(False) # all other markup @@ -80,14 +100,22 @@ def handle_entityref(self, data): self.fail('This should never be called with convert_charrefs=True') +# The normal event collector normalizes the events in get_events, +# so we override it to return the original list of events. +class EventCollectorNoNormalize(EventCollector): + def get_events(self): + return self.events + + class TestCaseBase(unittest.TestCase): - def get_collector(self): - return EventCollector(convert_charrefs=False) + def get_collector(self, convert_charrefs=False): + return EventCollector(convert_charrefs=convert_charrefs) - def _run_check(self, source, expected_events, collector=None): + def _run_check(self, source, expected_events, + *, collector=None, convert_charrefs=False): if collector is None: - collector = self.get_collector() + collector = self.get_collector(convert_charrefs=convert_charrefs) parser = collector for s in source: parser.feed(s) @@ -101,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None): def _run_check_extra(self, source, events): self._run_check(source, events, - EventCollectorExtra(convert_charrefs=False)) + collector=EventCollectorExtra(convert_charrefs=False)) class HTMLParserTestCase(TestCaseBase): @@ -160,10 +188,87 @@ def test_malformatted_charref(self): ]) def test_unclosed_entityref(self): - self._run_check("&entityref foo", [ - ("entityref", "entityref"), - ("data", " foo"), - ]) + self._run_check('> <', [('entityref', 'gt'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('> <', [('data', '> <')], convert_charrefs=True) + + self._run_check('&undefined <', + [('entityref', 'undefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('&undefined <', [('data', '&undefined <')], + convert_charrefs=True) + + self._run_check('>undefined <', + [('entityref', 'gtundefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('>undefined <', [('data', '>undefined <')], + convert_charrefs=True) + + self._run_check('& <', [('data', '& '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('& <', [('data', '& <')], convert_charrefs=True) + + def test_eof_in_entityref(self): + self._run_check('>', [('entityref', 'gt')], convert_charrefs=False) + self._run_check('>', [('data', '>')], convert_charrefs=True) + + self._run_check('&g', [('entityref', 'g')], convert_charrefs=False) + self._run_check('&g', [('data', '&g')], convert_charrefs=True) + + self._run_check('&undefined', [('entityref', 'undefined')], + convert_charrefs=False) + self._run_check('&undefined', [('data', '&undefined')], + convert_charrefs=True) + + self._run_check('>undefined', [('entityref', 'gtundefined')], + convert_charrefs=False) + self._run_check('>undefined', [('data', '>undefined')], + convert_charrefs=True) + + self._run_check('&', [('data', '&')], convert_charrefs=False) + self._run_check('&', [('data', '&')], convert_charrefs=True) + + def test_unclosed_charref(self): + self._run_check('{ <', [('charref', '123'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('{ <', [('data', '{ <')], convert_charrefs=True) + self._run_check('« <', [('charref', 'xab'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('« <', [('data', '\xab <')], convert_charrefs=True) + + self._run_check('� <', + [('charref', '123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + self._run_check('� <', + [('charref', 'x123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + + self._run_check('&# <', [('data', '&# '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&# <', [('data', '&# <')], convert_charrefs=True) + self._run_check('&#x <', [('data', '&#x '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&#x <', [('data', '&#x <')], convert_charrefs=True) + + def test_eof_in_charref(self): + self._run_check('{', [('charref', '123')], convert_charrefs=False) + self._run_check('{', [('data', '{')], convert_charrefs=True) + self._run_check('«', [('charref', 'xab')], convert_charrefs=False) + self._run_check('«', [('data', '\xab')], convert_charrefs=True) + + self._run_check('�', [('charref', '123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + self._run_check('�', [('charref', 'x123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + + self._run_check('&#', [('data', '&#')], convert_charrefs=False) + self._run_check('&#', [('data', '&#')], convert_charrefs=True) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=False) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=True) def test_bad_nesting(self): # Strangely, this *is* supposed to test that overlapping @@ -264,8 +369,7 @@ def test_get_starttag_text(self): ("starttag", "foo:bar", [("one", "1"), ("two", "2")]), ("starttag_text", s)]) - def test_cdata_content(self): - contents = [ + @support.subTests('content', [ ' ¬-an-entity-ref;', "", '

', @@ -278,60 +382,238 @@ def test_cdata_content(self): 'src="http://www.example.org/r=\'+new ' 'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'), '\n\n', - 'foo = "";', '', - # these two should be invalid according to the HTML 5 spec, - # section 8.1.2.2 - #'foo = ', - #'foo = ', - ] - elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style'] - for content in contents: - for element in elements: - element_lower = element.lower() - s = '<{element}>{content}'.format(element=element, - content=content) - self._run_check(s, [("starttag", element_lower, []), - ("data", content), - ("endtag", element_lower)]) - - def test_cdata_with_closing_tags(self): + ]) + def test_script_content(self, content): + s = f'' + self._run_check(s, [ + ("starttag", "script", []), + ("data", content), + ("endtag", "script"), + ]) + + @support.subTests('content', [ + 'a::before { content: ""; }', + 'a::before { content: "¬-an-entity-ref;"; }', + 'a::before { content: ""; }', + 'a::before { content: "\u2603"; }', + ]) + def test_style_content(self, content): + s = f'' + self._run_check(s, [("starttag", "style", []), + ("data", content), + ("endtag", "style")]) + + @support.subTests('tag', ['title', 'textarea']) + def test_rcdata_content(self, tag): + source = f"<{tag}>{SAMPLE_RCDATA}" + self._run_check(source, [ + ("starttag", tag, []), + ("data", SAMPLE_RCDATA), + ("endtag", tag), + ]) + source = f"<{tag}>&" + self._run_check(source, [ + ("starttag", tag, []), + ('entityref', 'amp'), + ("endtag", tag), + ]) + + @support.subTests('tag', + ['style', 'xmp', 'iframe', 'noembed', 'noframes', 'script']) + def test_rawtext_content(self, tag): + source = f"<{tag}>{SAMPLE_RAWTEXT}" + self._run_check(source, [ + ("starttag", tag, []), + ("data", SAMPLE_RAWTEXT), + ("endtag", tag), + ]) + + def test_noscript_content(self): + source = f"" + # scripting=False -- normal mode + self._run_check(source, [ + ('starttag', 'noscript', []), + ('comment', ' not a comment '), + ('starttag', 'not', [('a', 'start tag')]), + ('unknown decl', 'CDATA[not a cdata'), + ('comment', 'not a bogus comment'), + ('endtag', 'not'), + ('data', '☃'), + ('entityref', 'amp'), + ('charref', '9786'), + ('endtag', 'noscript'), + ]) + # scripting=True -- RAWTEXT mode + self._run_check(source, [ + ("starttag", "noscript", []), + ("data", SAMPLE_RAWTEXT), + ("endtag", "noscript"), + ], collector=EventCollector(scripting=True)) + + def test_plaintext_content(self): + content = SAMPLE_RAWTEXT + '' # not closing + source = f"

{content}" + self._run_check(source, [ + ("starttag", "plaintext", []), + ("data", content), + ]) + + @support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n', + 'script/', 'script foo=bar', 'script foo=">"']) + def test_script_closing_tag(self, endtag): # see issue #13358 # make sure that HTMLParser calls handle_data only once for each CDATA. - # The normal event collector normalizes the events in get_events, - # so we override it to return the original list of events. - class Collector(EventCollector): - def get_events(self): - return self.events - content = """<!-- not a comment --> &not-an-entity-ref; <a href="" /> </p><p> <span></span></style> '</script' + '>'""" - for element in [' script', 'script ', ' script ', - '\nscript', 'script\n', '\nscript\n']: - element_lower = element.lower().strip() - s = '<script>{content}</{element}>'.format(element=element, - content=content) - self._run_check(s, [("starttag", element_lower, []), - ("data", content), - ("endtag", element_lower)], - collector=Collector(convert_charrefs=False)) + s = f'<ScrIPt>{content}</{endtag}>' + self._run_check(s, [("starttag", "script", []), + ("data", content), + ("endtag", "script")], + collector=EventCollectorNoNormalize(convert_charrefs=False)) + + @support.subTests('tag', [ + 'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes', + 'textarea', 'title', 'noscript', + ]) + def test_closing_tag(self, tag): + for endtag in [tag, tag.upper(), f'{tag} ', f'{tag}\n', + f'{tag}/', f'{tag} foo=bar', f'{tag} foo=">"']: + content = "<!-- not a comment --><i>Spam</i>" + s = f'<{tag.upper()}>{content}</{endtag}>' + self._run_check(s, [ + ("starttag", tag, []), + ('data', content), + ("endtag", tag), + ], collector=EventCollectorNoNormalize(convert_charrefs=False, scripting=True)) + + @support.subTests('tag', [ + 'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes', + 'textarea', 'title', 'noscript', + ]) + def test_invalid_closing_tag(self, tag): + content = ( + f'< /{tag}>' + f'</ {tag}>' + f'</{tag}x>' + f'</{tag}\v>' + f'</{tag}\xa0>' + ) + source = f"<{tag}>{content}</{tag}>" + self._run_check(source, [ + ("starttag", tag, []), + ("data", content), + ("endtag", tag), + ], collector=EventCollector(convert_charrefs=False, scripting=True)) + + @support.subTests('tag,endtag', [ + ('title', 'tıtle'), + ('style', 'ſtyle'), + ('style', 'ſtyle'), + ('style', 'style'), + ('iframe', 'ıframe'), + ('noframes', 'noframeſ'), + ('noscript', 'noſcript'), + ('noscript', 'noscrıpt'), + ('script', 'ſcript'), + ('script', 'scrıpt'), + ]) + def test_invalid_nonascii_closing_tag(self, tag, endtag): + content = f"<br></{endtag}>" + source = f"<{tag}>{content}" + self._run_check(source, [ + ("starttag", tag, []), + ("data", content), + ], collector=EventCollector(convert_charrefs=False, scripting=True)) + source = f"<{tag}>{content}</{tag}>" + self._run_check(source, [ + ("starttag", tag, []), + ("data", content), + ("endtag", tag), + ], collector=EventCollector(convert_charrefs=False, scripting=True)) + + @support.subTests('tail,end', [ + ('', False), + ('<', False), + ('</', False), + ('</s', False), + ('</script', False), + ('</script ', True), + ('</script foo=bar', True), + ('</script foo=">', True), + ]) + def test_eof_in_script(self, tail, end): + content = "a = 123" + s = f'<ScrIPt>{content}{tail}' + self._run_check(s, [("starttag", "script", []), + ("data", content if end else content + tail)], + collector=EventCollectorNoNormalize(convert_charrefs=False)) + + @support.subTests('tail,end', [ + ('', False), + ('<', False), + ('</', False), + ('</t', False), + ('</title', False), + ('</title ', True), + ('</title foo=bar', True), + ('</title foo=">', True), + ]) + def test_eof_in_title(self, tail, end): + s = f'<TitLe>Egg &amp; Spam{tail}' + self._run_check(s, [("starttag", "title", []), + ("data", "Egg & Spam" + ('' if end else tail))], + collector=EventCollectorNoNormalize(convert_charrefs=True)) + self._run_check(s, [("starttag", "title", []), + ('data', 'Egg '), + ('entityref', 'amp'), + ('data', ' Spam' + ('' if end else tail))], + collector=EventCollectorNoNormalize(convert_charrefs=False)) def test_comments(self): html = ("<!-- I'm a valid comment -->" '<!--me too!-->' '<!------>' + '<!----->' '<!---->' + # abrupt-closing-of-empty-comment + '<!--->' + '<!-->' '<!----I have many hyphens---->' '<!-- I have a > in the middle -->' - '<!-- and I have -- in the middle! -->') + '<!-- and I have -- in the middle! -->' + '<!--incorrectly-closed-comment--!>' + '<!----!>' + '<!----!-->' + '<!---- >-->' + '<!---!>-->' + '<!--!>-->' + # nested-comment + '<!-- <!-- nested --> -->' + '<!--<!-->' + '<!--<!--!>' + ) expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), ('comment', '--'), + ('comment', '-'), + ('comment', ''), + ('comment', ''), ('comment', ''), ('comment', '--I have many hyphens--'), ('comment', ' I have a > in the middle '), - ('comment', ' and I have -- in the middle! ')] + ('comment', ' and I have -- in the middle! '), + ('comment', 'incorrectly-closed-comment'), + ('comment', ''), + ('comment', '--!'), + ('comment', '-- >'), + ('comment', '-!>'), + ('comment', '!>'), + ('comment', ' <!-- nested '), ('data', ' -->'), + ('comment', '<!'), + ('comment', '<!'), + ] self._run_check(html, expected) def test_condcoms(self): @@ -348,18 +630,16 @@ def test_convert_charrefs(self): collector = lambda: EventCollectorCharrefs() self.assertTrue(collector().convert_charrefs) charrefs = ['&quot;', '&#34;', '&#x22;', '&quot', '&#34', '&#x22'] - # check charrefs in the middle of the text/attributes - expected = [('starttag', 'a', [('href', 'foo"zar')]), - ('data', 'a"z'), ('endtag', 'a')] + # check charrefs in the middle of the text + expected = [('starttag', 'a', []), ('data', 'a"z'), ('endtag', 'a')] for charref in charrefs: - self._run_check('<a href="foo{0}zar">a{0}z</a>'.format(charref), + self._run_check('<a>a{0}z</a>'.format(charref), expected, collector=collector()) - # check charrefs at the beginning/end of the text/attributes - expected = [('data', '"'), - ('starttag', 'a', [('x', '"'), ('y', '"X'), ('z', 'X"')]), + # check charrefs at the beginning/end of the text + expected = [('data', '"'), ('starttag', 'a', []), ('data', '"'), ('endtag', 'a'), ('data', '"')] for charref in charrefs: - self._run_check('{0}<a x="{0}" y="{0}X" z="X{0}">' + self._run_check('{0}<a>' '{0}</a>{0}'.format(charref), expected, collector=collector()) # check charrefs in <script>/<style> elements @@ -382,6 +662,35 @@ def test_convert_charrefs(self): self._run_check('no charrefs here', [('data', 'no charrefs here')], collector=collector()) + def test_convert_charrefs_in_attribute_values(self): + # default value for convert_charrefs is now True + collector = lambda: EventCollectorCharrefs() + self.assertTrue(collector().convert_charrefs) + + # always unescape terminated entity refs, numeric and hex char refs: + # - regardless whether they are at start, middle, end of attribute + # - or followed by alphanumeric, non-alphanumeric, or equals char + charrefs = ['&cent;', '&#xa2;', '&#xa2', '&#162;', '&#162'] + expected = [('starttag', 'a', + [('x', '¢'), ('x', 'z¢'), ('x', '¢z'), + ('x', 'z¢z'), ('x', '¢ z'), ('x', '¢=z')]), + ('endtag', 'a')] + for charref in charrefs: + self._run_check('<a x="{0}" x="z{0}" x="{0}z" ' + ' x="z{0}z" x="{0} z" x="{0}=z"></a>' + .format(charref), expected, collector=collector()) + + # only unescape unterminated entity matches if they are not followed by + # an alphanumeric or an equals sign + charref = '&cent' + expected = [('starttag', 'a', + [('x', '¢'), ('x', 'z¢'), ('x', '&centz'), + ('x', 'z&centz'), ('x', '¢ z'), ('x', '&cent=z')]), + ('endtag', 'a')] + self._run_check('<a x="{0}" x="z{0}" x="{0}z" ' + ' x="z{0}z" x="{0} z" x="{0}=z"></a>' + .format(charref), expected, collector=collector()) + # the remaining tests were for the "tolerant" parser (which is now # the default), and check various kind of broken markup def test_tolerant_parsing(self): @@ -393,28 +702,34 @@ def test_tolerant_parsing(self): ('data', '<'), ('starttag', 'bc<', [('a', None)]), ('endtag', 'html'), - ('data', '\n<img src="URL>'), - ('comment', '/img'), - ('endtag', 'html<')]) + ('data', '\n')]) def test_starttag_junk_chars(self): + self._run_check("<", [('data', '<')]) + self._run_check("<>", [('data', '<>')]) + self._run_check("< >", [('data', '< >')]) + self._run_check("< ", [('data', '< ')]) self._run_check("</>", []) + self._run_check("<$>", [('data', '<$>')]) self._run_check("</$>", [('comment', '$')]) self._run_check("</", [('data', '</')]) - self._run_check("</a", [('data', '</a')]) + self._run_check("</a", []) + self._run_check("</ a>", [('comment', ' a')]) + self._run_check("</ a", [('comment', ' a')]) self._run_check("<a<a>", [('starttag', 'a<a', [])]) self._run_check("</a<a>", [('endtag', 'a<a')]) - self._run_check("<!", [('data', '<!')]) - self._run_check("<a", [('data', '<a')]) - self._run_check("<a foo='bar'", [('data', "<a foo='bar'")]) - self._run_check("<a foo='bar", [('data', "<a foo='bar")]) - self._run_check("<a foo='>'", [('data', "<a foo='>'")]) - self._run_check("<a foo='>", [('data', "<a foo='>")]) + self._run_check("<!", [('comment', '')]) + self._run_check("<a", []) + self._run_check("<a foo='bar'", []) + self._run_check("<a foo='bar", []) + self._run_check("<a foo='>'", []) + self._run_check("<a foo='>", []) self._run_check("<a$>", [('starttag', 'a$', [])]) self._run_check("<a$b>", [('starttag', 'a$b', [])]) self._run_check("<a$b/>", [('startendtag', 'a$b', [])]) self._run_check("<a$b >", [('starttag', 'a$b', [])]) self._run_check("<a$b />", [('startendtag', 'a$b', [])]) + self._run_check("</a$b>", [('endtag', 'a$b')]) def test_slashes_in_starttag(self): self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])]) @@ -447,6 +762,10 @@ def test_slashes_in_starttag(self): ] self._run_check(html, expected) + def test_slashes_in_endtag(self): + self._run_check('</a/>', [('endtag', 'a')]) + self._run_check('</a foo="var"/>', [('endtag', 'a')]) + def test_declaration_junk_chars(self): self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')]) @@ -481,15 +800,11 @@ def test_invalid_end_tags(self): self._run_check(html, expected) def test_broken_invalid_end_tag(self): - # This is technically wrong (the "> shouldn't be included in the 'data') - # but is probably not worth fixing it (in addition to all the cases of - # the previous test, it would require a full attribute parsing). - # see #13993 html = '<b>This</b attr=">"> confuses the parser' expected = [('starttag', 'b', []), ('data', 'This'), ('endtag', 'b'), - ('data', '"> confuses the parser')] + ('data', ' confuses the parser')] self._run_check(html, expected) def test_correct_detection_of_start_tags(self): @@ -525,69 +840,165 @@ def test_correct_detection_of_start_tags(self): ] self._run_check(html, expected) - def test_EOF_in_charref(self): - # see #17802 - # This test checks that the UnboundLocalError reported in the issue - # is not raised, however I'm not sure the returned values are correct. - # Maybe HTMLParser should use self.unescape for these + def test_eof_in_comments(self): + data = [ + ('<!--', [('comment', '')]), + ('<!---', [('comment', '')]), + ('<!----', [('comment', '')]), + ('<!-----', [('comment', '-')]), + ('<!------', [('comment', '--')]), + ('<!----!', [('comment', '')]), + ('<!---!', [('comment', '-!')]), + ('<!---!>', [('comment', '-!>')]), + ('<!--foo', [('comment', 'foo')]), + ('<!--foo-', [('comment', 'foo')]), + ('<!--foo--', [('comment', 'foo')]), + ('<!--foo--!', [('comment', 'foo')]), + ('<!--<!--', [('comment', '<!')]), + ('<!--<!--!', [('comment', '<!')]), + ] + for html, expected in data: + self._run_check(html, expected) + + def test_eof_in_declarations(self): data = [ - ('a&', [('data', 'a&')]), - ('a&b', [('data', 'ab')]), - ('a&b ', [('data', 'a'), ('entityref', 'b'), ('data', ' ')]), - ('a&b;', [('data', 'a'), ('entityref', 'b')]), + ('<!', [('comment', '')]), + ('<!-', [('comment', '-')]), + ('<![', [('comment', '[')]), + ('<!DOCTYPE', [('decl', 'DOCTYPE')]), + ('<!DOCTYPE ', [('decl', 'DOCTYPE ')]), + ('<!DOCTYPE html', [('decl', 'DOCTYPE html')]), + ('<!DOCTYPE html ', [('decl', 'DOCTYPE html ')]), + ('<!DOCTYPE html PUBLIC', [('decl', 'DOCTYPE html PUBLIC')]), + ('<!DOCTYPE html PUBLIC "foo', [('decl', 'DOCTYPE html PUBLIC "foo')]), + ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo', + [('decl', 'DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo')]), ] for html, expected in data: self._run_check(html, expected) - def test_broken_comments(self): - html = ('<! not really a comment >' + @support.subTests('content', ['', 'x', 'x]', 'x]]']) + def test_eof_in_cdata(self, content): + self._run_check('<![CDATA[' + content, + [('unknown decl', 'CDATA[' + content)]) + self._run_check('<![CDATA[' + content, + [('comment', '[CDATA[' + content)], + collector=EventCollector(autocdata=True)) + self._run_check('<svg><text y="100"><![CDATA[' + content, + [('starttag', 'svg', []), + ('starttag', 'text', [('y', '100')]), + ('unknown decl', 'CDATA[' + content)]) + + def test_bogus_comments(self): + html = ('<!ELEMENT br EMPTY>' + '<! not really a comment >' '<! not a comment either -->' '<! -- close enough -->' '<!><!<-- this was an empty comment>' - '<!!! another bogus comment !!!>') + '<!!! another bogus comment !!!>' + # see #32876 + '<![with square brackets]!>' + '<![\nmultiline\nbogusness\n]!>' + '<![more brackets]-[and a hyphen]!>' + '<![cdata[should be uppercase]]>' + '<![CDATA [whitespaces are not ignored]]>' + '<![CDATA]]>' # required '[' after CDATA + ) expected = [ + ('comment', 'ELEMENT br EMPTY'), ('comment', ' not really a comment '), ('comment', ' not a comment either --'), ('comment', ' -- close enough --'), ('comment', ''), ('comment', '<-- this was an empty comment'), ('comment', '!! another bogus comment !!!'), + ('comment', '[with square brackets]!'), + ('comment', '[\nmultiline\nbogusness\n]!'), + ('comment', '[more brackets]-[and a hyphen]!'), + ('comment', '[cdata[should be uppercase]]'), + ('comment', '[CDATA [whitespaces are not ignored]]'), + ('comment', '[CDATA]]'), ] self._run_check(html, expected) def test_broken_condcoms(self): # these condcoms are missing the '--' after '<!' and before the '>' + # and they are considered bogus comments according to + # "8.2.4.42. Markup declaration open state" html = ('<![if !(IE)]>broken condcom<![endif]>' '<![if ! IE]><link href="favicon.tiff"/><![endif]>' '<![if !IE 6]><img src="firefox.png" /><![endif]>' '<![if !ie 6]><b>foo</b><![endif]>' '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>') - # According to the HTML5 specs sections "8.2.4.44 Bogus comment state" - # and "8.2.4.45 Markup declaration open state", comment tokens should - # be emitted instead of 'unknown decl', but calling unknown_decl - # provides more flexibility. - # See also Lib/_markupbase.py:parse_declaration expected = [ - ('unknown decl', 'if !(IE)'), + ('comment', '[if !(IE)]'), ('data', 'broken condcom'), - ('unknown decl', 'endif'), - ('unknown decl', 'if ! IE'), + ('comment', '[endif]'), + ('comment', '[if ! IE]'), ('startendtag', 'link', [('href', 'favicon.tiff')]), - ('unknown decl', 'endif'), - ('unknown decl', 'if !IE 6'), + ('comment', '[endif]'), + ('comment', '[if !IE 6]'), ('startendtag', 'img', [('src', 'firefox.png')]), - ('unknown decl', 'endif'), - ('unknown decl', 'if !ie 6'), + ('comment', '[endif]'), + ('comment', '[if !ie 6]'), ('starttag', 'b', []), ('data', 'foo'), ('endtag', 'b'), - ('unknown decl', 'endif'), - ('unknown decl', 'if (!IE)|(lt IE 9)'), + ('comment', '[endif]'), + ('comment', '[if (!IE)|(lt IE 9)]'), ('startendtag', 'img', [('src', 'mammoth.bmp')]), - ('unknown decl', 'endif') + ('comment', '[endif]') ] self._run_check(html, expected) + @support.subTests('content', [ + 'just some plain text', + '<!-- not a comment -->', + '&not-an-entity-ref;', + "<not a='start tag'>", + '', + '[[I have many brackets]]', + 'I have a > in the middle', + 'I have a ]] in the middle', + '] ]>', + ']] >', + ('\n' + ' if (a < b && a > b) {\n' + ' printf("[<marquee>How?</marquee>]");\n' + ' }\n'), + ]) + def test_cdata_section_content(self, content): + # See "13.2.5.42 Markup declaration open state", + # "13.2.5.69 CDATA section state", and issue bpo-32876. + html = f'<svg><text y="100"><![CDATA[{content}]]></text></svg>' + expected = [ + ('starttag', 'svg', []), + ('starttag', 'text', [('y', '100')]), + ('unknown decl', 'CDATA[' + content), + ('endtag', 'text'), + ('endtag', 'svg'), + ] + self._run_check(html, expected) + self._run_check(html, expected, collector=EventCollector(autocdata=True)) + + def test_cdata_section(self): + # See "13.2.5.42 Markup declaration open state". + html = ('<![CDATA[foo<br>bar]]>' + '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>' + '<![CDATA[foo<br>bar]]>') + expected = [ + ('comment', '[CDATA[foo<br'), + ('data', 'bar]]>'), + ('starttag', 'svg', []), + ('starttag', 'text', [('y', '100')]), + ('unknown decl', 'CDATA[foo<br>bar'), + ('endtag', 'text'), + ('endtag', 'svg'), + ('comment', '[CDATA[foo<br'), + ('data', 'bar]]>'), + ] + self._run_check(html, expected, collector=EventCollector(autocdata=True)) + def test_convert_charrefs_dropped_text(self): # #23144: make sure that all the events are triggered when # convert_charrefs is True, even if we don't call .close() @@ -600,6 +1011,26 @@ def test_convert_charrefs_dropped_text(self): ('endtag', 'a'), ('data', ' bar & baz')] ) + @support.requires_resource('cpu') + def test_eof_no_quadratic_complexity(self): + # Each of these examples used to take about an hour. + # Now they take a fraction of a second. + def check(source): + parser = html.parser.HTMLParser() + parser.feed(source) + parser.close() + n = 120_000 + check("<a " * n) + check("<a a=" * n) + check("</a " * 14 * n) + check("</a a=" * 11 * n) + check("<!--" * 4 * n) + check("<!" * 60 * n) + check("<?" * 19 * n) + check("</$" * 15 * n) + check("<![CDATA[" * 9 * n) + check("<!doctype" * 35 * n) + class AttributesTestCase(TestCaseBase): @@ -608,9 +1039,15 @@ def test_attr_syntax(self): ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) ] self._run_check("""<a b='v' c="v" d=v e>""", output) - self._run_check("""<a b = 'v' c = "v" d = v e>""", output) - self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output) - self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output) + self._run_check("<a foo==bar>", [('starttag', 'a', [('foo', '=bar')])]) + self._run_check("<a foo =bar>", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("<a foo\t=bar>", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("<a foo\v=bar>", [('starttag', 'a', [('foo\v', 'bar')])]) + self._run_check("<a foo\xa0=bar>", [('starttag', 'a', [('foo\xa0', 'bar')])]) + self._run_check("<a foo= bar>", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("<a foo=\tbar>", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("<a foo=\vbar>", [('starttag', 'a', [('foo', '\vbar')])]) + self._run_check("<a foo=\xa0bar>", [('starttag', 'a', [('foo', '\xa0bar')])]) def test_attr_values(self): self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""", @@ -619,6 +1056,10 @@ def test_attr_values(self): ("d", "\txyz\n")])]) self._run_check("""<a b='' c="">""", [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("<a b=\tx c=\ny>", + [('starttag', 'a', [('b', 'x'), ('c', 'y')])]) + self._run_check("<a b=\v c=\xa0>", + [("starttag", "a", [("b", "\v"), ("c", "\xa0")])]) # Regression test for SF patch #669683. self._run_check("<e a=rgb(1,2,3)>", [("starttag", "e", [("a", "rgb(1,2,3)")])]) @@ -685,13 +1126,17 @@ def test_malformed_attributes(self): ) expected = [ ('starttag', 'a', [('href', "test'style='color:red;bad1'")]), - ('data', 'test - bad1'), ('endtag', 'a'), + ('data', 'test - bad1'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'+style='color:red;ba2'")]), - ('data', 'test - bad2'), ('endtag', 'a'), + ('data', 'test - bad2'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]), - ('data', 'test - bad3'), ('endtag', 'a'), + ('data', 'test - bad3'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]), - ('data', 'test - bad4'), ('endtag', 'a') + ('data', 'test - bad4'), + ('endtag', 'a'), ] self._run_check(html, expected) diff --git a/Lib/test/test_http_cookiejar.py b/Lib/test/test_http_cookiejar.py index 6bc33b15ec32e9..04cb440cd4ccf6 100644 --- a/Lib/test/test_http_cookiejar.py +++ b/Lib/test/test_http_cookiejar.py @@ -4,6 +4,7 @@ import stat import sys import re +from test import support from test.support import os_helper from test.support import warnings_helper import time @@ -105,8 +106,7 @@ def test_http2time_formats(self): self.assertEqual(http2time(s.lower()), test_t, s.lower()) self.assertEqual(http2time(s.upper()), test_t, s.upper()) - def test_http2time_garbage(self): - for test in [ + @support.subTests('test', [ '', 'Garbage', 'Mandag 16. September 1996', @@ -121,10 +121,9 @@ def test_http2time_garbage(self): '08-01-3697739', '09 Feb 19942632 22:23:32 GMT', 'Wed, 09 Feb 1994834 22:23:32 GMT', - ]: - self.assertIsNone(http2time(test), - "http2time(%s) is not None\n" - "http2time(test) %s" % (test, http2time(test))) + ]) + def test_http2time_garbage(self, test): + self.assertIsNone(http2time(test)) def test_http2time_redos_regression_actually_completes(self): # LOOSE_HTTP_DATE_RE was vulnerable to malicious input which caused catastrophic backtracking (REDoS). @@ -149,9 +148,7 @@ def parse_date(text): self.assertEqual(parse_date("1994-02-03 19:45:29 +0530"), (1994, 2, 3, 14, 15, 29)) - def test_iso2time_formats(self): - # test iso2time for supported dates. - tests = [ + @support.subTests('s', [ '1994-02-03 00:00:00 -0000', # ISO 8601 format '1994-02-03 00:00:00 +0000', # ISO 8601 format '1994-02-03 00:00:00', # zone is optional @@ -164,16 +161,15 @@ def test_iso2time_formats(self): # A few tests with extra space at various places ' 1994-02-03 ', ' 1994-02-03T00:00:00 ', - ] - + ]) + def test_iso2time_formats(self, s): + # test iso2time for supported dates. test_t = 760233600 # assume broken POSIX counting of seconds - for s in tests: - self.assertEqual(iso2time(s), test_t, s) - self.assertEqual(iso2time(s.lower()), test_t, s.lower()) - self.assertEqual(iso2time(s.upper()), test_t, s.upper()) + self.assertEqual(iso2time(s), test_t, s) + self.assertEqual(iso2time(s.lower()), test_t, s.lower()) + self.assertEqual(iso2time(s.upper()), test_t, s.upper()) - def test_iso2time_garbage(self): - for test in [ + @support.subTests('test', [ '', 'Garbage', 'Thursday, 03-Feb-94 00:00:00 GMT', @@ -186,9 +182,9 @@ def test_iso2time_garbage(self): '01-01-1980 00:00:62', '01-01-1980T00:00:62', '19800101T250000Z', - ]: - self.assertIsNone(iso2time(test), - "iso2time(%r)" % test) + ]) + def test_iso2time_garbage(self, test): + self.assertIsNone(iso2time(test)) def test_iso2time_performance_regression(self): # If ISO_DATE_RE regresses to quadratic complexity, this test will take a very long time to succeed. @@ -199,24 +195,23 @@ def test_iso2time_performance_regression(self): class HeaderTests(unittest.TestCase): - def test_parse_ns_headers(self): - # quotes should be stripped - expected = [[('foo', 'bar'), ('expires', 2209069412), ('version', '0')]] - for hdr in [ + @support.subTests('hdr', [ 'foo=bar; expires=01 Jan 2040 22:23:32 GMT', 'foo=bar; expires="01 Jan 2040 22:23:32 GMT"', - ]: - self.assertEqual(parse_ns_headers([hdr]), expected) - - def test_parse_ns_headers_version(self): - + ]) + def test_parse_ns_headers(self, hdr): # quotes should be stripped - expected = [[('foo', 'bar'), ('version', '1')]] - for hdr in [ + expected = [[('foo', 'bar'), ('expires', 2209069412), ('version', '0')]] + self.assertEqual(parse_ns_headers([hdr]), expected) + + @support.subTests('hdr', [ 'foo=bar; version="1"', 'foo=bar; Version="1"', - ]: - self.assertEqual(parse_ns_headers([hdr]), expected) + ]) + def test_parse_ns_headers_version(self, hdr): + # quotes should be stripped + expected = [[('foo', 'bar'), ('version', '1')]] + self.assertEqual(parse_ns_headers([hdr]), expected) def test_parse_ns_headers_special_names(self): # names such as 'expires' are not special in first name=value pair @@ -226,8 +221,7 @@ def test_parse_ns_headers_special_names(self): expected = [[("expires", "01 Jan 2040 22:23:32 GMT"), ("version", "0")]] self.assertEqual(parse_ns_headers([hdr]), expected) - def test_join_header_words(self): - for src, expected in [ + @support.subTests('src,expected', [ ([[("foo", None), ("bar", "baz")]], "foo; bar=baz"), (([]), ""), (([[]]), ""), @@ -237,12 +231,11 @@ def test_join_header_words(self): 'n; foo="foo;_", bar=foo_bar'), ([[("n", "m"), ("foo", None)], [("bar", "foo_bar")]], 'n=m; foo, bar=foo_bar'), - ]: - with self.subTest(src=src): - self.assertEqual(join_header_words(src), expected) + ]) + def test_join_header_words(self, src, expected): + self.assertEqual(join_header_words(src), expected) - def test_split_header_words(self): - tests = [ + @support.subTests('arg,expect', [ ("foo", [[("foo", None)]]), ("foo=bar", [[("foo", "bar")]]), (" foo ", [[("foo", None)]]), @@ -259,24 +252,22 @@ def test_split_header_words(self): (r'foo; bar=baz, spam=, foo="\,\;\"", bar= ', [[("foo", None), ("bar", "baz")], [("spam", "")], [("foo", ',;"')], [("bar", "")]]), - ] - - for arg, expect in tests: - try: - result = split_header_words([arg]) - except: - import traceback, io - f = io.StringIO() - traceback.print_exc(None, f) - result = "(error -- traceback follows)\n\n%s" % f.getvalue() - self.assertEqual(result, expect, """ + ]) + def test_split_header_words(self, arg, expect): + try: + result = split_header_words([arg]) + except: + import traceback, io + f = io.StringIO() + traceback.print_exc(None, f) + result = "(error -- traceback follows)\n\n%s" % f.getvalue() + self.assertEqual(result, expect, """ When parsing: '%s' Expected: '%s' Got: '%s' """ % (arg, expect, result)) - def test_roundtrip(self): - tests = [ + @support.subTests('arg,expect', [ ("foo", "foo"), ("foo=bar", "foo=bar"), (" foo ", "foo"), @@ -309,12 +300,11 @@ def test_roundtrip(self): ('n; foo="foo;_", bar="foo,_"', 'n; foo="foo;_", bar="foo,_"'), - ] - - for arg, expect in tests: - input = split_header_words([arg]) - res = join_header_words(input) - self.assertEqual(res, expect, """ + ]) + def test_roundtrip(self, arg, expect): + input = split_header_words([arg]) + res = join_header_words(input) + self.assertEqual(res, expect, """ When parsing: '%s' Expected: '%s' Got: '%s' @@ -516,14 +506,7 @@ class CookieTests(unittest.TestCase): ## just the 7 special TLD's listed in their spec. And folks rely on ## that... - def test_domain_return_ok(self): - # test optimization: .domain_return_ok() should filter out most - # domains in the CookieJar before we try to access them (because that - # may require disk access -- in particular, with MSIECookieJar) - # This is only a rough check for performance reasons, so it's not too - # critical as long as it's sufficiently liberal. - pol = DefaultCookiePolicy() - for url, domain, ok in [ + @support.subTests('url,domain,ok', [ ("http://foo.bar.com/", "blah.com", False), ("http://foo.bar.com/", "rhubarb.blah.com", False), ("http://foo.bar.com/", "rhubarb.foo.bar.com", False), @@ -543,11 +526,18 @@ def test_domain_return_ok(self): ("http://foo/", ".local", True), ("http://barfoo.com", ".foo.com", False), ("http://barfoo.com", "foo.com", False), - ]: - request = urllib.request.Request(url) - r = pol.domain_return_ok(domain, request) - if ok: self.assertTrue(r) - else: self.assertFalse(r) + ]) + def test_domain_return_ok(self, url, domain, ok): + # test optimization: .domain_return_ok() should filter out most + # domains in the CookieJar before we try to access them (because that + # may require disk access -- in particular, with MSIECookieJar) + # This is only a rough check for performance reasons, so it's not too + # critical as long as it's sufficiently liberal. + pol = DefaultCookiePolicy() + request = urllib.request.Request(url) + r = pol.domain_return_ok(domain, request) + if ok: self.assertTrue(r) + else: self.assertFalse(r) def test_missing_value(self): # missing = sign in Cookie: header is regarded by Mozilla as a missing @@ -581,10 +571,7 @@ def test_missing_value(self): self.assertEqual(interact_netscape(c, "http://www.acme.com/foo/"), '"spam"; eggs') - def test_rfc2109_handling(self): - # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, - # dependent on policy settings - for rfc2109_as_netscape, rfc2965, version in [ + @support.subTests('rfc2109_as_netscape,rfc2965,version', [ # default according to rfc2965 if not explicitly specified (None, False, 0), (None, True, 1), @@ -593,24 +580,27 @@ def test_rfc2109_handling(self): (False, True, 1), (True, False, 0), (True, True, 0), - ]: - policy = DefaultCookiePolicy( - rfc2109_as_netscape=rfc2109_as_netscape, - rfc2965=rfc2965) - c = CookieJar(policy) - interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") - try: - cookie = c._cookies["www.example.com"]["/"]["ni"] - except KeyError: - self.assertIsNone(version) # didn't expect a stored cookie - else: - self.assertEqual(cookie.version, version) - # 2965 cookies are unaffected - interact_2965(c, "http://www.example.com/", - "foo=bar; Version=1") - if rfc2965: - cookie2965 = c._cookies["www.example.com"]["/"]["foo"] - self.assertEqual(cookie2965.version, 1) + ]) + def test_rfc2109_handling(self, rfc2109_as_netscape, rfc2965, version): + # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, + # dependent on policy settings + policy = DefaultCookiePolicy( + rfc2109_as_netscape=rfc2109_as_netscape, + rfc2965=rfc2965) + c = CookieJar(policy) + interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") + try: + cookie = c._cookies["www.example.com"]["/"]["ni"] + except KeyError: + self.assertIsNone(version) # didn't expect a stored cookie + else: + self.assertEqual(cookie.version, version) + # 2965 cookies are unaffected + interact_2965(c, "http://www.example.com/", + "foo=bar; Version=1") + if rfc2965: + cookie2965 = c._cookies["www.example.com"]["/"]["foo"] + self.assertEqual(cookie2965.version, 1) def test_ns_parser(self): c = CookieJar() @@ -778,8 +768,7 @@ def test_default_path_with_query(self): # Cookie is sent back to the same URI. self.assertEqual(interact_netscape(cj, uri), value) - def test_escape_path(self): - cases = [ + @support.subTests('arg,result', [ # quoted safe ("/foo%2f/bar", "/foo%2F/bar"), ("/foo%2F/bar", "/foo%2F/bar"), @@ -799,9 +788,9 @@ def test_escape_path(self): ("/foo/bar\u00fc", "/foo/bar%C3%BC"), # UTF-8 encoded # unicode ("/foo/bar\uabcd", "/foo/bar%EA%AF%8D"), # UTF-8 encoded - ] - for arg, result in cases: - self.assertEqual(escape_path(arg), result) + ]) + def test_escape_path(self, arg, result): + self.assertEqual(escape_path(arg), result) def test_request_path(self): # with parameters diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 38429ad480ff1c..bcb828edec7c39 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1465,6 +1465,72 @@ def run_server(): thread.join() self.assertEqual(result, b"proxied data\n") + def test_large_content_length(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + [conn, address] = serv.accept() + with conn: + while conn.recv(1024): + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" % size) + conn.sendall(b'A' * (size//3)) + conn.sendall(b'B' * (size - size//3)) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(15, 27): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertEqual(len(response.read()), size) + finally: + conn.close() + thread.join(1.0) + + def test_large_content_length_truncated(self): + serv = socket.create_server((HOST, 0)) + self.addCleanup(serv.close) + + def run_server(): + while True: + [conn, address] = serv.accept() + with conn: + conn.recv(1024) + if not size: + break + conn.sendall( + b"HTTP/1.1 200 Ok\r\n" + b"Content-Length: %d\r\n" + b"\r\n" + b"Text" % size) + + thread = threading.Thread(target=run_server) + thread.start() + self.addCleanup(thread.join, 1.0) + + conn = client.HTTPConnection(*serv.getsockname()) + try: + for w in range(18, 65): + size = 1 << w + conn.request("GET", "/") + with conn.getresponse() as response: + self.assertRaises(client.IncompleteRead, response.read) + conn.close() + finally: + conn.close() + size = 0 + conn.request("GET", "/") + conn.close() + thread.join(1.0) + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 2cafa4e45a1313..7b58c5ef55b337 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -8,6 +8,7 @@ SimpleHTTPRequestHandler, CGIHTTPRequestHandler from http import server, HTTPStatus +import contextlib import os import socket import sys @@ -359,6 +360,44 @@ def test_head_via_send_error(self): self.assertEqual(b'', data) +class HTTP09ServerTestCase(BaseTestCase): + + class request_handler(NoLogRequestHandler, BaseHTTPRequestHandler): + """Request handler for HTTP/0.9 server.""" + + def do_GET(self): + self.wfile.write(f'OK: here is {self.path}\r\n'.encode()) + + def setUp(self): + super().setUp() + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock = self.enterContext(self.sock) + self.sock.connect((self.HOST, self.PORT)) + + def test_simple_get(self): + self.sock.send(b'GET /index.html\r\n') + res = self.sock.recv(1024) + self.assertEqual(res, b"OK: here is /index.html\r\n") + + def test_invalid_request(self): + self.sock.send(b'POST /index.html\r\n') + res = self.sock.recv(1024) + self.assertIn(b"Bad HTTP/0.9 request type ('POST')", res) + + def test_single_request(self): + self.sock.send(b'GET /foo.html\r\n') + res = self.sock.recv(1024) + self.assertEqual(res, b"OK: here is /foo.html\r\n") + + # Ignore errors if the connection is already closed, + # as this is the expected behavior of HTTP/0.9. + with contextlib.suppress(OSError): + self.sock.send(b'GET /bar.html\r\n') + res = self.sock.recv(1024) + # The server should not process our request. + self.assertEqual(res, b'') + + def certdata_file(*path): return os.path.join(os.path.dirname(__file__), "certdata", *path) @@ -522,42 +561,120 @@ def close_conn(): reader.close() return body + def check_list_dir_dirname(self, dirname, quotedname=None): + fullpath = os.path.join(self.tempdir, dirname) + try: + os.mkdir(os.path.join(self.tempdir, dirname)) + except (OSError, UnicodeEncodeError): + self.skipTest(f'Can not create directory {dirname!a} ' + f'on current file system') + + if quotedname is None: + quotedname = urllib.parse.quote(dirname, errors='surrogatepass') + response = self.request(self.base_url + '/' + quotedname + '/') + body = self.check_status_and_reason(response, HTTPStatus.OK) + displaypath = html.escape(f'{self.base_url}/{dirname}/', quote=False) + enc = sys.getfilesystemencoding() + prefix = f'listing for {displaypath}</'.encode(enc, 'surrogateescape') + self.assertIn(prefix + b'title>', body) + self.assertIn(prefix + b'h1>', body) + + def check_list_dir_filename(self, filename): + fullpath = os.path.join(self.tempdir, filename) + content = ascii(fullpath).encode() + (os_helper.TESTFN_UNDECODABLE or b'\xff') + try: + with open(fullpath, 'wb') as f: + f.write(content) + except OSError: + self.skipTest(f'Can not create file {filename!a} ' + f'on current file system') + + response = self.request(self.base_url + '/') + body = self.check_status_and_reason(response, HTTPStatus.OK) + quotedname = urllib.parse.quote(filename, errors='surrogatepass') + enc = response.headers.get_content_charset() + self.assertIsNotNone(enc) + self.assertIn((f'href="{quotedname}"').encode('ascii'), body) + displayname = html.escape(filename, quote=False) + self.assertIn(f'>{displayname}<'.encode(enc, 'surrogateescape'), body) + + response = self.request(self.base_url + '/' + quotedname) + self.check_status_and_reason(response, HTTPStatus.OK, data=content) + + @unittest.skipUnless(os_helper.TESTFN_NONASCII, + 'need os_helper.TESTFN_NONASCII') + def test_list_dir_nonascii_dirname(self): + dirname = os_helper.TESTFN_NONASCII + '.dir' + self.check_list_dir_dirname(dirname) + + @unittest.skipUnless(os_helper.TESTFN_NONASCII, + 'need os_helper.TESTFN_NONASCII') + def test_list_dir_nonascii_filename(self): + filename = os_helper.TESTFN_NONASCII + '.txt' + self.check_list_dir_filename(filename) + @unittest.skipIf(is_apple, 'undecodable name cannot always be decoded on Apple platforms') @unittest.skipIf(sys.platform == 'win32', 'undecodable name cannot be decoded on win32') @unittest.skipUnless(os_helper.TESTFN_UNDECODABLE, 'need os_helper.TESTFN_UNDECODABLE') - def test_undecodable_filename(self): - enc = sys.getfilesystemencoding() - filename = os.fsdecode(os_helper.TESTFN_UNDECODABLE) + '.txt' - with open(os.path.join(self.tempdir, filename), 'wb') as f: - f.write(os_helper.TESTFN_UNDECODABLE) - response = self.request(self.base_url + '/') - if is_apple: - # On Apple platforms the HFS+ filesystem replaces bytes that - # aren't valid UTF-8 into a percent-encoded value. - for name in os.listdir(self.tempdir): - if name != 'test': # Ignore a filename created in setUp(). - filename = name - break - body = self.check_status_and_reason(response, HTTPStatus.OK) - quotedname = urllib.parse.quote(filename, errors='surrogatepass') - self.assertIn(('href="%s"' % quotedname) - .encode(enc, 'surrogateescape'), body) - self.assertIn(('>%s<' % html.escape(filename, quote=False)) - .encode(enc, 'surrogateescape'), body) - response = self.request(self.base_url + '/' + quotedname) - self.check_status_and_reason(response, HTTPStatus.OK, - data=os_helper.TESTFN_UNDECODABLE) + def test_list_dir_undecodable_dirname(self): + dirname = os.fsdecode(os_helper.TESTFN_UNDECODABLE) + '.dir' + self.check_list_dir_dirname(dirname) - def test_undecodable_parameter(self): - # sanity check using a valid parameter + @unittest.skipIf(is_apple, + 'undecodable name cannot always be decoded on Apple platforms') + @unittest.skipIf(sys.platform == 'win32', + 'undecodable name cannot be decoded on win32') + @unittest.skipUnless(os_helper.TESTFN_UNDECODABLE, + 'need os_helper.TESTFN_UNDECODABLE') + def test_list_dir_undecodable_filename(self): + filename = os.fsdecode(os_helper.TESTFN_UNDECODABLE) + '.txt' + self.check_list_dir_filename(filename) + + def test_list_dir_undecodable_dirname2(self): + dirname = '\ufffd.dir' + self.check_list_dir_dirname(dirname, quotedname='%ff.dir') + + @unittest.skipUnless(os_helper.TESTFN_UNENCODABLE, + 'need os_helper.TESTFN_UNENCODABLE') + def test_list_dir_unencodable_dirname(self): + dirname = os_helper.TESTFN_UNENCODABLE + '.dir' + self.check_list_dir_dirname(dirname) + + @unittest.skipUnless(os_helper.TESTFN_UNENCODABLE, + 'need os_helper.TESTFN_UNENCODABLE') + def test_list_dir_unencodable_filename(self): + filename = os_helper.TESTFN_UNENCODABLE + '.txt' + self.check_list_dir_filename(filename) + + def test_list_dir_escape_dirname(self): + # Characters that need special treating in URL or HTML. + for name in ('q?', 'f#', '&amp;', '&amp', '<i>', '"dq"', "'sq'", + '%A4', '%E2%82%AC'): + with self.subTest(name=name): + dirname = name + '.dir' + self.check_list_dir_dirname(dirname, + quotedname=urllib.parse.quote(dirname, safe='&<>\'"')) + + def test_list_dir_escape_filename(self): + # Characters that need special treating in URL or HTML. + for name in ('q?', 'f#', '&amp;', '&amp', '<i>', '"dq"', "'sq'", + '%A4', '%E2%82%AC'): + with self.subTest(name=name): + filename = name + '.txt' + self.check_list_dir_filename(filename) + os_helper.unlink(os.path.join(self.tempdir, filename)) + + def test_list_dir_with_query_and_fragment(self): + prefix = f'listing for {self.base_url}/</'.encode('latin1') + response = self.request(self.base_url + '/#123').read() + self.assertIn(prefix + b'title>', response) + self.assertIn(prefix + b'h1>', response) response = self.request(self.base_url + '/?x=123').read() - self.assertRegex(response, rf'listing for {self.base_url}/\?x=123'.encode('latin1')) - # now the bogus encoding - response = self.request(self.base_url + '/?x=%bb').read() - self.assertRegex(response, rf'listing for {self.base_url}/\?x=\xef\xbf\xbd'.encode('latin1')) + self.assertIn(prefix + b'title>', response) + self.assertIn(prefix + b'h1>', response) def test_get_dir_redirect_location_domain_injection_bug(self): """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. @@ -615,10 +732,19 @@ def test_get(self): # check for trailing "/" which should return 404. See Issue17324 response = self.request(self.base_url + '/test/') self.check_status_and_reason(response, HTTPStatus.NOT_FOUND) + response = self.request(self.base_url + '/test%2f') + self.check_status_and_reason(response, HTTPStatus.NOT_FOUND) + response = self.request(self.base_url + '/test%2F') + self.check_status_and_reason(response, HTTPStatus.NOT_FOUND) response = self.request(self.base_url + '/') self.check_status_and_reason(response, HTTPStatus.OK) + response = self.request(self.base_url + '%2f') + self.check_status_and_reason(response, HTTPStatus.OK) + response = self.request(self.base_url + '%2F') + self.check_status_and_reason(response, HTTPStatus.OK) response = self.request(self.base_url) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader("Location"), self.base_url + "/") self.assertEqual(response.getheader("Content-Length"), "0") response = self.request(self.base_url + '/?hi=2') self.check_status_and_reason(response, HTTPStatus.OK) @@ -724,6 +850,8 @@ def test_path_without_leading_slash(self): self.check_status_and_reason(response, HTTPStatus.OK) response = self.request(self.tempdir_name) self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader("Location"), + self.tempdir_name + "/") response = self.request(self.tempdir_name + '/?hi=2') self.check_status_and_reason(response, HTTPStatus.OK) response = self.request(self.tempdir_name + '?hi=1') @@ -731,27 +859,6 @@ def test_path_without_leading_slash(self): self.assertEqual(response.getheader("Location"), self.tempdir_name + "/?hi=1") - def test_html_escape_filename(self): - filename = '<test&>.txt' - fullpath = os.path.join(self.tempdir, filename) - - try: - open(fullpath, 'wb').close() - except OSError: - raise unittest.SkipTest('Can not create file %s on current file ' - 'system' % filename) - - try: - response = self.request(self.base_url + '/') - body = self.check_status_and_reason(response, HTTPStatus.OK) - enc = response.headers.get_content_charset() - finally: - os.unlink(fullpath) # avoid affecting test_undecodable_filename - - self.assertIsNotNone(enc) - html_text = '>%s<' % html.escape(filename, quote=False) - self.assertIn(html_text.encode(enc), body) - cgi_file1 = """\ #!%s @@ -806,6 +913,20 @@ def test_html_escape_filename(self): print("</pre>") """ +cgi_file7 = """\ +#!%s +import os +import sys + +print("Content-type: text/plain") +print() + +content_length = int(os.environ["CONTENT_LENGTH"]) +body = sys.stdin.buffer.read(content_length) + +print(f"{content_length} {len(body)}") +""" + @unittest.skipIf(hasattr(os, 'geteuid') and os.geteuid() == 0, "This test can't be run reliably as root (issue #13308).") @@ -845,6 +966,8 @@ def setUp(self): self.file3_path = None self.file4_path = None self.file5_path = None + self.file6_path = None + self.file7_path = None # The shebang line should be pure ASCII: use symlink if possible. # See issue #7668. @@ -899,6 +1022,11 @@ def setUp(self): file6.write(cgi_file6 % self.pythonexe) os.chmod(self.file6_path, 0o777) + self.file7_path = os.path.join(self.cgi_dir, 'file7.py') + with open(self.file7_path, 'w', encoding='utf-8') as file7: + file7.write(cgi_file7 % self.pythonexe) + os.chmod(self.file7_path, 0o777) + os.chdir(self.parent_dir) def tearDown(self): @@ -921,6 +1049,8 @@ def tearDown(self): os.remove(self.file5_path) if self.file6_path: os.remove(self.file6_path) + if self.file7_path: + os.remove(self.file7_path) os.rmdir(self.cgi_child_dir) os.rmdir(self.cgi_dir) os.rmdir(self.cgi_dir_in_sub_dir) @@ -993,6 +1123,22 @@ def test_post(self): self.assertEqual(res.read(), b'1, python, 123456' + self.linesep) + def test_large_content_length(self): + for w in range(15, 25): + size = 1 << w + body = b'X' * size + headers = {'Content-Length' : str(size)} + res = self.request('/cgi-bin/file7.py', 'POST', body, headers) + self.assertEqual(res.read(), b'%d %d' % (size, size) + self.linesep) + + def test_large_content_length_truncated(self): + with support.swap_attr(self.request_handler, 'timeout', 0.001): + for w in range(18, 65): + size = 1 << w + headers = {'Content-Length' : str(size)} + res = self.request('/cgi-bin/file1.py', 'POST', b'x', headers) + self.assertEqual(res.read(), b'Hello World' + self.linesep) + def test_invaliduri(self): res = self.request('/cgi-bin/invalid') res.read() diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index a13ee58d650e1b..a03d7b8bb2a42c 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -256,7 +256,20 @@ def cmd_IDLE(self, tag, args): self._send_tagged(tag, 'BAD', 'Expected DONE') -class NewIMAPTestsMixin(): +class AuthHandler_CRAM_MD5(SimpleIMAPHandler): + capabilities = 'LOGINDISABLED AUTH=CRAM-MD5' + def cmd_AUTHENTICATE(self, tag, args): + self._send_textline('+ PDE4OTYuNjk3MTcwOTUyQHBvc3RvZmZpY2Uucm' + 'VzdG9uLm1jaS5uZXQ=') + r = yield + if (r == b'dGltIGYxY2E2YmU0NjRiOWVmYT' + b'FjY2E2ZmZkNmNmMmQ5ZjMy\r\n'): + self._send_tagged(tag, 'OK', 'CRAM-MD5 successful') + else: + self._send_tagged(tag, 'NO', 'No access') + + +class NewIMAPTestsMixin: client = None def _setup(self, imap_handler, connect=True): @@ -360,7 +373,11 @@ def cmd_AUTHENTICATE(self, tag, args): self._send_tagged(tag, 'OK', 'FAKEAUTH successful') def cmd_APPEND(self, tag, args): self._send_textline('+') - self.server.response = yield + self.server.response = args + literal = yield + self.server.response.append(literal) + literal = yield + self.server.response.append(literal) self._send_tagged(tag, 'OK', 'okay') client, server = self._setup(UTF8AppendServer) self.assertEqual(client._encoding, 'ascii') @@ -371,10 +388,13 @@ def cmd_APPEND(self, tag, args): self.assertEqual(code, 'OK') self.assertEqual(client._encoding, 'utf-8') msg_string = 'Subject: üñí©öðé' - typ, data = client.append(None, None, None, msg_string.encode('utf-8')) + typ, data = client.append( + None, None, None, (msg_string + '\n').encode('utf-8')) self.assertEqual(typ, 'OK') self.assertEqual(server.response, - ('UTF8 (%s)\r\n' % msg_string).encode('utf-8')) + ['INBOX', 'UTF8', + '(~{25}', ('%s\r\n' % msg_string).encode('utf-8'), + b')\r\n' ]) def test_search_disallows_charset_in_utf8_mode(self): class UTF8Server(SimpleIMAPHandler): @@ -439,40 +459,31 @@ def cmd_AUTHENTICATE(self, tag, args): @hashlib_helper.requires_hashdigest('md5', openssl=True) def test_login_cram_md5_bytes(self): - class AuthHandler(SimpleIMAPHandler): - capabilities = 'LOGINDISABLED AUTH=CRAM-MD5' - def cmd_AUTHENTICATE(self, tag, args): - self._send_textline('+ PDE4OTYuNjk3MTcwOTUyQHBvc3RvZmZpY2Uucm' - 'VzdG9uLm1jaS5uZXQ=') - r = yield - if (r == b'dGltIGYxY2E2YmU0NjRiOWVmYT' - b'FjY2E2ZmZkNmNmMmQ5ZjMy\r\n'): - self._send_tagged(tag, 'OK', 'CRAM-MD5 successful') - else: - self._send_tagged(tag, 'NO', 'No access') - client, _ = self._setup(AuthHandler) - self.assertTrue('AUTH=CRAM-MD5' in client.capabilities) + client, _ = self._setup(AuthHandler_CRAM_MD5) + self.assertIn('AUTH=CRAM-MD5', client.capabilities) ret, _ = client.login_cram_md5("tim", b"tanstaaftanstaaf") self.assertEqual(ret, "OK") @hashlib_helper.requires_hashdigest('md5', openssl=True) def test_login_cram_md5_plain_text(self): - class AuthHandler(SimpleIMAPHandler): - capabilities = 'LOGINDISABLED AUTH=CRAM-MD5' - def cmd_AUTHENTICATE(self, tag, args): - self._send_textline('+ PDE4OTYuNjk3MTcwOTUyQHBvc3RvZmZpY2Uucm' - 'VzdG9uLm1jaS5uZXQ=') - r = yield - if (r == b'dGltIGYxY2E2YmU0NjRiOWVmYT' - b'FjY2E2ZmZkNmNmMmQ5ZjMy\r\n'): - self._send_tagged(tag, 'OK', 'CRAM-MD5 successful') - else: - self._send_tagged(tag, 'NO', 'No access') - client, _ = self._setup(AuthHandler) - self.assertTrue('AUTH=CRAM-MD5' in client.capabilities) + client, _ = self._setup(AuthHandler_CRAM_MD5) + self.assertIn('AUTH=CRAM-MD5', client.capabilities) ret, _ = client.login_cram_md5("tim", "tanstaaftanstaaf") self.assertEqual(ret, "OK") + def test_login_cram_md5_blocked(self): + def side_effect(*a, **kw): + raise ValueError + + client, _ = self._setup(AuthHandler_CRAM_MD5) + self.assertIn('AUTH=CRAM-MD5', client.capabilities) + msg = re.escape("CRAM-MD5 authentication is not supported") + with ( + mock.patch("hmac.HMAC", side_effect=side_effect), + self.assertRaisesRegex(imaplib.IMAP4.error, msg) + ): + client.login_cram_md5("tim", b"tanstaaftanstaaf") + def test_aborted_authentication(self): class MyServer(SimpleIMAPHandler): def cmd_AUTHENTICATE(self, tag, args): @@ -883,7 +894,11 @@ def test_enable_UTF8_True_append(self): class UTF8AppendServer(self.UTF8Server): def cmd_APPEND(self, tag, args): self._send_textline('+') - self.server.response = yield + self.server.response = args + literal = yield + self.server.response.append(literal) + literal = yield + self.server.response.append(literal) self._send_tagged(tag, 'OK', 'okay') with self.reaped_pair(UTF8AppendServer) as (server, client): @@ -897,12 +912,12 @@ def cmd_APPEND(self, tag, args): self.assertEqual(client._encoding, 'utf-8') msg_string = 'Subject: üñí©öðé' typ, data = client.append( - None, None, None, msg_string.encode('utf-8')) + None, None, None, (msg_string + '\n').encode('utf-8')) self.assertEqual(typ, 'OK') - self.assertEqual( - server.response, - ('UTF8 (%s)\r\n' % msg_string).encode('utf-8') - ) + self.assertEqual(server.response, + ['INBOX', 'UTF8', + '(~{25}', ('%s\r\n' % msg_string).encode('utf-8'), + b')\r\n' ]) # XXX also need a test that makes sure that the Literal and Untagged_status # regexes uses unicode in UTF8 mode instead of the default ASCII. diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 6e34094c5aa422..95121debbbfa74 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -35,7 +35,7 @@ cpython_only, is_apple_mobile, is_emscripten, - is_wasi, + is_wasm32, run_in_subinterp, run_in_subinterp_with_config, Py_TRACE_REFS, @@ -1187,6 +1187,7 @@ class substr(str): @unittest.skipIf(sys.platform == 'win32', 'Cannot delete cwd on Windows') @unittest.skipIf(sys.platform == 'sunos5', 'Cannot delete cwd on Solaris/Illumos') + @unittest.skipIf(sys.platform.startswith('aix'), 'Cannot delete cwd on AIX') def test_script_shadowing_stdlib_cwd_failure(self): with os_helper.temp_dir() as tmp: subtmp = os.path.join(tmp, "subtmp") @@ -1257,7 +1258,7 @@ class FilePermissionTests(unittest.TestCase): @unittest.skipUnless(os.name == 'posix', "test meaningful only on posix systems") @unittest.skipIf( - is_emscripten or is_wasi, + is_wasm32, "Emscripten's/WASI's umask is a stub." ) def test_creation_mode(self): @@ -3159,6 +3160,7 @@ def test_check_state_first(self): # Also, we test with a single-phase module that has global state, # which is shared by all interpreters. + @no_rerun(reason="module state is not cleared (see gh-140657)") @requires_subinterpreters def test_basic_multiple_interpreters_main_no_reset(self): # without resetting; already loaded in main interpreter diff --git a/Lib/test/test_importlib/import_/test_relative_imports.py b/Lib/test/test_importlib/import_/test_relative_imports.py index e535d119763148..1549cbe96ce2d1 100644 --- a/Lib/test/test_importlib/import_/test_relative_imports.py +++ b/Lib/test/test_importlib/import_/test_relative_imports.py @@ -223,6 +223,21 @@ def test_relative_import_no_package_exists_absolute(self): self.__import__('sys', {'__package__': '', '__spec__': None}, level=1) + def test_malicious_relative_import(self): + # https://github.com/python/cpython/issues/134100 + # Test to make sure UAF bug with error msg doesn't come back to life + import sys + loooong = "".ljust(0x23000, "b") + name = f"a.{loooong}.c" + + with util.uncache(name): + sys.modules[name] = {} + with self.assertRaisesRegex( + KeyError, + r"'a\.b+' not in sys\.modules as expected" + ): + __import__(f"{loooong}.c", {"__package__": "a"}, level=1) + (Frozen_RelativeImports, Source_RelativeImports diff --git a/Lib/test/test_importlib/test_abc.py b/Lib/test/test_importlib/test_abc.py index 070920d0da7e19..dd943210ffca3c 100644 --- a/Lib/test/test_importlib/test_abc.py +++ b/Lib/test/test_importlib/test_abc.py @@ -224,15 +224,7 @@ class ResourceLoaderDefaultsTests(ABCTestHarness): SPLIT = make_abc_subclasses(ResourceLoader) def test_get_data(self): - with ( - self.assertRaises(IOError), - self.assertWarnsRegex( - DeprecationWarning, - r"importlib\.abc\.ResourceLoader is deprecated in favour of " - r"supporting resource loading through importlib\.resources" - r"\.abc\.TraversableResources.", - ), - ): + with self.assertRaises(IOError): self.ins.get_data('/some/path') @@ -936,13 +928,8 @@ def get_filename(self, fullname): def path_stats(self, path): return {'mtime': 1} - with self.assertWarnsRegex( - DeprecationWarning, - r"importlib\.abc\.ResourceLoader is deprecated in favour of " - r"supporting resource loading through importlib\.resources" - r"\.abc\.TraversableResources.", - ): - loader = DummySourceLoader() + + loader = DummySourceLoader() with self.assertWarnsRegex( DeprecationWarning, @@ -952,17 +939,5 @@ def path_stats(self, path): loader.path_mtime('foo.py') -class ResourceLoaderDeprecationWarningsTests(unittest.TestCase): - """Tests ResourceLoader deprecation warnings.""" - - def test_deprecated_resource_loader(self): - from importlib.abc import ResourceLoader - class DummyLoader(ResourceLoader): - def get_data(self, path): - return b'' - - with self.assertWarns(DeprecationWarning): - DummyLoader() - if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_importlib/test_locks.py b/Lib/test/test_importlib/test_locks.py index befac5d62b0abf..655e5881a1530b 100644 --- a/Lib/test/test_importlib/test_locks.py +++ b/Lib/test/test_importlib/test_locks.py @@ -34,6 +34,7 @@ class ModuleLockAsRLockTests: # lock status in repr unsupported test_repr = None test_locked_repr = None + test_repr_count = None def tearDown(self): for splitinit in init.values(): diff --git a/Lib/test/test_importlib/test_threaded_import.py b/Lib/test/test_importlib/test_threaded_import.py index 9af1e4d505c66e..f78dc399720c86 100644 --- a/Lib/test/test_importlib/test_threaded_import.py +++ b/Lib/test/test_importlib/test_threaded_import.py @@ -135,10 +135,12 @@ def check_parallel_module_init(self, mock_os): if verbose: print("OK.") - def test_parallel_module_init(self): + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_module_init(self, size): self.check_parallel_module_init() - def test_parallel_meta_path(self): + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_meta_path(self, size): finder = Finder() sys.meta_path.insert(0, finder) try: @@ -148,7 +150,8 @@ def test_parallel_meta_path(self): finally: sys.meta_path.remove(finder) - def test_parallel_path_hooks(self): + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_path_hooks(self, size): # Here the Finder instance is only used to check concurrent calls # to path_hook(). finder = Finder() @@ -242,13 +245,15 @@ def target(): __import__(TESTFN) del sys.modules[TESTFN] - def test_concurrent_futures_circular_import(self): + @support.bigmemtest(size=1, memuse=1.8*2**30, dry_run=False) + def test_concurrent_futures_circular_import(self, size): # Regression test for bpo-43515 fn = os.path.join(os.path.dirname(__file__), 'partial', 'cfimport.py') script_helper.assert_python_ok(fn) - def test_multiprocessing_pool_circular_import(self): + @support.bigmemtest(size=1, memuse=1.8*2**30, dry_run=False) + def test_multiprocessing_pool_circular_import(self, size): # Regression test for bpo-41567 fn = os.path.join(os.path.dirname(__file__), 'partial', 'pool_in_threads.py') diff --git a/Lib/test/test_importlib/test_util.py b/Lib/test/test_importlib/test_util.py index 5de89714eb50c7..8c14b96271ad3d 100644 --- a/Lib/test/test_importlib/test_util.py +++ b/Lib/test/test_importlib/test_util.py @@ -580,6 +580,18 @@ def test_cache_from_source_respects_pycache_prefix_relative(self): self.util.cache_from_source(path, optimization=''), os.path.normpath(expect)) + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_cache_from_source_in_root_with_pycache_prefix(self): + # Regression test for gh-82916 + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = 'qux.py' + expect = os.path.join(os.path.sep, 'tmp', 'bytecode', + f'qux.{self.tag}.pyc') + with util.temporary_pycache_prefix(pycache_prefix): + with os_helper.change_cwd('/'): + self.assertEqual(self.util.cache_from_source(path), expect) + @unittest.skipIf(sys.implementation.cache_tag is None, 'requires sys.implementation.cache_tag to not be None') def test_source_from_cache_inside_pycache_prefix(self): @@ -635,7 +647,7 @@ def test_magic_number(self): # stakeholders such as OS package maintainers must be notified # in advance. Such exceptional releases will then require an # adjustment to this test case. - EXPECTED_MAGIC_NUMBER = 3495 + EXPECTED_MAGIC_NUMBER = 3627 actual = int.from_bytes(importlib.util.MAGIC_NUMBER[:2], 'little') msg = ( @@ -776,31 +788,70 @@ def test_complete_multi_phase_init_module(self): self.run_with_own_gil(script) -class MiscTests(unittest.TestCase): - def test_atomic_write_should_notice_incomplete_writes(self): +class PatchAtomicWrites: + def __init__(self, truncate_at_length, never_complete=False): + self.truncate_at_length = truncate_at_length + self.never_complete = never_complete + self.seen_write = False + self._children = [] + + def __enter__(self): import _pyio oldwrite = os.write - seen_write = False - - truncate_at_length = 100 # Emulate an os.write that only writes partial data. def write(fd, data): - nonlocal seen_write - seen_write = True - return oldwrite(fd, data[:truncate_at_length]) + if self.seen_write and self.never_complete: + return None + self.seen_write = True + return oldwrite(fd, data[:self.truncate_at_length]) # Need to patch _io to be _pyio, so that io.FileIO is affected by the # os.write patch. - with (support.swap_attr(_bootstrap_external, '_io', _pyio), - support.swap_attr(os, 'write', write)): - with self.assertRaises(OSError): - # Make sure we write something longer than the point where we - # truncate. - content = b'x' * (truncate_at_length * 2) - _bootstrap_external._write_atomic(os_helper.TESTFN, content) - assert seen_write + self.children = [ + support.swap_attr(_bootstrap_external, '_io', _pyio), + support.swap_attr(os, 'write', write) + ] + for child in self.children: + child.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + for child in self.children: + child.__exit__(exc_type, exc_val, exc_tb) + + +class MiscTests(unittest.TestCase): + + def test_atomic_write_retries_incomplete_writes(self): + truncate_at_length = 100 + length = truncate_at_length * 2 + + with PatchAtomicWrites(truncate_at_length=truncate_at_length) as cm: + # Make sure we write something longer than the point where we + # truncate. + content = b'x' * length + _bootstrap_external._write_atomic(os_helper.TESTFN, content) + self.assertTrue(cm.seen_write) + + self.assertEqual(os.stat(support.os_helper.TESTFN).st_size, length) + os.unlink(support.os_helper.TESTFN) + + def test_atomic_write_errors_if_unable_to_complete(self): + truncate_at_length = 100 + + with ( + PatchAtomicWrites( + truncate_at_length=truncate_at_length, never_complete=True, + ) as cm, + self.assertRaises(OSError) + ): + # Make sure we write something longer than the point where we + # truncate. + content = b'x' * (truncate_at_length * 2) + _bootstrap_external._write_atomic(os_helper.TESTFN, content) + self.assertTrue(cm.seen_write) with self.assertRaises(OSError): os.stat(support.os_helper.TESTFN) # Check that the file did not get written. diff --git a/Lib/test/test_inspect/inspect_fodder2.py b/Lib/test/test_inspect/inspect_fodder2.py index 43fda6622537fc..157e12167b5d27 100644 --- a/Lib/test/test_inspect/inspect_fodder2.py +++ b/Lib/test/test_inspect/inspect_fodder2.py @@ -369,3 +369,35 @@ class dc364: # line 369 dc370 = dataclasses.make_dataclass('dc370', (('x', int), ('y', int))) dc371 = dataclasses.make_dataclass('dc370', (('x', int), ('y', int)), module=__name__) + +import inspect +import itertools + +# line 376 +ge377 = ( + inspect.currentframe() + for i in itertools.count() +) + +# line 382 +def func383(): + # line 384 + ge385 = ( + inspect.currentframe() + for i in itertools.count() + ) + return ge385 + +# line 391 +@decorator +# comment +def func394(): + return 395 + +# line 397 +@decorator + +def func400(): + return 401 + +pass # end of file diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index c9b37fcd8f6327..1ba253db48151e 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -148,6 +148,29 @@ def meth_self_o(self, object, /): pass def meth_type_noargs(type, /): pass def meth_type_o(type, object, /): pass +# Decorator decorator that returns a simple wrapped function +def identity_wrapper(func): + @functools.wraps(func) + def wrapped(*args, **kwargs): + return func(*args, **kwargs) + return wrapped + +# Original signature of the simple wrapped function returned by +# identity_wrapper(). +varargs_signature = ( + (('args', ..., ..., 'var_positional'), + ('kwargs', ..., ..., 'var_keyword')), + ..., +) + +# Decorator decorator that returns a simple descriptor +class custom_descriptor: + def __init__(self, func): + self.func = func + + def __get__(self, instance, owner): + return self.func.__get__(instance, owner) + class TestPredicates(IsTestBase): @@ -786,12 +809,12 @@ def test_getfile(self): def test_getfile_builtin_module(self): with self.assertRaises(TypeError) as e: inspect.getfile(sys) - self.assertTrue(str(e.exception).startswith('<module')) + self.assertStartsWith(str(e.exception), '<module') def test_getfile_builtin_class(self): with self.assertRaises(TypeError) as e: inspect.getfile(int) - self.assertTrue(str(e.exception).startswith('<class')) + self.assertStartsWith(str(e.exception), '<class') def test_getfile_builtin_function_or_method(self): with self.assertRaises(TypeError) as e_abs: @@ -1189,12 +1212,22 @@ def test_nested_class_definition_inside_async_function(self): self.assertSourceEqual(run(mod2.func225), 226, 227) self.assertSourceEqual(mod2.cls226, 231, 235) + self.assertSourceEqual(mod2.cls226.func232, 232, 235) self.assertSourceEqual(run(mod2.cls226().func232), 233, 234) def test_class_definition_same_name_diff_methods(self): self.assertSourceEqual(mod2.cls296, 296, 298) self.assertSourceEqual(mod2.cls310, 310, 312) + def test_generator_expression(self): + self.assertSourceEqual(next(mod2.ge377), 377, 380) + self.assertSourceEqual(next(mod2.func383()), 385, 388) + + def test_comment_or_empty_line_after_decorator(self): + self.assertSourceEqual(mod2.func394, 392, 395) + self.assertSourceEqual(mod2.func400, 398, 401) + + class TestNoEOL(GetSourceBase): def setUp(self): self.tempdir = TESTFN + '_dir' @@ -1749,14 +1782,55 @@ class C(metaclass=M): class TestFormatAnnotation(unittest.TestCase): def test_typing_replacement(self): - from test.typinganndata.ann_module9 import ann, ann1 + from test.typinganndata.ann_module9 import A, ann, ann1 self.assertEqual(inspect.formatannotation(ann), 'List[str] | int') self.assertEqual(inspect.formatannotation(ann1), 'List[testModule.typing.A] | int') + self.assertEqual(inspect.formatannotation(A, 'testModule.typing'), 'A') + self.assertEqual(inspect.formatannotation(A, 'other'), 'testModule.typing.A') + self.assertEqual( + inspect.formatannotation(ann1, 'testModule.typing'), + 'List[testModule.typing.A] | int', + ) + def test_forwardref(self): fwdref = ForwardRef('fwdref') self.assertEqual(inspect.formatannotation(fwdref), 'fwdref') + def test_formatannotationrelativeto(self): + from test.typinganndata.ann_module9 import A, ann1 + + # Builtin types: + self.assertEqual( + inspect.formatannotationrelativeto(object)(type), + 'type', + ) + + # Custom types: + self.assertEqual( + inspect.formatannotationrelativeto(None)(A), + 'testModule.typing.A', + ) + + class B: ... + B.__module__ = 'testModule.typing' + + self.assertEqual( + inspect.formatannotationrelativeto(B)(A), + 'A', + ) + + self.assertEqual( + inspect.formatannotationrelativeto(object)(A), + 'testModule.typing.A', + ) + + # Not an instance of "type": + self.assertEqual( + inspect.formatannotationrelativeto(A)(ann1), + 'List[testModule.typing.A] | int', + ) + class TestIsMethodDescriptor(unittest.TestCase): @@ -2820,7 +2894,7 @@ async def asyncTearDown(self): @classmethod def tearDownClass(cls): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) def _asyncgenstate(self): return inspect.getasyncgenstate(self.asyncgen) @@ -2949,7 +3023,7 @@ def test(po, /, pk, pkd=100, *args, ko, kod=10, **kwargs): pass sig = inspect.signature(test) - self.assertTrue(repr(sig).startswith('<Signature')) + self.assertStartsWith(repr(sig), '<Signature') self.assertTrue('(po, /, pk' in repr(sig)) # We need two functions, because it is impossible to represent @@ -2958,7 +3032,7 @@ def test2(pod=42, /): pass sig2 = inspect.signature(test2) - self.assertTrue(repr(sig2).startswith('<Signature')) + self.assertStartsWith(repr(sig2), '<Signature') self.assertTrue('(pod=42, /)' in repr(sig2)) po = sig.parameters['po'] @@ -4021,44 +4095,272 @@ def __init__(self, b): ('bar', 2, ..., "keyword_only")), ...)) - def test_signature_on_class_with_decorated_new(self): - def identity(func): - @functools.wraps(func) - def wrapped(*args, **kwargs): - return func(*args, **kwargs) - return wrapped - - class Foo: - @identity - def __new__(cls, a, b): + def test_signature_on_class_with_wrapped_metaclass_call(self): + class CM(type): + @identity_wrapper + def __call__(cls, a): + pass + class C(metaclass=CM): + def __init__(self, b): pass - self.assertEqual(self.signature(Foo), - ((('a', ..., ..., "positional_or_keyword"), - ('b', ..., ..., "positional_or_keyword")), + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), ...)) - self.assertEqual(self.signature(Foo.__new__), - ((('cls', ..., ..., "positional_or_keyword"), - ('a', ..., ..., "positional_or_keyword"), - ('b', ..., ..., "positional_or_keyword")), - ...)) + with self.subTest('classmethod'): + class CM(type): + @classmethod + @identity_wrapper + def __call__(cls, a): + return a + class C(metaclass=CM): + def __init__(self, b): + pass - class Bar: - __new__ = identity(object.__new__) + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) - varargs_signature = ( - (('args', ..., ..., 'var_positional'), - ('kwargs', ..., ..., 'var_keyword')), - ..., - ) + with self.subTest('staticmethod'): + class CM(type): + @staticmethod + @identity_wrapper + def __call__(a): + return a + class C(metaclass=CM): + def __init__(self, b): + pass + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('MethodType'): + class A: + @identity_wrapper + def call(self, a): + return a + class CM(type): + __call__ = A().call + class C(metaclass=CM): + def __init__(self, b): + pass + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('descriptor'): + class CM(type): + @custom_descriptor + @identity_wrapper + def __call__(self, a): + return a + class C(metaclass=CM): + def __init__(self, b): + pass + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + self.assertEqual(self.signature(C.__call__), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + self.assertEqual(self.signature(C, follow_wrapped=False), + varargs_signature) + self.assertEqual(self.signature(C.__call__, follow_wrapped=False), + varargs_signature) + + def test_signature_on_class_with_wrapped_init(self): + class C: + @identity_wrapper + def __init__(self, b): + pass + + C(1) # does not raise + self.assertEqual(self.signature(C), + ((('b', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('classmethod'): + class C: + @classmethod + @identity_wrapper + def __init__(cls, b): + pass + + C(1) # does not raise + self.assertEqual(self.signature(C), + ((('b', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('staticmethod'): + class C: + @staticmethod + @identity_wrapper + def __init__(b): + pass + + C(1) # does not raise + self.assertEqual(self.signature(C), + ((('b', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('MethodType'): + class A: + @identity_wrapper + def call(self, a): + pass - self.assertEqual(self.signature(Bar), ((), ...)) - self.assertEqual(self.signature(Bar.__new__), varargs_signature) - self.assertEqual(self.signature(Bar, follow_wrapped=False), - varargs_signature) - self.assertEqual(self.signature(Bar.__new__, follow_wrapped=False), - varargs_signature) + class C: + __init__ = A().call + + C(1) # does not raise + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('partial'): + class C: + __init__ = functools.partial(identity_wrapper(lambda x, a, b: None), 2) + + C(1) # does not raise + self.assertEqual(self.signature(C), + ((('b', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('partialmethod'): + class C: + @identity_wrapper + def _init(self, x, a): + self.a = (x, a) + __init__ = functools.partialmethod(_init, 2) + + self.assertEqual(C(1).a, (2, 1)) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('descriptor'): + class C: + @custom_descriptor + @identity_wrapper + def __init__(self, a): + pass + + C(1) # does not raise + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + self.assertEqual(self.signature(C.__init__), + ((('self', ..., ..., "positional_or_keyword"), + ('a', ..., ..., "positional_or_keyword")), + ...)) + + self.assertEqual(self.signature(C, follow_wrapped=False), + varargs_signature) + if support.MISSING_C_DOCSTRINGS: + self.assertRaisesRegex( + ValueError, "no signature found", + self.signature, C.__new__, follow_wrapped=False, + ) + else: + self.assertEqual(self.signature(C.__new__, follow_wrapped=False), + varargs_signature) + + def test_signature_on_class_with_wrapped_new(self): + with self.subTest('FunctionType'): + class C: + @identity_wrapper + def __new__(cls, a): + return a + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('classmethod'): + class C: + @classmethod + @identity_wrapper + def __new__(cls, cls2, a): + return a + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('staticmethod'): + class C: + @staticmethod + @identity_wrapper + def __new__(cls, a): + return a + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('MethodType'): + class A: + @identity_wrapper + def call(self, cls, a): + return a + class C: + __new__ = A().call + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('partial'): + class C: + __new__ = functools.partial(identity_wrapper(lambda x, cls, a: (x, a)), 2) + + self.assertEqual(C(1), (2, 1)) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('partialmethod'): + class C: + __new__ = functools.partialmethod(identity_wrapper(lambda cls, x, a: (x, a)), 2) + + self.assertEqual(C(1), (2, 1)) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + + with self.subTest('descriptor'): + class C: + @custom_descriptor + @identity_wrapper + def __new__(cls, a): + return a + + self.assertEqual(C(1), 1) + self.assertEqual(self.signature(C), + ((('a', ..., ..., "positional_or_keyword"),), + ...)) + self.assertEqual(self.signature(C.__new__), + ((('cls', ..., ..., "positional_or_keyword"), + ('a', ..., ..., "positional_or_keyword")), + ...)) + + self.assertEqual(self.signature(C, follow_wrapped=False), + varargs_signature) + self.assertEqual(self.signature(C.__new__, follow_wrapped=False), + varargs_signature) def test_signature_on_class_with_init(self): class C: @@ -4997,6 +5299,37 @@ def test_signature_annotation_format(self): with self.assertRaisesRegex(NameError, "undefined"): signature_func(ida.f) + def test_signature_deferred_annotations(self): + def f(x: undef): + pass + + class C: + x: undef + + def __init__(self, x: undef): + self.x = x + + sig = inspect.signature(f, annotation_format=Format.FORWARDREF) + self.assertEqual(list(sig.parameters), ['x']) + sig = inspect.signature(C, annotation_format=Format.FORWARDREF) + self.assertEqual(list(sig.parameters), ['x']) + + class CallableWrapper: + def __init__(self, func): + self.func = func + self.__annotate__ = func.__annotate__ + + def __call__(self, *args, **kwargs): + return self.func(*args, **kwargs) + + @property + def __annotations__(self): + return self.__annotate__(Format.VALUE) + + cw = CallableWrapper(f) + sig = inspect.signature(cw, annotation_format=Format.FORWARDREF) + self.assertEqual(list(sig.parameters), ['args', 'kwargs']) + def test_signature_none_annotation(self): class funclike: # Has to be callable, and have correct @@ -5102,7 +5435,7 @@ def test_signature_parameter_object(self): with self.assertRaisesRegex(ValueError, 'cannot have default values'): p.replace(kind=inspect.Parameter.VAR_POSITIONAL) - self.assertTrue(repr(p).startswith('<Parameter')) + self.assertStartsWith(repr(p), '<Parameter') self.assertTrue('"a=42"' in repr(p)) def test_signature_parameter_hashable(self): @@ -5844,9 +6177,9 @@ def test_operator_module_has_signatures(self): self._test_module_has_signatures(operator) def test_os_module_has_signatures(self): - unsupported_signature = {'chmod', 'link', 'utime'} + unsupported_signature = {'chmod', 'utime'} unsupported_signature |= {name for name in - ['get_terminal_size', 'posix_spawn', 'posix_spawnp', + ['get_terminal_size', 'link', 'posix_spawn', 'posix_spawnp', 'register_at_fork', 'startfile'] if hasattr(os, name)} self._test_module_has_signatures(os, unsupported_signature=unsupported_signature) @@ -5885,6 +6218,7 @@ def test_sysconfig_module_has_signatures(self): def test_threading_module_has_signatures(self): import threading self._test_module_has_signatures(threading) + self.assertIsNotNone(inspect.signature(threading.__excepthook__)) def test_thread_module_has_signatures(self): import _thread diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 245528ce57a146..7a7cb73f673787 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -836,7 +836,7 @@ def test_pylong_roundtrip(self): n = hibit | getrandbits(bits - 1) assert n.bit_length() == bits sn = str(n) - self.assertFalse(sn.startswith('0')) + self.assertNotStartsWith(sn, '0') self.assertEqual(n, int(sn)) bits <<= 1 diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index 66c7afce88f0d8..e88fd399dfba38 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -1,18 +1,24 @@ +import contextlib import os import pickle +import sys from textwrap import dedent import threading import types import unittest from test import support +from test.support import os_helper +from test.support import script_helper from test.support import import_helper +from test.support.script_helper import assert_python_ok # Raise SkipTest if subinterpreters not supported. _interpreters = import_helper.import_module('_interpreters') +from concurrent import interpreters from test.support import Py_GIL_DISABLED -from test.support import interpreters from test.support import force_not_colorized -from test.support.interpreters import ( +import test._crossinterp_definitions as defs +from concurrent.interpreters import ( InterpreterError, InterpreterNotFoundError, ExecutionFailed, ) from .utils import ( @@ -29,6 +35,59 @@ WHENCE_STR_STDLIB = '_interpreters module' +def is_pickleable(obj): + try: + pickle.dumps(obj) + except Exception: + return False + return True + + +@contextlib.contextmanager +def defined_in___main__(name, script, *, remove=False): + import __main__ as mainmod + mainns = vars(mainmod) + assert name not in mainns + exec(script, mainns, mainns) + if remove: + yield mainns.pop(name) + else: + try: + yield mainns[name] + finally: + mainns.pop(name, None) + + +def build_excinfo(exctype, msg=None, formatted=None, errdisplay=None): + if isinstance(exctype, type): + assert issubclass(exctype, BaseException), exctype + exctype = types.SimpleNamespace( + __name__=exctype.__name__, + __qualname__=exctype.__qualname__, + __module__=exctype.__module__, + ) + elif isinstance(exctype, str): + module, _, name = exctype.rpartition(exctype) + if not module and name in __builtins__: + module = 'builtins' + exctype = types.SimpleNamespace( + __name__=name, + __qualname__=exctype, + __module__=module or None, + ) + else: + assert isinstance(exctype, types.SimpleNamespace) + assert msg is None or isinstance(msg, str), msg + assert formatted is None or isinstance(formatted, str), formatted + assert errdisplay is None or isinstance(errdisplay, str), errdisplay + return types.SimpleNamespace( + type=exctype, + msg=msg, + formatted=formatted, + errdisplay=errdisplay, + ) + + class ModuleTests(TestBase): def test_queue_aliases(self): @@ -75,7 +134,7 @@ def test_in_subinterpreter(self): main, = interpreters.list_all() interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters interp = interpreters.create() print(interp.id) """)) @@ -138,7 +197,7 @@ def test_subinterpreter(self): main = interpreters.get_main() interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters cur = interpreters.get_current() print(cur.id) """)) @@ -155,7 +214,7 @@ def test_idempotent(self): with self.subTest('subinterpreter'): interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters cur = interpreters.get_current() print(id(cur)) cur = interpreters.get_current() @@ -167,7 +226,7 @@ def test_idempotent(self): with self.subTest('per-interpreter'): interp = interpreters.create() out = _run_output(interp, dedent(""" - from test.support import interpreters + from concurrent import interpreters cur = interpreters.get_current() print(id(cur)) """)) @@ -354,9 +413,27 @@ def test_equality(self): def test_pickle(self): interp = interpreters.create() - data = pickle.dumps(interp) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, interp) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(interp, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, interp) + + @support.requires_subprocess() + @force_not_colorized + def test_cleanup_in_repl(self): + # GH-135729: Using a subinterpreter in the REPL would lead to an unraisable + # exception during finalization + repl = script_helper.spawn_python("-i") + script = b"""if True: + from concurrent import interpreters + interpreters.create() + exit()""" + stdout, stderr = repl.communicate(script) + self.assertIsNone(stderr) + self.assertIn(b"Interpreter.close()", stdout) + self.assertNotIn(b"Traceback", stdout) + class TestInterpreterIsRunning(TestBase): @@ -524,7 +601,7 @@ def test_from_current(self): main, = interpreters.list_all() interp = interpreters.create() out = _run_output(interp, dedent(f""" - from test.support import interpreters + from concurrent import interpreters interp = interpreters.Interpreter({interp.id}) try: interp.close() @@ -541,7 +618,7 @@ def test_from_sibling(self): self.assertEqual(set(interpreters.list_all()), {main, interp1, interp2}) interp1.exec(dedent(f""" - from test.support import interpreters + from concurrent import interpreters interp2 = interpreters.Interpreter({interp2.id}) interp2.close() interp3 = interpreters.create() @@ -647,6 +724,68 @@ def test_created_with_capi(self): self.interp_exists(interpid)) + def test_remaining_threads(self): + r_interp, w_interp = self.pipe() + + FINISHED = b'F' + + # It's unlikely, but technically speaking, it's possible + # that the thread could've finished before interp.close() is + # reached, so this test might not properly exercise the case. + # However, it's quite unlikely and probably not worth bothering about. + interp = interpreters.create() + interp.exec(f"""if True: + import os + import threading + import time + + def task(): + time.sleep(1) + os.write({w_interp}, {FINISHED!r}) + + threads = (threading.Thread(target=task) for _ in range(3)) + for t in threads: + t.start() + """) + interp.close() + + self.assertEqual(os.read(r_interp, 1), FINISHED) + + def test_remaining_daemon_threads(self): + # Daemon threads leak reference by nature, because they hang threads + # without allowing them to do cleanup (i.e., release refs). + # To prevent that from messing up the refleak hunter and whatnot, we + # run this in a subprocess. + code = '''if True: + import _interpreters + import types + interp = _interpreters.create( + types.SimpleNamespace( + use_main_obmalloc=False, + allow_fork=False, + allow_exec=False, + allow_threads=True, + allow_daemon_threads=True, + check_multi_interp_extensions=True, + gil='own', + ) + ) + _interpreters.exec(interp, f"""if True: + import threading + import time + + def task(): + time.sleep(3) + + threads = (threading.Thread(target=task, daemon=True) for _ in range(3)) + for t in threads: + t.start() + """) + _interpreters.destroy(interp) + ''' + assert_python_ok('-c', code) + + class TestInterpreterPrepareMain(TestBase): def test_empty(self): @@ -748,14 +887,17 @@ def eggs(): ham() """) scriptfile = self.make_script('script.py', tempdir, text=""" - from test.support import interpreters + from concurrent import interpreters def script(): import spam spam.eggs() interp = interpreters.create() - interp.exec(script) + try: + interp.exec(script) + finally: + interp.close() """) stdout, stderr = self.assert_python_failure(scriptfile) @@ -764,12 +906,12 @@ def script(): # File "{interpreters.__file__}", line 179, in exec self.assertEqual(stderr, dedent(f"""\ Traceback (most recent call last): - File "{scriptfile}", line 9, in <module> + File "{scriptfile}", line 10, in <module> interp.exec(script) ~~~~~~~~~~~^^^^^^^^ {interpmod_line.strip()} raise ExecutionFailed(excinfo) - test.support.interpreters.ExecutionFailed: RuntimeError: uh-oh! + concurrent.interpreters.ExecutionFailed: RuntimeError: uh-oh! Uncaught in the interpreter: @@ -839,9 +981,16 @@ def test_bad_script(self): interp.exec(10) def test_bytes_for_script(self): + r, w = self.pipe() + RAN = b'R' + DONE = b'D' interp = interpreters.create() - with self.assertRaises(TypeError): - interp.exec(b'print("spam")') + interp.exec(f"""if True: + import os + os.write({w}, {RAN!r}) + """) + os.write(w, DONE) + self.assertEqual(os.read(r, 1), RAN) def test_with_background_threads_still_running(self): r_interp, w_interp = self.pipe() @@ -879,28 +1028,46 @@ def test_created_with_capi(self): with self.assertRaisesRegex(InterpreterError, 'unrecognized'): interp.exec('raise Exception("it worked!")') + def test_list_comprehension(self): + # gh-135450: List comprehensions caused an assertion failure + # in _PyCode_CheckNoExternalState() + import string + r_interp, w_interp = self.pipe() + + interp = interpreters.create() + interp.exec(f"""if True: + import os + comp = [str(i) for i in range(10)] + os.write({w_interp}, ''.join(comp).encode()) + """) + self.assertEqual(os.read(r_interp, 10).decode(), string.digits) + interp.close() + + # test__interpreters covers the remaining # Interpreter.exec() behavior. -def call_func_noop(): - pass +call_func_noop = defs.spam_minimal +call_func_ident = defs.spam_returns_arg +call_func_failure = defs.spam_raises def call_func_return_shareable(): return (1, None) -def call_func_return_not_shareable(): - return [1, 2, 3] +def call_func_return_stateless_func(): + return (lambda x: x) -def call_func_failure(): - raise Exception('spam!') +def call_func_return_pickleable(): + return [1, 2, 3] -def call_func_ident(value): - return value +def call_func_return_unpickleable(): + x = 42 + return (lambda: x) def get_call_func_closure(value): @@ -909,6 +1076,11 @@ def call_func_closure(): return call_func_closure +def call_func_exec_wrapper(script, ns): + res = exec(script, ns, ns) + return res, ns, id(ns) + + class Spam: @staticmethod @@ -1005,86 +1177,663 @@ class TestInterpreterCall(TestBase): # - preserves info (e.g. SyntaxError) # - matching error display - def test_call(self): + @contextlib.contextmanager + def assert_fails(self, expected): + with self.assertRaises(ExecutionFailed) as cm: + yield cm + uncaught = cm.exception.excinfo + self.assertEqual(uncaught.type.__name__, expected.__name__) + + def assert_fails_not_shareable(self): + return self.assert_fails(interpreters.NotShareableError) + + def assert_code_equal(self, code1, code2): + if code1 == code2: + return + self.assertEqual(code1.co_name, code2.co_name) + self.assertEqual(code1.co_flags, code2.co_flags) + self.assertEqual(code1.co_consts, code2.co_consts) + self.assertEqual(code1.co_varnames, code2.co_varnames) + self.assertEqual(code1.co_cellvars, code2.co_cellvars) + self.assertEqual(code1.co_freevars, code2.co_freevars) + self.assertEqual(code1.co_names, code2.co_names) + self.assertEqual( + _testinternalcapi.get_code_var_counts(code1), + _testinternalcapi.get_code_var_counts(code2), + ) + self.assertEqual(code1.co_code, code2.co_code) + + def assert_funcs_equal(self, func1, func2): + if func1 == func2: + return + self.assertIs(type(func1), type(func2)) + self.assertEqual(func1.__name__, func2.__name__) + self.assertEqual(func1.__defaults__, func2.__defaults__) + self.assertEqual(func1.__kwdefaults__, func2.__kwdefaults__) + self.assertEqual(func1.__closure__, func2.__closure__) + self.assert_code_equal(func1.__code__, func2.__code__) + self.assertEqual( + _testinternalcapi.get_code_var_counts(func1), + _testinternalcapi.get_code_var_counts(func2), + ) + + def assert_exceptions_equal(self, exc1, exc2): + assert isinstance(exc1, Exception) + assert isinstance(exc2, Exception) + if exc1 == exc2: + return + self.assertIs(type(exc1), type(exc2)) + self.assertEqual(exc1.args, exc2.args) + + def test_stateless_funcs(self): interp = interpreters.create() - for i, (callable, args, kwargs) in enumerate([ - (call_func_noop, (), {}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), - (Spam.noop, (), {}), + func = call_func_noop + with self.subTest('no args, no return'): + res = interp.call(func) + self.assertIsNone(res) + + func = call_func_return_shareable + with self.subTest('no args, returns shareable'): + res = interp.call(func) + self.assertEqual(res, (1, None)) + + func = call_func_return_stateless_func + expected = (lambda x: x) + with self.subTest('no args, returns stateless func'): + res = interp.call(func) + self.assert_funcs_equal(res, expected) + + func = call_func_return_pickleable + with self.subTest('no args, returns pickleable'): + res = interp.call(func) + self.assertEqual(res, [1, 2, 3]) + + func = call_func_return_unpickleable + with self.subTest('no args, returns unpickleable'): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func) + + def test_stateless_func_returns_arg(self): + interp = interpreters.create() + + for arg in [ + None, + 10, + 'spam!', + b'spam!', + (1, 2, 'spam!'), + memoryview(b'spam!'), + ]: + with self.subTest(f'shareable {arg!r}'): + assert _interpreters.is_shareable(arg) + res = interp.call(defs.spam_returns_arg, arg) + self.assertEqual(res, arg) + + for arg in defs.STATELESS_FUNCTIONS: + with self.subTest(f'stateless func {arg!r}'): + res = interp.call(defs.spam_returns_arg, arg) + self.assert_funcs_equal(res, arg) + + for arg in defs.TOP_FUNCTIONS: + if arg in defs.STATELESS_FUNCTIONS: + continue + with self.subTest(f'stateful func {arg!r}'): + res = interp.call(defs.spam_returns_arg, arg) + self.assert_funcs_equal(res, arg) + assert is_pickleable(arg) + + for arg in [ + Ellipsis, + NotImplemented, + object(), + 2**1000, + [1, 2, 3], + {'a': 1, 'b': 2}, + types.SimpleNamespace(x=42), + # builtin types + object, + type, + Exception, + ModuleNotFoundError, + # builtin exceptions + Exception('uh-oh!'), + ModuleNotFoundError('mymodule'), + # builtin fnctions + len, + sys.exit, + # user classes + *defs.TOP_CLASSES, + *(c(*a) for c, a in defs.TOP_CLASSES.items() + if c not in defs.CLASSES_WITHOUT_EQUALITY), + ]: + with self.subTest(f'pickleable {arg!r}'): + res = interp.call(defs.spam_returns_arg, arg) + if type(arg) is object: + self.assertIs(type(res), object) + elif isinstance(arg, BaseException): + self.assert_exceptions_equal(res, arg) + else: + self.assertEqual(res, arg) + assert is_pickleable(arg) + + for arg in [ + types.MappingProxyType({}), + *(f for f in defs.NESTED_FUNCTIONS + if f not in defs.STATELESS_FUNCTIONS), + ]: + with self.subTest(f'unpickleable {arg!r}'): + assert not _interpreters.is_shareable(arg) + assert not is_pickleable(arg) + with self.assertRaises(interpreters.NotShareableError): + interp.call(defs.spam_returns_arg, arg) + + def test_full_args(self): + interp = interpreters.create() + expected = (1, 2, 3, 4, 5, 6, ('?',), {'g': 7, 'h': 8}) + func = defs.spam_full_args + res = interp.call(func, 1, 2, 3, 4, '?', e=5, f=6, g=7, h=8) + self.assertEqual(res, expected) + + def test_full_defaults(self): + # pickleable, but not stateless + interp = interpreters.create() + expected = (-1, -2, -3, -4, -5, -6, (), {'g': 8, 'h': 9}) + res = interp.call(defs.spam_full_args_with_defaults, g=8, h=9) + self.assertEqual(res, expected) + + def test_modified_arg(self): + interp = interpreters.create() + script = dedent(""" + a = 7 + b = 2 + c = a ** b + """) + ns = {} + expected = {'a': 7, 'b': 2, 'c': 49} + res = interp.call(call_func_exec_wrapper, script, ns) + obj, resns, resid = res + del resns['__builtins__'] + self.assertIsNone(obj) + self.assertEqual(ns, {}) + self.assertEqual(resns, expected) + self.assertNotEqual(resid, id(ns)) + self.assertNotEqual(resid, id(resns)) + + def test_func_in___main___valid(self): + # pickleable, already there' + + with os_helper.temp_dir() as tempdir: + def new_mod(name, text): + script_helper.make_script(tempdir, name, dedent(text)) + + def run(text): + name = 'myscript' + text = dedent(f""" + import sys + sys.path.insert(0, {tempdir!r}) + + """) + dedent(text) + filename = script_helper.make_script(tempdir, name, text) + res = script_helper.assert_python_ok(filename) + return res.out.decode('utf-8').strip() + + # no module indirection + with self.subTest('no indirection'): + text = run(f""" + from concurrent import interpreters + + def spam(): + # This a global var... + return __name__ + + if __name__ == '__main__': + interp = interpreters.create() + res = interp.call(spam) + print(res) + """) + self.assertEqual(text, '<fake __main__>') + + # indirect as func, direct interp + new_mod('mymod', f""" + def run(interp, func): + return interp.call(func) + """) + with self.subTest('indirect as func, direct interp'): + text = run(f""" + from concurrent import interpreters + import mymod + + def spam(): + # This a global var... + return __name__ + + if __name__ == '__main__': + interp = interpreters.create() + res = mymod.run(interp, spam) + print(res) + """) + self.assertEqual(text, '<fake __main__>') + + # indirect as func, indirect interp + new_mod('mymod', f""" + from concurrent import interpreters + def run(func): + interp = interpreters.create() + return interp.call(func) + """) + with self.subTest('indirect as func, indirect interp'): + text = run(f""" + import mymod + + def spam(): + # This a global var... + return __name__ + + if __name__ == '__main__': + res = mymod.run(spam) + print(res) + """) + self.assertEqual(text, '<fake __main__>') + + def test_func_in___main___invalid(self): + interp = interpreters.create() + + funcname = f'{__name__.replace(".", "_")}_spam_okay' + script = dedent(f""" + def {funcname}(): + # This a global var... + return __name__ + """) + + with self.subTest('pickleable, added dynamically'): + with defined_in___main__(funcname, script) as arg: + with self.assertRaises(interpreters.NotShareableError): + interp.call(defs.spam_returns_arg, arg) + + with self.subTest('lying about __main__'): + with defined_in___main__(funcname, script, remove=True) as arg: + with self.assertRaises(interpreters.NotShareableError): + interp.call(defs.spam_returns_arg, arg) + + def test_func_in___main___hidden(self): + # When a top-level function that uses global variables is called + # through Interpreter.call(), it will be pickled, sent over, + # and unpickled. That requires that it be found in the other + # interpreter's __main__ module. However, the original script + # that defined the function is only run in the main interpreter, + # so pickle.loads() would normally fail. + # + # We work around this by running the script in the other + # interpreter. However, this is a one-off solution for the sake + # of unpickling, so we avoid modifying that interpreter's + # __main__ module by running the script in a hidden module. + # + # In this test we verify that the function runs with the hidden + # module as its __globals__ when called in the other interpreter, + # and that the interpreter's __main__ module is unaffected. + text = dedent(""" + eggs = True + + def spam(*, explicit=False): + if explicit: + import __main__ + ns = __main__.__dict__ + else: + # For now we have to have a LOAD_GLOBAL in the + # function in order for globals() to actually return + # spam.__globals__. Maybe it doesn't go through pickle? + # XXX We will fix this later. + spam + ns = globals() + + func = ns.get('spam') + return [ + id(ns), + ns.get('__name__'), + ns.get('__file__'), + id(func), + None if func is None else repr(func), + ns.get('eggs'), + ns.get('ham'), + ] + + if __name__ == "__main__": + from concurrent import interpreters + interp = interpreters.create() + + ham = True + print([ + [ + spam(explicit=True), + spam(), + ], + [ + interp.call(spam, explicit=True), + interp.call(spam), + ], + ]) + """) + with os_helper.temp_dir() as tempdir: + filename = script_helper.make_script(tempdir, 'my-script', text) + res = script_helper.assert_python_ok(filename) + stdout = res.out.decode('utf-8').strip() + local, remote = eval(stdout) + + # In the main interpreter. + main, unpickled = local + nsid, _, _, funcid, func, _, _ = main + self.assertEqual(main, [ + nsid, + '__main__', + filename, + funcid, + func, + True, + True, + ]) + self.assertIsNot(func, None) + self.assertRegex(func, '^<function spam at 0x.*>$') + self.assertEqual(unpickled, main) + + # In the subinterpreter. + main, unpickled = remote + nsid1, _, _, funcid1, _, _, _ = main + self.assertEqual(main, [ + nsid1, + '__main__', + None, + funcid1, + None, + None, + None, + ]) + nsid2, _, _, funcid2, func, _, _ = unpickled + self.assertEqual(unpickled, [ + nsid2, + '<fake __main__>', + filename, + funcid2, + func, + True, + None, + ]) + self.assertIsNot(func, None) + self.assertRegex(func, '^<function spam at 0x.*>$') + self.assertNotEqual(nsid2, nsid1) + self.assertNotEqual(funcid2, funcid1) + + def test_func_in___main___uses_globals(self): + # See the note in test_func_in___main___hidden about pickle + # and the __main__ module. + # + # Additionally, the solution to that problem must provide + # for global variables on which a pickled function might rely. + # + # To check that, we run a script that has two global functions + # and a global variable in the __main__ module. One of the + # functions sets the global variable and the other returns + # the value. + # + # The script calls those functions multiple times in another + # interpreter, to verify the following: + # + # * the global variable is properly initialized + # * the global variable retains state between calls + # * the setter modifies that persistent variable + # * the getter uses the variable + # * the calls in the other interpreter do not modify + # the main interpreter + # * those calls don't modify the interpreter's __main__ module + # * the functions and variable do not actually show up in the + # other interpreter's __main__ module + text = dedent(""" + count = 0 + + def inc(x=1): + global count + count += x + + def get_count(): + return count + + if __name__ == "__main__": + counts = [] + results = [count, counts] + + from concurrent import interpreters + interp = interpreters.create() + + val = interp.call(get_count) + counts.append(val) + + interp.call(inc) + val = interp.call(get_count) + counts.append(val) + + interp.call(inc, 3) + val = interp.call(get_count) + counts.append(val) + + results.append(count) + + modified = {name: interp.call(eval, f'{name!r} in vars()') + for name in ('count', 'inc', 'get_count')} + results.append(modified) + + print(results) + """) + with os_helper.temp_dir() as tempdir: + filename = script_helper.make_script(tempdir, 'my-script', text) + res = script_helper.assert_python_ok(filename) + stdout = res.out.decode('utf-8').strip() + before, counts, after, modified = eval(stdout) + self.assertEqual(modified, { + 'count': False, + 'inc': False, + 'get_count': False, + }) + self.assertEqual(before, 0) + self.assertEqual(after, 0) + self.assertEqual(counts, [0, 1, 4]) + + def test_raises(self): + interp = interpreters.create() + with self.assertRaises(ExecutionFailed): + interp.call(call_func_failure) + + with self.assert_fails(ValueError): + interp.call(call_func_complex, '???', exc=ValueError('spam')) + + def test_call_valid(self): + interp = interpreters.create() + + for i, (callable, args, kwargs, expected) in enumerate([ + (call_func_noop, (), {}, None), + (call_func_ident, ('spamspamspam',), {}, 'spamspamspam'), + (call_func_return_shareable, (), {}, (1, None)), + (call_func_return_pickleable, (), {}, [1, 2, 3]), + (Spam.noop, (), {}, None), + (Spam.from_values, (), {}, Spam(())), + (Spam.from_values, (1, 2, 3), {}, Spam((1, 2, 3))), + (Spam, ('???',), {}, Spam('???')), + (Spam(101), (), {}, (101, (), {})), + (Spam(10101).run, (), {}, (10101, (), {})), + (call_func_complex, ('ident', 'spam'), {}, 'spam'), + (call_func_complex, ('full-ident', 'spam'), {}, ('spam', (), {})), + (call_func_complex, ('full-ident', 'spam', 'ham'), {'eggs': '!!!'}, + ('spam', ('ham',), {'eggs': '!!!'})), + (call_func_complex, ('globals',), {}, __name__), + (call_func_complex, ('interpid',), {}, interp.id), + (call_func_complex, ('custom', 'spam!'), {}, Spam('spam!')), ]): with self.subTest(f'success case #{i+1}'): - res = interp.call(callable) - self.assertIs(res, None) + res = interp.call(callable, *args, **kwargs) + self.assertEqual(res, expected) + + def test_call_invalid(self): + interp = interpreters.create() + + func = get_call_func_closure + with self.subTest(func): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func, 42) + + func = get_call_func_closure(42) + with self.subTest(func): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func) + + func = call_func_complex + op = 'closure' + with self.subTest(f'{func} ({op})'): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func, op, value='~~~') + + op = 'custom-inner' + with self.subTest(f'{func} ({op})'): + with self.assertRaises(interpreters.NotShareableError): + interp.call(func, op, 'eggs!') + + def test_callable_requires_frame(self): + # There are various functions that require a current frame. + interp = interpreters.create() + for call, expected in [ + ((eval, '[1, 2, 3]'), + [1, 2, 3]), + ((eval, 'sum([1, 2, 3])'), + 6), + ((exec, '...'), + None), + ]: + with self.subTest(str(call)): + res = interp.call(*call) + self.assertEqual(res, expected) + + result_not_pickleable = [ + globals, + locals, + vars, + ] + for func, expectedtype in { + globals: dict, + locals: dict, + vars: dict, + dir: list, + }.items(): + with self.subTest(str(func)): + if func in result_not_pickleable: + with self.assertRaises(interpreters.NotShareableError): + interp.call(func) + else: + res = interp.call(func) + self.assertIsInstance(res, expectedtype) + self.assertIn('__builtins__', res) + + def test_globals_from_builtins(self): + # The builtins exec(), eval(), globals(), locals(), vars(), + # and dir() each runs relative to the target interpreter's + # __main__ module, when called directly. However, + # globals(), locals(), and vars() don't work when called + # directly so we don't check them. + from _frozen_importlib import BuiltinImporter + interp = interpreters.create() + + names = interp.call(dir) + self.assertEqual(names, [ + '__builtins__', + '__doc__', + '__loader__', + '__name__', + '__package__', + '__spec__', + ]) + + values = {name: interp.call(eval, name) + for name in names if name != '__builtins__'} + self.assertEqual(values, { + '__name__': '__main__', + '__doc__': None, + '__spec__': None, # It wasn't imported, so no module spec? + '__package__': None, + '__loader__': BuiltinImporter, + }) + with self.assertRaises(ExecutionFailed): + interp.call(eval, 'spam'), + + interp.call(exec, f'assert dir() == {names}') + + # Update the interpreter's __main__. + interp.prepare_main(spam=42) + expected = names + ['spam'] + + names = interp.call(dir) + self.assertEqual(names, expected) + + value = interp.call(eval, 'spam') + self.assertEqual(value, 42) + + interp.call(exec, f'assert dir() == {expected}, dir()') + + def test_globals_from_stateless_func(self): + # A stateless func, which doesn't depend on any globals, + # doesn't go through pickle, so it runs in __main__. + def set_global(name, value): + globals()[name] = value + + def get_global(name): + return globals().get(name) + + interp = interpreters.create() + + modname = interp.call(get_global, '__name__') + self.assertEqual(modname, '__main__') + + res = interp.call(get_global, 'spam') + self.assertIsNone(res) + + interp.exec('spam = True') + res = interp.call(get_global, 'spam') + self.assertTrue(res) + + interp.call(set_global, 'spam', 42) + res = interp.call(get_global, 'spam') + self.assertEqual(res, 42) + + interp.exec('assert spam == 42, repr(spam)') + + def test_call_in_thread(self): + interp = interpreters.create() for i, (callable, args, kwargs) in enumerate([ - (call_func_ident, ('spamspamspam',), {}), - (get_call_func_closure, (42,), {}), - (get_call_func_closure(42), (), {}), + (call_func_noop, (), {}), + (call_func_return_shareable, (), {}), + (call_func_return_pickleable, (), {}), (Spam.from_values, (), {}), (Spam.from_values, (1, 2, 3), {}), - (Spam, ('???'), {}), (Spam(101), (), {}), (Spam(10101).run, (), {}), + (Spam.noop, (), {}), (call_func_complex, ('ident', 'spam'), {}), (call_func_complex, ('full-ident', 'spam'), {}), (call_func_complex, ('full-ident', 'spam', 'ham'), {'eggs': '!!!'}), (call_func_complex, ('globals',), {}), (call_func_complex, ('interpid',), {}), - (call_func_complex, ('closure',), {'value': '~~~'}), (call_func_complex, ('custom', 'spam!'), {}), - (call_func_complex, ('custom-inner', 'eggs!'), {}), - (call_func_complex, ('???',), {'exc': ValueError('spam')}), - ]): - with self.subTest(f'invalid case #{i+1}'): - with self.assertRaises(Exception): - if args or kwargs: - raise Exception((args, kwargs)) - interp.call(callable) - - with self.assertRaises(ExecutionFailed): - interp.call(call_func_failure) - - def test_call_in_thread(self): - interp = interpreters.create() - - for i, (callable, args, kwargs) in enumerate([ - (call_func_noop, (), {}), - (call_func_return_shareable, (), {}), - (call_func_return_not_shareable, (), {}), - (Spam.noop, (), {}), ]): with self.subTest(f'success case #{i+1}'): with self.captured_thread_exception() as ctx: - t = interp.call_in_thread(callable) + t = interp.call_in_thread(callable, *args, **kwargs) t.join() self.assertIsNone(ctx.caught) for i, (callable, args, kwargs) in enumerate([ - (call_func_ident, ('spamspamspam',), {}), (get_call_func_closure, (42,), {}), (get_call_func_closure(42), (), {}), - (Spam.from_values, (), {}), - (Spam.from_values, (1, 2, 3), {}), - (Spam, ('???'), {}), - (Spam(101), (), {}), - (Spam(10101).run, (), {}), - (call_func_complex, ('ident', 'spam'), {}), - (call_func_complex, ('full-ident', 'spam'), {}), - (call_func_complex, ('full-ident', 'spam', 'ham'), {'eggs': '!!!'}), - (call_func_complex, ('globals',), {}), - (call_func_complex, ('interpid',), {}), - (call_func_complex, ('closure',), {'value': '~~~'}), - (call_func_complex, ('custom', 'spam!'), {}), - (call_func_complex, ('custom-inner', 'eggs!'), {}), - (call_func_complex, ('???',), {'exc': ValueError('spam')}), ]): with self.subTest(f'invalid case #{i+1}'): - if args or kwargs: - continue with self.captured_thread_exception() as ctx: - t = interp.call_in_thread(callable) + t = interp.call_in_thread(callable, *args, **kwargs) t.join() self.assertIsNotNone(ctx.caught) @@ -1452,6 +2201,14 @@ def test_destroy(self): self.assertFalse( self.interp_exists(interpid)) + with self.subTest('basic C-API'): + interpid = _testinternalcapi.create_interpreter() + self.assertTrue( + self.interp_exists(interpid)) + _testinternalcapi.destroy_interpreter(interpid, basic=True) + self.assertFalse( + self.interp_exists(interpid)) + def test_get_config(self): # This test overlaps with # test.test_capi.test_misc.InterpreterConfigTests. @@ -1529,6 +2286,16 @@ def test_whence(self): whence = eval(text) self.assertEqual(whence, _interpreters.WHENCE_LEGACY_CAPI) + def test_contextvars_missing(self): + script = f""" + import contextvars + print(getattr(contextvars.Token, "MISSING", "'doesn't exist'")) + """ + + orig = _interpreters.create() + text = self.run_and_capture(orig, script) + self.assertEqual(text.strip(), "<Token.MISSING>") + def test_is_running(self): def check(interpid, expected): with self.assertRaisesRegex(InterpreterError, 'unrecognized'): @@ -1585,18 +2352,14 @@ def test_exec(self): with results: exc = _interpreters.exec(interpid, script) out = results.stdout() - self.assertEqual(out, '') - self.assert_ns_equal(exc, types.SimpleNamespace( - type=types.SimpleNamespace( - __name__='Exception', - __qualname__='Exception', - __module__='builtins', - ), - msg='uh-oh!', + expected = build_excinfo( + Exception, 'uh-oh!', # We check these in other tests. formatted=exc.formatted, errdisplay=exc.errdisplay, - )) + ) + self.assertEqual(out, '') + self.assert_ns_equal(exc, expected) with self.subTest('from C-API'): with self.interpreter_from_capi() as interpid: @@ -1608,25 +2371,50 @@ def test_exec(self): self.assertEqual(exc.msg, 'it worked!') def test_call(self): - with self.subTest('no args'): - interpid = _interpreters.create() - exc = _interpreters.call(interpid, call_func_return_shareable) - self.assertIs(exc, None) + interpid = _interpreters.create() + + # Here we focus on basic args and return values. + # See TestInterpreterCall for full operational coverage, + # including supported callables. + + with self.subTest('no args, return None'): + func = defs.spam_minimal + res, exc = _interpreters.call(interpid, func) + self.assertIsNone(exc) + self.assertIsNone(res) + + with self.subTest('empty args, return None'): + func = defs.spam_minimal + res, exc = _interpreters.call(interpid, func, (), {}) + self.assertIsNone(exc) + self.assertIsNone(res) + + with self.subTest('no args, return non-None'): + func = defs.script_with_return + res, exc = _interpreters.call(interpid, func) + self.assertIsNone(exc) + self.assertIs(res, True) + + with self.subTest('full args, return non-None'): + expected = (1, 2, 3, 4, 5, 6, (7, 8), {'g': 9, 'h': 0}) + func = defs.spam_full_args + args = (1, 2, 3, 4, 7, 8) + kwargs = dict(e=5, f=6, g=9, h=0) + res, exc = _interpreters.call(interpid, func, args, kwargs) + self.assertIsNone(exc) + self.assertEqual(res, expected) with self.subTest('uncaught exception'): - interpid = _interpreters.create() - exc = _interpreters.call(interpid, call_func_failure) - self.assertEqual(exc, types.SimpleNamespace( - type=types.SimpleNamespace( - __name__='Exception', - __qualname__='Exception', - __module__='builtins', - ), - msg='spam!', + func = defs.spam_raises + res, exc = _interpreters.call(interpid, func) + expected = build_excinfo( + Exception, 'spam!', # We check these in other tests. formatted=exc.formatted, errdisplay=exc.errdisplay, - )) + ) + self.assertIsNone(res) + self.assertEqual(exc, expected) @requires_test_modules def test_set___main___attrs(self): diff --git a/Lib/test/test_interpreters/test_channels.py b/Lib/test/test_interpreters/test_channels.py index eada18f99d04db..52827357078b85 100644 --- a/Lib/test/test_interpreters/test_channels.py +++ b/Lib/test/test_interpreters/test_channels.py @@ -8,8 +8,8 @@ from test.support import import_helper # Raise SkipTest if subinterpreters not supported. _channels = import_helper.import_module('_interpchannels') -from test.support import interpreters -from test.support.interpreters import channels +from concurrent import interpreters +from test.support import channels from .utils import _run_output, TestBase @@ -121,9 +121,11 @@ def test_equality(self): def test_pickle(self): ch, _ = channels.create() - data = pickle.dumps(ch) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, ch) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(ch, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, ch) class TestSendChannelAttrs(TestBase): @@ -152,9 +154,11 @@ def test_equality(self): def test_pickle(self): _, ch = channels.create() - data = pickle.dumps(ch) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, ch) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(ch, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, ch) class TestSendRecv(TestBase): @@ -171,7 +175,7 @@ def test_send_recv_main(self): def test_send_recv_same_interpreter(self): interp = interpreters.create() interp.exec(dedent(""" - from test.support.interpreters import channels + from test.support import channels r, s = channels.create() orig = b'spam' s.send_nowait(orig) @@ -244,7 +248,7 @@ def test_send_recv_nowait_main_with_default(self): def test_send_recv_nowait_same_interpreter(self): interp = interpreters.create() interp.exec(dedent(""" - from test.support.interpreters import channels + from test.support import channels r, s = channels.create() orig = b'spam' s.send_nowait(orig) @@ -377,17 +381,17 @@ def common(rch, sch, unbound=None, presize=0): if not unbound: extraargs = '' elif unbound is channels.UNBOUND: - extraargs = ', unbound=channels.UNBOUND' + extraargs = ', unbounditems=channels.UNBOUND' elif unbound is channels.UNBOUND_ERROR: - extraargs = ', unbound=channels.UNBOUND_ERROR' + extraargs = ', unbounditems=channels.UNBOUND_ERROR' elif unbound is channels.UNBOUND_REMOVE: - extraargs = ', unbound=channels.UNBOUND_REMOVE' + extraargs = ', unbounditems=channels.UNBOUND_REMOVE' else: raise NotImplementedError(repr(unbound)) interp = interpreters.create() _run_output(interp, dedent(f""" - from test.support.interpreters import channels + from test.support import channels sch = channels.SendChannel({sch.id}) obj1 = b'spam' obj2 = b'eggs' @@ -454,11 +458,11 @@ def common(rch, sch, unbound=None, presize=0): with self.assertRaises(channels.ChannelEmptyError): rch.recv_nowait() - sch.send_nowait(b'ham', unbound=channels.UNBOUND_REMOVE) + sch.send_nowait(b'ham', unbounditems=channels.UNBOUND_REMOVE) self.assertEqual(_channels.get_count(rch.id), 1) interp = common(rch, sch, channels.UNBOUND_REMOVE, 1) self.assertEqual(_channels.get_count(rch.id), 3) - sch.send_nowait(42, unbound=channels.UNBOUND_REMOVE) + sch.send_nowait(42, unbounditems=channels.UNBOUND_REMOVE) self.assertEqual(_channels.get_count(rch.id), 4) del interp self.assertEqual(_channels.get_count(rch.id), 2) @@ -482,13 +486,13 @@ def test_send_cleared_with_subinterpreter_mixed(self): self.assertEqual(_channels.get_count(rch.id), 0) _run_output(interp, dedent(f""" - from test.support.interpreters import channels + from test.support import channels sch = channels.SendChannel({sch.id}) - sch.send_nowait(1, unbound=channels.UNBOUND) - sch.send_nowait(2, unbound=channels.UNBOUND_ERROR) + sch.send_nowait(1, unbounditems=channels.UNBOUND) + sch.send_nowait(2, unbounditems=channels.UNBOUND_ERROR) sch.send_nowait(3) - sch.send_nowait(4, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(5, unbound=channels.UNBOUND) + sch.send_nowait(4, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(5, unbounditems=channels.UNBOUND) """)) self.assertEqual(_channels.get_count(rch.id), 5) @@ -518,15 +522,15 @@ def test_send_cleared_with_subinterpreter_multiple(self): sch.send_nowait(1) _run_output(interp1, dedent(f""" - from test.support.interpreters import channels + from test.support import channels rch = channels.RecvChannel({rch.id}) sch = channels.SendChannel({sch.id}) obj1 = rch.recv() - sch.send_nowait(2, unbound=channels.UNBOUND) - sch.send_nowait(obj1, unbound=channels.UNBOUND_REMOVE) + sch.send_nowait(2, unbounditems=channels.UNBOUND) + sch.send_nowait(obj1, unbounditems=channels.UNBOUND_REMOVE) """)) _run_output(interp2, dedent(f""" - from test.support.interpreters import channels + from test.support import channels rch = channels.RecvChannel({rch.id}) sch = channels.SendChannel({sch.id}) obj2 = rch.recv() @@ -535,21 +539,21 @@ def test_send_cleared_with_subinterpreter_multiple(self): self.assertEqual(_channels.get_count(rch.id), 0) sch.send_nowait(3) _run_output(interp1, dedent(""" - sch.send_nowait(4, unbound=channels.UNBOUND) + sch.send_nowait(4, unbounditems=channels.UNBOUND) # interp closed here - sch.send_nowait(5, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(6, unbound=channels.UNBOUND) + sch.send_nowait(5, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(6, unbounditems=channels.UNBOUND) """)) _run_output(interp2, dedent(""" - sch.send_nowait(7, unbound=channels.UNBOUND_ERROR) + sch.send_nowait(7, unbounditems=channels.UNBOUND_ERROR) # interp closed here - sch.send_nowait(obj1, unbound=channels.UNBOUND_ERROR) - sch.send_nowait(obj2, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(8, unbound=channels.UNBOUND) + sch.send_nowait(obj1, unbounditems=channels.UNBOUND_ERROR) + sch.send_nowait(obj2, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(8, unbounditems=channels.UNBOUND) """)) _run_output(interp1, dedent(""" - sch.send_nowait(9, unbound=channels.UNBOUND_REMOVE) - sch.send_nowait(10, unbound=channels.UNBOUND) + sch.send_nowait(9, unbounditems=channels.UNBOUND_REMOVE) + sch.send_nowait(10, unbounditems=channels.UNBOUND) """)) self.assertEqual(_channels.get_count(rch.id), 10) diff --git a/Lib/test/test_interpreters/test_lifecycle.py b/Lib/test/test_interpreters/test_lifecycle.py index ac24f6568acd95..39c1441b67c133 100644 --- a/Lib/test/test_interpreters/test_lifecycle.py +++ b/Lib/test/test_interpreters/test_lifecycle.py @@ -119,7 +119,7 @@ def test_sys_path_0(self): # The main interpreter's sys.path[0] should be used by subinterpreters. script = ''' import sys - from test.support import interpreters + from concurrent import interpreters orig = sys.path[0] @@ -132,6 +132,7 @@ def test_sys_path_0(self): 'sub': sys.path[0], }}, indent=4), flush=True) """) + interp.close() ''' # <tmp>/ # pkg/ @@ -170,9 +171,12 @@ def test_gh_109793(self): # is reported, even when subinterpreters get cleaned up at the end. import subprocess argv = [sys.executable, '-c', '''if True: - from test.support import interpreters + from concurrent import interpreters interp = interpreters.create() - raise Exception + try: + raise Exception + finally: + interp.close() '''] proc = subprocess.run(argv, capture_output=True, text=True) self.assertIn('Traceback', proc.stderr) diff --git a/Lib/test/test_interpreters/test_queues.py b/Lib/test/test_interpreters/test_queues.py index 18f83d097eb360..8c6a0efbb920d8 100644 --- a/Lib/test/test_interpreters/test_queues.py +++ b/Lib/test/test_interpreters/test_queues.py @@ -7,11 +7,12 @@ from test.support import import_helper, Py_DEBUG # Raise SkipTest if subinterpreters not supported. _queues = import_helper.import_module('_interpqueues') -from test.support import interpreters -from test.support.interpreters import queues, _crossinterp +from concurrent import interpreters +from concurrent.interpreters import _queues as queues, _crossinterp +import test._crossinterp_definitions as defs from .utils import _run_output, TestBase as _TestBase - +HUGE_TIMEOUT = 3600 REPLACE = _crossinterp._UNBOUND_CONSTANT_TO_FLAG[_crossinterp.UNBOUND] @@ -42,7 +43,7 @@ def test_highlevel_reloaded(self): importlib.reload(queues) def test_create_destroy(self): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.destroy(qid) self.assertEqual(get_num_queues(), 0) with self.assertRaises(queues.QueueNotFoundError): @@ -56,7 +57,7 @@ def test_not_destroyed(self): '-c', dedent(f""" import {_queues.__name__} as _queues - _queues.create(2, 0, {REPLACE}) + _queues.create(2, {REPLACE}, -1) """), ) self.assertEqual(stdout, '') @@ -67,13 +68,13 @@ def test_not_destroyed(self): def test_bind_release(self): with self.subTest('typical'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.bind(qid) _queues.release(qid) self.assertEqual(get_num_queues(), 0) with self.subTest('bind too much'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.bind(qid) _queues.bind(qid) _queues.release(qid) @@ -81,7 +82,7 @@ def test_bind_release(self): self.assertEqual(get_num_queues(), 0) with self.subTest('nested'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) _queues.bind(qid) _queues.bind(qid) _queues.release(qid) @@ -89,7 +90,7 @@ def test_bind_release(self): self.assertEqual(get_num_queues(), 0) with self.subTest('release without binding'): - qid = _queues.create(2, 0, REPLACE) + qid = _queues.create(2, REPLACE, -1) with self.assertRaises(queues.QueueError): _queues.release(qid) @@ -126,19 +127,19 @@ def test_shareable(self): interp = interpreters.create() interp.exec(dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue1 = queues.Queue({queue1.id}) """)); with self.subTest('same interpreter'): queue2 = queues.create() - queue1.put(queue2, syncobj=True) + queue1.put(queue2) queue3 = queue1.get() self.assertIs(queue3, queue2) with self.subTest('from current interpreter'): queue4 = queues.create() - queue1.put(queue4, syncobj=True) + queue1.put(queue4) out = _run_output(interp, dedent(""" queue4 = queue1.get() print(queue4.id) @@ -149,7 +150,7 @@ def test_shareable(self): with self.subTest('from subinterpreter'): out = _run_output(interp, dedent(""" queue5 = queues.create() - queue1.put(queue5, syncobj=True) + queue1.put(queue5) print(queue5.id) """)) qid = int(out) @@ -188,9 +189,11 @@ def test_equality(self): def test_pickle(self): queue = queues.create() - data = pickle.dumps(queue) - unpickled = pickle.loads(data) - self.assertEqual(unpickled, queue) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=protocol): + data = pickle.dumps(queue, protocol) + unpickled = pickle.loads(data) + self.assertEqual(unpickled, queue) class TestQueueOps(TestBase): @@ -198,7 +201,7 @@ class TestQueueOps(TestBase): def test_empty(self): queue = queues.create() before = queue.empty() - queue.put(None, syncobj=True) + queue.put(None) during = queue.empty() queue.get() after = queue.empty() @@ -208,18 +211,64 @@ def test_empty(self): self.assertIs(after, True) def test_full(self): - expected = [False, False, False, True, False, False, False] - actual = [] - queue = queues.create(3) - for _ in range(3): - actual.append(queue.full()) - queue.put(None, syncobj=True) - actual.append(queue.full()) - for _ in range(3): - queue.get() - actual.append(queue.full()) + for maxsize in [1, 3, 11]: + with self.subTest(f'maxsize={maxsize}'): + num_to_add = maxsize + expected = [False] * (num_to_add * 2 + 3) + expected[maxsize] = True + expected[maxsize + 1] = True + + queue = queues.create(maxsize) + actual = [] + empty = [queue.empty()] + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.put_nowait(None) + actual.append(queue.full()) + with self.assertRaises(queues.QueueFull): + queue.put_nowait(None) + empty.append(queue.empty()) + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.get_nowait() + actual.append(queue.full()) + with self.assertRaises(queues.QueueEmpty): + queue.get_nowait() + actual.append(queue.full()) + empty.append(queue.empty()) - self.assertEqual(actual, expected) + self.assertEqual(actual, expected) + self.assertEqual(empty, [True, False, True]) + + # no max size + for args in [(), (0,), (-1,), (-10,)]: + with self.subTest(f'maxsize={args[0]}' if args else '<default>'): + num_to_add = 13 + expected = [False] * (num_to_add * 2 + 3) + + queue = queues.create(*args) + actual = [] + empty = [queue.empty()] + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.put_nowait(None) + actual.append(queue.full()) + empty.append(queue.empty()) + + for _ in range(num_to_add): + actual.append(queue.full()) + queue.get_nowait() + actual.append(queue.full()) + with self.assertRaises(queues.QueueEmpty): + queue.get_nowait() + actual.append(queue.full()) + empty.append(queue.empty()) + + self.assertEqual(actual, expected) + self.assertEqual(empty, [True, False, True]) def test_qsize(self): expected = [0, 1, 2, 3, 2, 3, 2, 1, 0, 1, 0] @@ -227,16 +276,16 @@ def test_qsize(self): queue = queues.create() for _ in range(3): actual.append(queue.qsize()) - queue.put(None, syncobj=True) + queue.put(None) actual.append(queue.qsize()) queue.get() actual.append(queue.qsize()) - queue.put(None, syncobj=True) + queue.put(None) actual.append(queue.qsize()) for _ in range(3): queue.get() actual.append(queue.qsize()) - queue.put(None, syncobj=True) + queue.put(None) actual.append(queue.qsize()) queue.get() actual.append(queue.qsize()) @@ -245,70 +294,38 @@ def test_qsize(self): def test_put_get_main(self): expected = list(range(20)) - for syncobj in (True, False): - kwds = dict(syncobj=syncobj) - with self.subTest(f'syncobj={syncobj}'): - queue = queues.create() - for i in range(20): - queue.put(i, **kwds) - actual = [queue.get() for _ in range(20)] + queue = queues.create() + for i in range(20): + queue.put(i) + actual = [queue.get() for _ in range(20)] - self.assertEqual(actual, expected) + self.assertEqual(actual, expected) def test_put_timeout(self): - for syncobj in (True, False): - kwds = dict(syncobj=syncobj) - with self.subTest(f'syncobj={syncobj}'): - queue = queues.create(2) - queue.put(None, **kwds) - queue.put(None, **kwds) - with self.assertRaises(queues.QueueFull): - queue.put(None, timeout=0.1, **kwds) - queue.get() - queue.put(None, **kwds) + queue = queues.create(2) + queue.put(None) + queue.put(None) + with self.assertRaises(queues.QueueFull): + queue.put(None, timeout=0.1) + with self.assertRaises(queues.QueueFull): + queue.put(None, HUGE_TIMEOUT, 0.1) + queue.get() + queue.put(None) def test_put_nowait(self): - for syncobj in (True, False): - kwds = dict(syncobj=syncobj) - with self.subTest(f'syncobj={syncobj}'): - queue = queues.create(2) - queue.put_nowait(None, **kwds) - queue.put_nowait(None, **kwds) - with self.assertRaises(queues.QueueFull): - queue.put_nowait(None, **kwds) - queue.get() - queue.put_nowait(None, **kwds) - - def test_put_syncobj(self): - for obj in [ - None, - True, - 10, - 'spam', - b'spam', - (0, 'a'), - ]: - with self.subTest(repr(obj)): - queue = queues.create() - - queue.put(obj, syncobj=True) - obj2 = queue.get() - self.assertEqual(obj2, obj) - - queue.put(obj, syncobj=True) - obj2 = queue.get_nowait() - self.assertEqual(obj2, obj) - - for obj in [ - [1, 2, 3], - {'a': 13, 'b': 17}, - ]: - with self.subTest(repr(obj)): - queue = queues.create() - with self.assertRaises(interpreters.NotShareableError): - queue.put(obj, syncobj=True) + queue = queues.create(2) + queue.put_nowait(None) + queue.put_nowait(None) + with self.assertRaises(queues.QueueFull): + queue.put_nowait(None) + with self.assertRaises(queues.QueueFull): + queue.put(None, False) + with self.assertRaises(queues.QueueFull): + queue.put(None, False, timeout=HUGE_TIMEOUT) + queue.get() + queue.put_nowait(None) - def test_put_not_syncobj(self): + def test_put_full_fallback(self): for obj in [ None, True, @@ -323,11 +340,11 @@ def test_put_not_syncobj(self): with self.subTest(repr(obj)): queue = queues.create() - queue.put(obj, syncobj=False) + queue.put(obj) obj2 = queue.get() self.assertEqual(obj2, obj) - queue.put(obj, syncobj=False) + queue.put(obj) obj2 = queue.get_nowait() self.assertEqual(obj2, obj) @@ -335,30 +352,21 @@ def test_get_timeout(self): queue = queues.create() with self.assertRaises(queues.QueueEmpty): queue.get(timeout=0.1) + with self.assertRaises(queues.QueueEmpty): + queue.get(HUGE_TIMEOUT, 0.1) def test_get_nowait(self): queue = queues.create() with self.assertRaises(queues.QueueEmpty): queue.get_nowait() + with self.assertRaises(queues.QueueEmpty): + queue.get(False) + with self.assertRaises(queues.QueueEmpty): + queue.get(False, timeout=HUGE_TIMEOUT) - def test_put_get_default_syncobj(self): - expected = list(range(20)) - queue = queues.create(syncobj=True) - for methname in ('get', 'get_nowait'): - with self.subTest(f'{methname}()'): - get = getattr(queue, methname) - for i in range(20): - queue.put(i) - actual = [get() for _ in range(20)] - self.assertEqual(actual, expected) - - obj = [1, 2, 3] # lists are not shareable - with self.assertRaises(interpreters.NotShareableError): - queue.put(obj) - - def test_put_get_default_not_syncobj(self): + def test_put_get_full_fallback(self): expected = list(range(20)) - queue = queues.create(syncobj=False) + queue = queues.create() for methname in ('get', 'get_nowait'): with self.subTest(f'{methname}()'): get = getattr(queue, methname) @@ -377,14 +385,14 @@ def test_put_get_default_not_syncobj(self): def test_put_get_same_interpreter(self): interp = interpreters.create() interp.exec(dedent(""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.create() """)) for methname in ('get', 'get_nowait'): with self.subTest(f'{methname}()'): interp.exec(dedent(f""" orig = b'spam' - queue.put(orig, syncobj=True) + queue.put(orig) obj = queue.{methname}() assert obj == orig, 'expected: obj == orig' assert obj is not orig, 'expected: obj is not orig' @@ -399,12 +407,12 @@ def test_put_get_different_interpreters(self): for methname in ('get', 'get_nowait'): with self.subTest(f'{methname}()'): obj1 = b'spam' - queue1.put(obj1, syncobj=True) + queue1.put(obj1) out = _run_output( interp, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue1 = queues.Queue({queue1.id}) queue2 = queues.Queue({queue2.id}) assert queue1.qsize() == 1, 'expected: queue1.qsize() == 1' @@ -416,7 +424,7 @@ def test_put_get_different_interpreters(self): obj2 = b'eggs' print(id(obj2)) assert queue2.qsize() == 0, 'expected: queue2.qsize() == 0' - queue2.put(obj2, syncobj=True) + queue2.put(obj2) assert queue2.qsize() == 1, 'expected: queue2.qsize() == 1' """)) self.assertEqual(len(queues.list_all()), 2) @@ -433,22 +441,22 @@ def common(queue, unbound=None, presize=0): if not unbound: extraargs = '' elif unbound is queues.UNBOUND: - extraargs = ', unbound=queues.UNBOUND' + extraargs = ', unbounditems=queues.UNBOUND' elif unbound is queues.UNBOUND_ERROR: - extraargs = ', unbound=queues.UNBOUND_ERROR' + extraargs = ', unbounditems=queues.UNBOUND_ERROR' elif unbound is queues.UNBOUND_REMOVE: - extraargs = ', unbound=queues.UNBOUND_REMOVE' + extraargs = ', unbounditems=queues.UNBOUND_REMOVE' else: raise NotImplementedError(repr(unbound)) interp = interpreters.create() _run_output(interp, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) obj1 = b'spam' obj2 = b'eggs' - queue.put(obj1, syncobj=True{extraargs}) - queue.put(obj2, syncobj=True{extraargs}) + queue.put(obj1{extraargs}) + queue.put(obj2{extraargs}) """)) self.assertEqual(queue.qsize(), presize + 2) @@ -501,11 +509,11 @@ def common(queue, unbound=None, presize=0): with self.assertRaises(queues.QueueEmpty): queue.get_nowait() - queue.put(b'ham', unbound=queues.UNBOUND_REMOVE) + queue.put(b'ham', unbounditems=queues.UNBOUND_REMOVE) self.assertEqual(queue.qsize(), 1) interp = common(queue, queues.UNBOUND_REMOVE, 1) self.assertEqual(queue.qsize(), 3) - queue.put(42, unbound=queues.UNBOUND_REMOVE) + queue.put(42, unbounditems=queues.UNBOUND_REMOVE) self.assertEqual(queue.qsize(), 4) del interp self.assertEqual(queue.qsize(), 2) @@ -521,13 +529,13 @@ def test_put_cleared_with_subinterpreter_mixed(self): queue = queues.create() interp = interpreters.create() _run_output(interp, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) - queue.put(1, syncobj=True, unbound=queues.UNBOUND) - queue.put(2, syncobj=True, unbound=queues.UNBOUND_ERROR) - queue.put(3, syncobj=True) - queue.put(4, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(5, syncobj=True, unbound=queues.UNBOUND) + queue.put(1, unbounditems=queues.UNBOUND) + queue.put(2, unbounditems=queues.UNBOUND_ERROR) + queue.put(3) + queue.put(4, unbounditems=queues.UNBOUND_REMOVE) + queue.put(5, unbounditems=queues.UNBOUND) """)) self.assertEqual(queue.qsize(), 5) @@ -555,16 +563,16 @@ def test_put_cleared_with_subinterpreter_multiple(self): interp1 = interpreters.create() interp2 = interpreters.create() - queue.put(1, syncobj=True) + queue.put(1) _run_output(interp1, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) obj1 = queue.get() - queue.put(2, syncobj=True, unbound=queues.UNBOUND) - queue.put(obj1, syncobj=True, unbound=queues.UNBOUND_REMOVE) + queue.put(2, unbounditems=queues.UNBOUND) + queue.put(obj1, unbounditems=queues.UNBOUND_REMOVE) """)) _run_output(interp2, dedent(f""" - from test.support.interpreters import queues + from concurrent.interpreters import _queues as queues queue = queues.Queue({queue.id}) obj2 = queue.get() obj1 = queue.get() @@ -572,21 +580,21 @@ def test_put_cleared_with_subinterpreter_multiple(self): self.assertEqual(queue.qsize(), 0) queue.put(3) _run_output(interp1, dedent(""" - queue.put(4, syncobj=True, unbound=queues.UNBOUND) + queue.put(4, unbounditems=queues.UNBOUND) # interp closed here - queue.put(5, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(6, syncobj=True, unbound=queues.UNBOUND) + queue.put(5, unbounditems=queues.UNBOUND_REMOVE) + queue.put(6, unbounditems=queues.UNBOUND) """)) _run_output(interp2, dedent(""" - queue.put(7, syncobj=True, unbound=queues.UNBOUND_ERROR) + queue.put(7, unbounditems=queues.UNBOUND_ERROR) # interp closed here - queue.put(obj1, syncobj=True, unbound=queues.UNBOUND_ERROR) - queue.put(obj2, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(8, syncobj=True, unbound=queues.UNBOUND) + queue.put(obj1, unbounditems=queues.UNBOUND_ERROR) + queue.put(obj2, unbounditems=queues.UNBOUND_REMOVE) + queue.put(8, unbounditems=queues.UNBOUND) """)) _run_output(interp1, dedent(""" - queue.put(9, syncobj=True, unbound=queues.UNBOUND_REMOVE) - queue.put(10, syncobj=True, unbound=queues.UNBOUND) + queue.put(9, unbounditems=queues.UNBOUND_REMOVE) + queue.put(10, unbounditems=queues.UNBOUND) """)) self.assertEqual(queue.qsize(), 10) @@ -642,12 +650,12 @@ def f(): break except queues.QueueEmpty: continue - queue2.put(obj, syncobj=True) + queue2.put(obj) t = threading.Thread(target=f) t.start() orig = b'spam' - queue1.put(orig, syncobj=True) + queue1.put(orig) obj = queue2.get() t.join() diff --git a/Lib/test/test_interpreters/test_stress.py b/Lib/test/test_interpreters/test_stress.py index fae2f38cb5534b..6b40a536bd3c31 100644 --- a/Lib/test/test_interpreters/test_stress.py +++ b/Lib/test/test_interpreters/test_stress.py @@ -6,7 +6,8 @@ from test.support import threading_helper # Raise SkipTest if subinterpreters not supported. import_helper.import_module('_interpreters') -from test.support import interpreters +from concurrent import interpreters +from concurrent.interpreters import InterpreterError from .utils import TestBase @@ -74,6 +75,14 @@ def run(): start.set() support.gc_collect() + def test_create_interpreter_no_memory(self): + import _interpreters + _testcapi = import_helper.import_module("_testcapi") + + with self.assertRaises(InterpreterError): + _testcapi.set_nomemory(0, 1) + _interpreters.create() + if __name__ == '__main__': # Test needs to be a package, so we can do relative imports. diff --git a/Lib/test/test_interpreters/utils.py b/Lib/test/test_interpreters/utils.py index fc4ad662e03b66..d5d307d4973c12 100644 --- a/Lib/test/test_interpreters/utils.py +++ b/Lib/test/test_interpreters/utils.py @@ -22,7 +22,7 @@ import _interpreters except ImportError as exc: raise unittest.SkipTest(str(exc)) -from test.support import interpreters +from concurrent import interpreters try: diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 5a8f1949baaa98..6514af8b1253c3 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -572,7 +572,7 @@ def do_test(test, obj, abilities): for [test, abilities] in tests: with self.subTest(test): if test == pipe_writer and not threading_helper.can_start_thread: - skipTest() + self.skipTest("Need threads") with test() as obj: do_test(test, obj, abilities) @@ -893,6 +893,22 @@ def test_RawIOBase_read(self): self.assertEqual(rawio.read(2), None) self.assertEqual(rawio.read(2), b"") + def test_RawIOBase_read_bounds_checking(self): + # Make sure a `.readinto` call which returns a value outside + # (0, len(buffer)) raises. + class Misbehaved(self.RawIOBase): + def __init__(self, readinto_return) -> None: + self._readinto_return = readinto_return + def readinto(self, b): + return self._readinto_return + + with self.assertRaises(ValueError) as cm: + Misbehaved(2).read(1) + self.assertEqual(str(cm.exception), "readinto returned 2 outside buffer size 1") + for bad_size in (2147483647, sys.maxsize, -1, -1000): + with self.assertRaises(ValueError): + Misbehaved(bad_size).read() + def test_types_have_dict(self): test = ( self.IOBase(), @@ -902,7 +918,7 @@ def test_types_have_dict(self): self.BytesIO() ) for obj in test: - self.assertTrue(hasattr(obj, "__dict__")) + self.assertHasAttr(obj, "__dict__") def test_opener(self): with self.open(os_helper.TESTFN, "w", encoding="utf-8") as f: @@ -918,7 +934,7 @@ def test_bad_opener_negative_1(self): def badopener(fname, flags): return -1 with self.assertRaises(ValueError) as cm: - open('non-existent', 'r', opener=badopener) + self.open('non-existent', 'r', opener=badopener) self.assertEqual(str(cm.exception), 'opener returned -1') def test_bad_opener_other_negative(self): @@ -926,7 +942,7 @@ def test_bad_opener_other_negative(self): def badopener(fname, flags): return -2 with self.assertRaises(ValueError) as cm: - open('non-existent', 'r', opener=badopener) + self.open('non-existent', 'r', opener=badopener) self.assertEqual(str(cm.exception), 'opener returned -2') def test_opener_invalid_fd(self): @@ -1062,6 +1078,37 @@ def flush(self): # Silence destructor error R.flush = lambda self: None + @threading_helper.requires_working_threading() + def test_write_readline_races(self): + # gh-134908: Concurrent iteration over a file caused races + thread_count = 2 + write_count = 100 + read_count = 100 + + def writer(file, barrier): + barrier.wait() + for _ in range(write_count): + file.write("x") + + def reader(file, barrier): + barrier.wait() + for _ in range(read_count): + for line in file: + self.assertEqual(line, "") + + with self.open(os_helper.TESTFN, "w+") as f: + barrier = threading.Barrier(thread_count + 1) + reader = threading.Thread(target=reader, args=(f, barrier)) + writers = [threading.Thread(target=writer, args=(f, barrier)) + for _ in range(thread_count)] + with threading_helper.catch_threading_exception() as cm: + with threading_helper.start_threads(writers + [reader]): + pass + self.assertIsNone(cm.exc_type) + + self.assertEqual(os.stat(os_helper.TESTFN).st_size, + write_count * thread_count) + class CIOTest(IOTest): @@ -1117,7 +1164,7 @@ def test_class_hierarchy(self): def check_subs(types, base): for tp in types: with self.subTest(tp=tp, base=base): - self.assertTrue(issubclass(tp, base)) + self.assertIsSubclass(tp, base) def recursive_check(d): for k, v in d.items(): @@ -1870,7 +1917,7 @@ def test_write_overflow(self): flushed = b"".join(writer._write_stack) # At least (total - 8) bytes were implicitly flushed, perhaps more # depending on the implementation. - self.assertTrue(flushed.startswith(contents[:-8]), flushed) + self.assertStartsWith(flushed, contents[:-8]) def check_writes(self, intermediate_func): # Lots of writes, test the flushed output is as expected. @@ -1940,7 +1987,7 @@ def test_write_non_blocking(self): self.assertEqual(bufio.write(b"ABCDEFGHI"), 9) s = raw.pop_written() # Previously buffered bytes were flushed - self.assertTrue(s.startswith(b"01234567A"), s) + self.assertStartsWith(s, b"01234567A") def test_write_and_rewind(self): raw = self.BytesIO() @@ -2236,7 +2283,7 @@ def test_write(self): def test_peek(self): pair = self.tp(self.BytesIO(b"abcdef"), self.MockRawIO()) - self.assertTrue(pair.peek(3).startswith(b"abc")) + self.assertStartsWith(pair.peek(3), b"abc") self.assertEqual(pair.read(3), b"abc") def test_readable(self): @@ -3290,6 +3337,24 @@ def test_multibyte_seek_and_tell(self): self.assertEqual(f.tell(), p1) f.close() + def test_tell_after_readline_with_cr(self): + # Test for gh-141314: TextIOWrapper.tell() assertion failure + # when dealing with standalone carriage returns + data = b'line1\r' + with self.open(os_helper.TESTFN, "wb") as f: + f.write(data) + + with self.open(os_helper.TESTFN, "r") as f: + # Read line that ends with \r + line = f.readline() + self.assertEqual(line, "line1\n") + # This should not cause an assertion failure + pos = f.tell() + # Verify we can seek back to this position + f.seek(pos) + remaining = f.read() + self.assertEqual(remaining, "") + def test_seek_with_encoder_state(self): f = self.open(os_helper.TESTFN, "w", encoding="euc_jis_2004") f.write("\u00e6\u0300") @@ -4417,7 +4482,7 @@ def test_abc_inheritance_official(self): self._check_abc_inheritance(io) def _check_warn_on_dealloc(self, *args, **kwargs): - f = open(*args, **kwargs) + f = self.open(*args, **kwargs) r = repr(f) with self.assertWarns(ResourceWarning) as cm: f = None @@ -4446,7 +4511,7 @@ def cleanup_fds(): r, w = os.pipe() fds += r, w with warnings_helper.check_no_resource_warning(self): - open(r, *args, closefd=False, **kwargs) + self.open(r, *args, closefd=False, **kwargs) @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def test_warn_on_dealloc_fd(self): @@ -4618,10 +4683,8 @@ def test_check_encoding_warning(self): proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code) warnings = proc.err.splitlines() self.assertEqual(len(warnings), 2) - self.assertTrue( - warnings[0].startswith(b"<string>:5: EncodingWarning: ")) - self.assertTrue( - warnings[1].startswith(b"<string>:8: EncodingWarning: ")) + self.assertStartsWith(warnings[0], b"<string>:5: EncodingWarning: ") + self.assertStartsWith(warnings[1], b"<string>:8: EncodingWarning: ") def test_text_encoding(self): # PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8" @@ -4834,7 +4897,7 @@ def on_alarm(*args): os.read(r, len(data) * 100) exc = cm.exception if isinstance(exc, RuntimeError): - self.assertTrue(str(exc).startswith("reentrant call"), str(exc)) + self.assertStartsWith(str(exc), "reentrant call") finally: signal.alarm(0) wio.close() @@ -4985,12 +5048,12 @@ def write(self, b: bytes): pass def test_reader_subclass(self): - self.assertIsSubclass(MyReader, io.Reader[bytes]) - self.assertNotIsSubclass(str, io.Reader[bytes]) + self.assertIsSubclass(self.MyReader, io.Reader) + self.assertNotIsSubclass(str, io.Reader) def test_writer_subclass(self): - self.assertIsSubclass(MyWriter, io.Writer[bytes]) - self.assertNotIsSubclass(str, io.Writer[bytes]) + self.assertIsSubclass(self.MyWriter, io.Writer) + self.assertNotIsSubclass(str, io.Writer) def load_tests(loader, tests, pattern): @@ -5004,6 +5067,7 @@ def load_tests(loader, tests, pattern): CTextIOWrapperTest, PyTextIOWrapperTest, CMiscIOTest, PyMiscIOTest, CSignalsTest, PySignalsTest, TestIOCTypes, + ProtocolsTest, ) # Put the namespaces of the IO module we are testing and some useful mock diff --git a/Lib/test/test_ioctl.py b/Lib/test/test_ioctl.py index 7a986048bdac2a..01e1bfb42dd986 100644 --- a/Lib/test/test_ioctl.py +++ b/Lib/test/test_ioctl.py @@ -5,7 +5,7 @@ import threading import unittest from test import support -from test.support import threading_helper +from test.support import os_helper, threading_helper from test.support.import_helper import import_module fcntl = import_module('fcntl') termios = import_module('termios') @@ -202,6 +202,15 @@ def test_ioctl_set_window_size(self): new_winsz = struct.unpack("HHHH", result) self.assertEqual(new_winsz[:2], (20, 40)) + @unittest.skipUnless(hasattr(fcntl, 'FICLONE'), 'need fcntl.FICLONE') + def test_bad_fd(self): + # gh-134744: Test error handling + fd = os_helper.make_bad_fd() + with self.assertRaises(OSError): + fcntl.ioctl(fd, fcntl.FICLONE, fd) + with self.assertRaises(OSError): + fcntl.ioctl(fd, fcntl.FICLONE, b'\0' * 1024) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py index d04012d1afd540..8af91e857d80ed 100644 --- a/Lib/test/test_ipaddress.py +++ b/Lib/test/test_ipaddress.py @@ -12,6 +12,7 @@ import pickle import ipaddress import weakref +from collections.abc import Iterator from test.support import LARGEST, SMALLEST @@ -397,6 +398,19 @@ def assertBadSplit(addr): # A trailing IPv4 address is two parts assertBadSplit("10:9:8:7:6:5:4:3:42.42.42.42%scope") + def test_bad_address_split_v6_too_long(self): + def assertBadSplit(addr): + msg = r"At most 45 characters expected in '%s" + with self.assertAddressError(msg, re.escape(addr[:45])): + ipaddress.IPv6Address(addr) + + # Long IPv6 address + long_addr = ("0:" * 10000) + "0" + assertBadSplit(long_addr) + assertBadSplit(long_addr + "%zoneid") + assertBadSplit(long_addr + ":255.255.255.255") + assertBadSplit(long_addr + ":ffff:255.255.255.255") + def test_bad_address_split_v6_too_many_parts(self): def assertBadSplit(addr): msg = "Exactly 8 parts expected without '::' in %r" @@ -1459,18 +1473,27 @@ def testGetSupernet4(self): self.ipv6_scoped_network.supernet(new_prefix=62)) def testHosts(self): + hosts = self.ipv4_network.hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(ipaddress.IPv4Address('1.2.3.1'), next(hosts)) hosts = list(self.ipv4_network.hosts()) self.assertEqual(254, len(hosts)) self.assertEqual(ipaddress.IPv4Address('1.2.3.1'), hosts[0]) self.assertEqual(ipaddress.IPv4Address('1.2.3.254'), hosts[-1]) ipv6_network = ipaddress.IPv6Network('2001:658:22a:cafe::/120') + hosts = ipv6_network.hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(ipaddress.IPv6Address('2001:658:22a:cafe::1'), next(hosts)) hosts = list(ipv6_network.hosts()) self.assertEqual(255, len(hosts)) self.assertEqual(ipaddress.IPv6Address('2001:658:22a:cafe::1'), hosts[0]) self.assertEqual(ipaddress.IPv6Address('2001:658:22a:cafe::ff'), hosts[-1]) ipv6_scoped_network = ipaddress.IPv6Network('2001:658:22a:cafe::%scope/120') + hosts = ipv6_scoped_network.hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual((ipaddress.IPv6Address('2001:658:22a:cafe::1')), next(hosts)) hosts = list(ipv6_scoped_network.hosts()) self.assertEqual(255, len(hosts)) self.assertEqual(ipaddress.IPv6Address('2001:658:22a:cafe::1'), hosts[0]) @@ -1481,6 +1504,12 @@ def testHosts(self): ipaddress.IPv4Address('2.0.0.1')] str_args = '2.0.0.0/31' tpl_args = ('2.0.0.0', 31) + hosts = ipaddress.ip_network(str_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) + hosts = ipaddress.ip_network(tpl_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) self.assertEqual(addrs, list(ipaddress.ip_network(str_args).hosts())) self.assertEqual(addrs, list(ipaddress.ip_network(tpl_args).hosts())) self.assertEqual(list(ipaddress.ip_network(str_args).hosts()), @@ -1490,6 +1519,12 @@ def testHosts(self): addrs = [ipaddress.IPv4Address('1.2.3.4')] str_args = '1.2.3.4/32' tpl_args = ('1.2.3.4', 32) + hosts = ipaddress.ip_network(str_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) + hosts = ipaddress.ip_network(tpl_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) self.assertEqual(addrs, list(ipaddress.ip_network(str_args).hosts())) self.assertEqual(addrs, list(ipaddress.ip_network(tpl_args).hosts())) self.assertEqual(list(ipaddress.ip_network(str_args).hosts()), @@ -1499,6 +1534,12 @@ def testHosts(self): ipaddress.IPv6Address('2001:658:22a:cafe::1')] str_args = '2001:658:22a:cafe::/127' tpl_args = ('2001:658:22a:cafe::', 127) + hosts = ipaddress.ip_network(str_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) + hosts = ipaddress.ip_network(tpl_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) self.assertEqual(addrs, list(ipaddress.ip_network(str_args).hosts())) self.assertEqual(addrs, list(ipaddress.ip_network(tpl_args).hosts())) self.assertEqual(list(ipaddress.ip_network(str_args).hosts()), @@ -1507,6 +1548,12 @@ def testHosts(self): addrs = [ipaddress.IPv6Address('2001:658:22a:cafe::1'), ] str_args = '2001:658:22a:cafe::1/128' tpl_args = ('2001:658:22a:cafe::1', 128) + hosts = ipaddress.ip_network(str_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) + hosts = ipaddress.ip_network(tpl_args).hosts() + self.assertIsInstance(hosts, Iterator) + self.assertEqual(next(hosts), addrs[0]) self.assertEqual(addrs, list(ipaddress.ip_network(str_args).hosts())) self.assertEqual(addrs, list(ipaddress.ip_network(tpl_args).hosts())) self.assertEqual(list(ipaddress.ip_network(str_args).hosts()), @@ -2178,6 +2225,11 @@ def testIPv6AddressTooLarge(self): self.assertEqual(ipaddress.ip_address('FFFF::192.0.2.1'), ipaddress.ip_address('FFFF::c000:201')) + self.assertEqual(ipaddress.ip_address('0000:0000:0000:0000:0000:FFFF:192.168.255.255'), + ipaddress.ip_address('::ffff:c0a8:ffff')) + self.assertEqual(ipaddress.ip_address('FFFF:0000:0000:0000:0000:0000:192.168.255.255'), + ipaddress.ip_address('ffff::c0a8:ffff')) + self.assertEqual(ipaddress.ip_address('::FFFF:192.0.2.1%scope'), ipaddress.ip_address('::FFFF:c000:201%scope')) self.assertEqual(ipaddress.ip_address('FFFF::192.0.2.1%scope'), @@ -2190,6 +2242,10 @@ def testIPv6AddressTooLarge(self): ipaddress.ip_address('::FFFF:c000:201%scope')) self.assertNotEqual(ipaddress.ip_address('FFFF::192.0.2.1'), ipaddress.ip_address('FFFF::c000:201%scope')) + self.assertEqual(ipaddress.ip_address('0000:0000:0000:0000:0000:FFFF:192.168.255.255%scope'), + ipaddress.ip_address('::ffff:c0a8:ffff%scope')) + self.assertEqual(ipaddress.ip_address('FFFF:0000:0000:0000:0000:0000:192.168.255.255%scope'), + ipaddress.ip_address('ffff::c0a8:ffff%scope')) def testIPVersion(self): self.assertEqual(ipaddress.IPv4Address.version, 4) @@ -2599,6 +2655,10 @@ def testCompressIPv6Address(self): '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128', '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128', '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128', + '0000:0000:0000:0000:0000:0000:255.255.255.255': '::ffff:ffff/128', + '0000:0000:0000:0000:0000:ffff:255.255.255.255': '::ffff:255.255.255.255/128', + 'ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255': + 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128', } for uncompressed, compressed in list(test_addresses.items()): self.assertEqual(compressed, str(ipaddress.IPv6Interface( @@ -2762,6 +2822,34 @@ def testV6HashIsNotConstant(self): ipv6_address2 = ipaddress.IPv6Interface("2001:658:22a:cafe:200:0:0:2") self.assertNotEqual(ipv6_address1.__hash__(), ipv6_address2.__hash__()) + # issue 134062 Hash collisions in IPv4Network and IPv6Network + def testNetworkV4HashCollisions(self): + self.assertNotEqual( + ipaddress.IPv4Network("192.168.1.255/32").__hash__(), + ipaddress.IPv4Network("192.168.1.0/24").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("172.24.255.0/24").__hash__(), + ipaddress.IPv4Network("172.24.0.0/16").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("192.168.1.87/32").__hash__(), + ipaddress.IPv4Network("192.168.1.86/31").__hash__() + ) + + # issue 134062 Hash collisions in IPv4Network and IPv6Network + def testNetworkV6HashCollisions(self): + self.assertNotEqual( + ipaddress.IPv6Network("fe80::/64").__hash__(), + ipaddress.IPv6Network("fe80::ffff:ffff:ffff:0/112").__hash__() + ) + self.assertNotEqual( + ipaddress.IPv4Network("10.0.0.0/8").__hash__(), + ipaddress.IPv6Network( + "ffff:ffff:ffff:ffff:ffff:ffff:aff:0/112" + ).__hash__() + ) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_isinstance.py b/Lib/test/test_isinstance.py index daad00e86432d0..f440fc28ee7b7d 100644 --- a/Lib/test/test_isinstance.py +++ b/Lib/test/test_isinstance.py @@ -318,6 +318,7 @@ def __bases__(self): self.assertRaises(RecursionError, isinstance, 1, X()) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_infinite_recursion_via_bases_tuple(self): """Regression test for bpo-30570.""" class Failure(object): @@ -328,6 +329,7 @@ def __getattr__(self, attr): issubclass(Failure(), int) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_infinite_cycle_in_bases(self): """Regression test for bpo-30570.""" class X: diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index 1b9f3cf76240ad..18e4b676c53236 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -1147,7 +1147,7 @@ def test_error_iter(self): def test_exception_locations(self): # The location of an exception raised from __init__ or - # __next__ should should be the iterator expression + # __next__ should be the iterator expression def init_raises(): try: diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py index 13b40020781bae..39470754003bb6 100644 --- a/Lib/test/test_json/test_dump.py +++ b/Lib/test/test_json/test_dump.py @@ -22,6 +22,14 @@ def test_dump_skipkeys(self): self.assertIn('valid_key', o) self.assertNotIn(b'invalid_key', o) + def test_dump_skipkeys_indent_empty(self): + v = {b'invalid_key': False} + self.assertEqual(self.json.dumps(v, skipkeys=True, indent=4), '{}') + + def test_skipkeys_indent(self): + v = {b'invalid_key': False, 'valid_key': True} + self.assertEqual(self.json.dumps(v, skipkeys=True, indent=4), '{\n "valid_key": true\n}') + def test_encode_truefalse(self): self.assertEqual(self.dumps( {True: False, False: True}, sort_keys=True), diff --git a/Lib/test/test_json/test_encode_basestring_ascii.py b/Lib/test/test_json/test_encode_basestring_ascii.py index 6a39b72a09df35..c90d3e968e5ef9 100644 --- a/Lib/test/test_json/test_encode_basestring_ascii.py +++ b/Lib/test/test_json/test_encode_basestring_ascii.py @@ -8,13 +8,12 @@ ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), ('controls', '"controls"'), ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), + ('\x00\x1f\x7f', '"\\u0000\\u001f\\u007f"'), ('{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), (' s p a c e d ', '" s p a c e d "'), ('\U0001d120', '"\\ud834\\udd20"'), ('\u03b1\u03a9', '"\\u03b1\\u03a9"'), ("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), - ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), - ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), ] class TestEncodeBasestringAscii: diff --git a/Lib/test/test_json/test_fail.py b/Lib/test/test_json/test_fail.py index 7c1696cc66d12b..79c44af2fbf0e1 100644 --- a/Lib/test/test_json/test_fail.py +++ b/Lib/test/test_json/test_fail.py @@ -102,7 +102,7 @@ def test_not_serializable(self): with self.assertRaisesRegex(TypeError, 'Object of type module is not JSON serializable') as cm: self.dumps(sys) - self.assertFalse(hasattr(cm.exception, '__notes__')) + self.assertNotHasAttr(cm.exception, '__notes__') with self.assertRaises(TypeError) as cm: self.dumps([1, [2, 3, sys]]) diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py index d82093f3895167..40a0baa53f0c3b 100644 --- a/Lib/test/test_json/test_recursion.py +++ b/Lib/test/test_json/test_recursion.py @@ -69,8 +69,9 @@ def default(self, o): @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_highly_nested_objects_decoding(self): - very_deep = 200000 + very_deep = 500_000 # test that loading highly-nested objects doesn't segfault when C # accelerations are used. See #12017 with self.assertRaises(RecursionError): @@ -85,10 +86,11 @@ def test_highly_nested_objects_decoding(self): @support.skip_wasi_stack_overflow() @support.skip_emscripten_stack_overflow() + @support.requires_resource('cpu') def test_highly_nested_objects_encoding(self): # See #12051 l, d = [], {} - for x in range(200_000): + for x in range(500_000): l, d = [l], {'k':d} with self.assertRaises(RecursionError): with support.infinite_recursion(5000): @@ -98,6 +100,7 @@ def test_highly_nested_objects_encoding(self): self.dumps(d) @support.skip_emscripten_stack_overflow() + @support.skip_wasi_stack_overflow() def test_endless_recursion(self): # See #12051 class EndlessJSONEncoder(self.json.JSONEncoder): diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py index cca556a3b95bab..9a6cdfe12d266c 100644 --- a/Lib/test/test_json/test_scanstring.py +++ b/Lib/test/test_json/test_scanstring.py @@ -144,7 +144,7 @@ def test_bad_escapes(self): def test_overflow(self): with self.assertRaises(OverflowError): - self.json.decoder.scanstring(b"xxx", sys.maxsize+1) + self.json.decoder.scanstring("xxx", sys.maxsize+1) class TestPyScanstring(TestScanstring, PyTest): pass diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py index 72cde3f0d6c1bd..7b5d217a21558c 100644 --- a/Lib/test/test_json/test_tool.py +++ b/Lib/test/test_json/test_tool.py @@ -13,6 +13,7 @@ @support.requires_subprocess() +@support.skip_if_pgo_task class TestMain(unittest.TestCase): data = """ @@ -160,7 +161,7 @@ def test_help_flag(self): rc, out, err = assert_python_ok('-m', self.module, '-h', PYTHON_COLORS='0') self.assertEqual(rc, 0) - self.assertTrue(out.startswith(b'usage: ')) + self.assertStartsWith(out, b'usage: ') self.assertEqual(err, b'') def test_sort_keys_flag(self): @@ -270,7 +271,7 @@ def test_colors(self): (r'" \"foo\" "', f'{t.string}" \\"foo\\" "{t.reset}'), ('"α"', f'{t.string}"\\u03b1"{t.reset}'), ('123', f'{t.number}123{t.reset}'), - ('-1.2345e+23', f'{t.number}-1.2345e+23{t.reset}'), + ('-1.25e+23', f'{t.number}-1.25e+23{t.reset}'), (r'{"\\": ""}', f'''\ {ob} @@ -319,6 +320,7 @@ def test_colors(self): @support.requires_subprocess() +@support.skip_if_pgo_task class TestTool(TestMain): module = 'json.tool' diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py index 68629cceeb9be9..1aa9546dc46306 100644 --- a/Lib/test/test_json/test_unicode.py +++ b/Lib/test/test_json/test_unicode.py @@ -32,6 +32,29 @@ def test_encoding7(self): j = self.dumps(u + "\n", ensure_ascii=False) self.assertEqual(j, f'"{u}\\n"') + def test_ascii_non_printable_encode(self): + u = '\b\t\n\f\r\x00\x1f\x7f' + self.assertEqual(self.dumps(u), + '"\\b\\t\\n\\f\\r\\u0000\\u001f\\u007f"') + self.assertEqual(self.dumps(u, ensure_ascii=False), + '"\\b\\t\\n\\f\\r\\u0000\\u001f\x7f"') + + def test_ascii_non_printable_decode(self): + self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'), + '\b\t\n\f\r') + s = ''.join(map(chr, range(32))) + for c in s: + self.assertRaises(self.JSONDecodeError, self.loads, f'"{c}"') + self.assertEqual(self.loads(f'"{s}"', strict=False), s) + self.assertEqual(self.loads('"\x7f"'), '\x7f') + + def test_escaped_decode(self): + self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'), '\b\t\n\f\r') + self.assertEqual(self.loads('"\\"\\\\\\/"'), '"\\/') + for c in set(map(chr, range(0x100))) - set('"\\/bfnrt'): + self.assertRaises(self.JSONDecodeError, self.loads, f'"\\{c}"') + self.assertRaises(self.JSONDecodeError, self.loads, f'"\\{c}"', strict=False) + def test_big_unicode_encode(self): u = '\U0001d120' self.assertEqual(self.dumps(u), '"\\ud834\\udd20"') @@ -48,6 +71,18 @@ def test_unicode_decode(self): s = f'"\\u{i:04x}"' self.assertEqual(self.loads(s), u) + def test_single_surrogate_encode(self): + self.assertEqual(self.dumps('\uD83D'), '"\\ud83d"') + self.assertEqual(self.dumps('\uD83D', ensure_ascii=False), '"\ud83d"') + self.assertEqual(self.dumps('\uDC0D'), '"\\udc0d"') + self.assertEqual(self.dumps('\uDC0D', ensure_ascii=False), '"\udc0d"') + + def test_single_surrogate_decode(self): + self.assertEqual(self.loads('"\uD83D"'), '\ud83d') + self.assertEqual(self.loads('"\\uD83D"'), '\ud83d') + self.assertEqual(self.loads('"\udc0d"'), '\udc0d') + self.assertEqual(self.loads('"\\udc0d"'), '\udc0d') + def test_unicode_preservation(self): self.assertEqual(type(self.loads('""')), str) self.assertEqual(type(self.loads('"a"')), str) diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 173fc743cf68ae..caa1603c78eb01 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -443,7 +443,7 @@ def test_search_major_3(self): except subprocess.CalledProcessError: raise unittest.SkipTest("requires at least one Python 3.x install") self.assertEqual("PythonCore", data["env.company"]) - self.assertTrue(data["env.tag"].startswith("3."), data["env.tag"]) + self.assertStartsWith(data["env.tag"], "3.") def test_search_major_3_32(self): try: @@ -453,8 +453,8 @@ def test_search_major_3_32(self): raise unittest.SkipTest("requires at least one 32-bit Python 3.x install") raise self.assertEqual("PythonCore", data["env.company"]) - self.assertTrue(data["env.tag"].startswith("3."), data["env.tag"]) - self.assertTrue(data["env.tag"].endswith("-32"), data["env.tag"]) + self.assertStartsWith(data["env.tag"], "3.") + self.assertEndsWith(data["env.tag"], "-32") def test_search_major_2(self): try: @@ -463,7 +463,7 @@ def test_search_major_2(self): if not is_installed("2.7"): raise unittest.SkipTest("requires at least one Python 2.x install") self.assertEqual("PythonCore", data["env.company"]) - self.assertTrue(data["env.tag"].startswith("2."), data["env.tag"]) + self.assertStartsWith(data["env.tag"], "2.") def test_py_default(self): with self.py_ini(TEST_PY_DEFAULTS): diff --git a/Lib/test/test_listcomps.py b/Lib/test/test_listcomps.py index cffdeeacc5d73b..ad7f62fbf78d60 100644 --- a/Lib/test/test_listcomps.py +++ b/Lib/test/test_listcomps.py @@ -716,7 +716,7 @@ def test_multiple_comprehension_name_reuse(self): def test_exception_locations(self): # The location of an exception raised from __init__ or - # __next__ should should be the iterator expression + # __next__ should be the iterator expression def init_raises(): try: @@ -750,6 +750,28 @@ def iter_raises(): self.assertEqual(f.line[f.colno - indent : f.end_colno - indent], expected) + def test_only_calls_dunder_iter_once(self): + + class Iterator: + + def __init__(self): + self.val = 0 + + def __next__(self): + if self.val == 2: + raise StopIteration + self.val += 1 + return self.val + + # No __iter__ method + + class C: + + def __iter__(self): + return Iterator() + + self.assertEqual([1, 2], [i for i in C()]) + __test__ = {'doctests' : doctests} def load_tests(loader, tests, pattern): diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 455d2af37efdc8..8e49aa8954ee0d 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -5,6 +5,7 @@ from unittest import mock import unittest import locale +import os import sys import codecs @@ -349,8 +350,7 @@ def setUp(self): enc = codecs.lookup(locale.getencoding() or 'ascii').name if enc not in ('utf-8', 'iso8859-1', 'cp1252'): raise unittest.SkipTest('encoding not suitable') - if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android or - sys.platform.startswith('freebsd')): + if enc != 'iso8859-1' and is_android: raise unittest.SkipTest('wcscoll/wcsxfrm have known bugs') BaseLocalizedTest.setUp(self) @@ -387,6 +387,10 @@ def test_c(self): self.check('c', 'C') self.check('posix', 'C') + def test_c_utf8(self): + self.check('c.utf8', 'C.UTF-8') + self.check('C.UTF-8', 'C.UTF-8') + def test_english(self): self.check('en', 'en_US.ISO8859-1') self.check('EN', 'en_US.ISO8859-1') @@ -482,6 +486,54 @@ def test_japanese(self): self.check('jp_jp', 'ja_JP.eucJP') +class TestRealLocales(unittest.TestCase): + def setUp(self): + oldlocale = locale.setlocale(locale.LC_CTYPE) + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) + + def test_getsetlocale_issue1813(self): + # Issue #1813: setting and getting the locale under a Turkish locale + try: + locale.setlocale(locale.LC_CTYPE, 'tr_TR') + except locale.Error: + # Unsupported locale on this system + self.skipTest('test needs Turkish locale') + loc = locale.getlocale(locale.LC_CTYPE) + if verbose: + print('testing with %a' % (loc,), end=' ', flush=True) + try: + locale.setlocale(locale.LC_CTYPE, loc) + except locale.Error as exc: + # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE) + # and the tr_TR locale on Windows. getlocale() builds a locale + # which is not recognize by setlocale(). + self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") + self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_setlocale_long_encoding(self): + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_CTYPE, 'English.%016d' % 1252) + locale.setlocale(locale.LC_CTYPE, 'English.%015d' % 1252) + loc = locale.setlocale(locale.LC_ALL) + self.assertIn('.1252', loc) + loc2 = loc.replace('.1252', '.%016d' % 1252, 1) + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_ALL, loc2) + loc2 = loc.replace('.1252', '.%015d' % 1252, 1) + locale.setlocale(locale.LC_ALL, loc2) + + # gh-137273: Debug assertion failure on Windows for long encoding. + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_CTYPE, 'en_US.' + 'x'*16) + locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8') + loc = locale.setlocale(locale.LC_ALL) + self.assertIn('.UTF-8', loc) + loc2 = loc.replace('.UTF-8', '.' + 'x'*16, 1) + with self.assertRaises(locale.Error): + locale.setlocale(locale.LC_ALL, loc2) + + class TestMiscellaneous(unittest.TestCase): def test_defaults_UTF8(self): # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is @@ -548,27 +600,6 @@ def test_setlocale_category(self): # crasher from bug #7419 self.assertRaises(locale.Error, locale.setlocale, 12345) - def test_getsetlocale_issue1813(self): - # Issue #1813: setting and getting the locale under a Turkish locale - oldlocale = locale.setlocale(locale.LC_CTYPE) - self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) - try: - locale.setlocale(locale.LC_CTYPE, 'tr_TR') - except locale.Error: - # Unsupported locale on this system - self.skipTest('test needs Turkish locale') - loc = locale.getlocale(locale.LC_CTYPE) - if verbose: - print('testing with %a' % (loc,), end=' ', flush=True) - try: - locale.setlocale(locale.LC_CTYPE, loc) - except locale.Error as exc: - # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE) - # and the tr_TR locale on Windows. getlocale() builds a locale - # which is not recognize by setlocale(). - self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") - self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) - def test_invalid_locale_format_in_localetuple(self): with self.assertRaises(TypeError): locale.setlocale(locale.LC_ALL, b'fi_FI') diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 3f113ec1be47af..99869fdd9e7061 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -1036,7 +1036,7 @@ class TestTCPServer(ControlMixin, ThreadingTCPServer): """ allow_reuse_address = True - allow_reuse_port = True + allow_reuse_port = False def __init__(self, addr, handler, poll_interval=0.5, bind_and_activate=True): @@ -5421,7 +5421,7 @@ def test_taskName_with_asyncio_imported(self): logging.logAsyncioTasks = False runner.run(make_record(self.assertIsNone)) finally: - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) @support.requires_working_socket() def test_taskName_without_asyncio_imported(self): @@ -5433,7 +5433,7 @@ def test_taskName_without_asyncio_imported(self): logging.logAsyncioTasks = False runner.run(make_record(self.assertIsNone)) finally: - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class BasicConfigTest(unittest.TestCase): @@ -5737,7 +5737,7 @@ async def log_record(): data = f.read().strip() self.assertRegex(data, r'Task-\d+ - hello world') finally: - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) if handler: handler.close() @@ -5800,7 +5800,7 @@ def cleanup(): self.addCleanup(cleanup) self.addCleanup(logging.shutdown) - self.adapter = logging.LoggerAdapter(logger=self.logger, extra=None) + self.adapter = logging.LoggerAdapter(logger=self.logger) def test_exception(self): msg = 'testing exception: %r' @@ -5971,6 +5971,18 @@ def test_extra_merged(self): self.assertEqual(record.foo, '1') self.assertEqual(record.bar, '2') + self.adapter.critical('no extra') # should not fail + self.assertEqual(len(self.recording.records), 2) + record = self.recording.records[-1] + self.assertEqual(record.foo, '1') + self.assertNotHasAttr(record, 'bar') + + self.adapter.critical('none extra', extra=None) # should not fail + self.assertEqual(len(self.recording.records), 3) + record = self.recording.records[-1] + self.assertEqual(record.foo, '1') + self.assertNotHasAttr(record, 'bar') + def test_extra_merged_log_call_has_precedence(self): self.adapter = logging.LoggerAdapter(logger=self.logger, extra={'foo': '1'}, @@ -5982,6 +5994,25 @@ def test_extra_merged_log_call_has_precedence(self): self.assertHasAttr(record, 'foo') self.assertEqual(record.foo, '2') + def test_extra_merged_without_extra(self): + self.adapter = logging.LoggerAdapter(logger=self.logger, + merge_extra=True) + + self.adapter.critical('foo should be here', extra={'foo': '1'}) + self.assertEqual(len(self.recording.records), 1) + record = self.recording.records[-1] + self.assertEqual(record.foo, '1') + + self.adapter.critical('no extra') # should not fail + self.assertEqual(len(self.recording.records), 2) + record = self.recording.records[-1] + self.assertNotHasAttr(record, 'foo') + + self.adapter.critical('none extra', extra=None) # should not fail + self.assertEqual(len(self.recording.records), 3) + record = self.recording.records[-1] + self.assertNotHasAttr(record, 'foo') + class PrefixAdapter(logging.LoggerAdapter): prefix = 'Adapter' diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index f336d49fa4f008..b48a8812a1a2d1 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -1374,17 +1374,22 @@ def equivalent_python(n, length, byteorder, signed=False): check(tests4, 'little', signed=False) self.assertRaises(OverflowError, (256).to_bytes, 1, 'big', signed=False) - self.assertRaises(OverflowError, (256).to_bytes, 1, 'big', signed=True) self.assertRaises(OverflowError, (256).to_bytes, 1, 'little', signed=False) - self.assertRaises(OverflowError, (256).to_bytes, 1, 'little', signed=True) + self.assertRaises(OverflowError, (128).to_bytes, 1, 'big', signed=True) + self.assertRaises(OverflowError, (128).to_bytes, 1, 'little', signed=True) + self.assertRaises(OverflowError, (-129).to_bytes, 1, 'big', signed=True) + self.assertRaises(OverflowError, (-129).to_bytes, 1, 'little', signed=True) self.assertRaises(OverflowError, (-1).to_bytes, 2, 'big', signed=False) self.assertRaises(OverflowError, (-1).to_bytes, 2, 'little', signed=False) self.assertEqual((0).to_bytes(0, 'big'), b'') + self.assertEqual((0).to_bytes(0, 'big', signed=True), b'') self.assertEqual((1).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x01') self.assertEqual((0).to_bytes(5, 'big'), b'\x00\x00\x00\x00\x00') self.assertEqual((-1).to_bytes(5, 'big', signed=True), b'\xff\xff\xff\xff\xff') self.assertRaises(OverflowError, (1).to_bytes, 0, 'big') + self.assertRaises(OverflowError, (-1).to_bytes, 0, 'big', signed=True) + self.assertRaises(OverflowError, (-1).to_bytes, 0, 'little', signed=True) # gh-98783 class SubStr(str): @@ -1693,5 +1698,21 @@ class MyInt(int): # GH-117195 -- This shouldn't crash object.__sizeof__(1) + def test_hash(self): + # gh-136599 + self.assertEqual(hash(-1), -2) + self.assertEqual(hash(0), 0) + self.assertEqual(hash(10), 10) + + self.assertEqual(hash(sys.hash_info.modulus - 2), sys.hash_info.modulus - 2) + self.assertEqual(hash(sys.hash_info.modulus - 1), sys.hash_info.modulus - 1) + self.assertEqual(hash(sys.hash_info.modulus), 0) + self.assertEqual(hash(sys.hash_info.modulus + 1), 1) + + self.assertEqual(hash(-sys.hash_info.modulus - 2), -2) + self.assertEqual(hash(-sys.hash_info.modulus - 1), -2) + self.assertEqual(hash(-sys.hash_info.modulus), 0) + self.assertEqual(hash(-sys.hash_info.modulus + 1), -sys.hash_info.modulus + 1) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py index 9ffb93e797dd80..e93c3c37354e27 100644 --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -1025,12 +1025,12 @@ def test_peek(self): with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: result = f.peek() self.assertGreater(len(result), 0) - self.assertTrue(INPUT.startswith(result)) + self.assertStartsWith(INPUT, result) self.assertEqual(f.read(), INPUT) with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: result = f.peek(10) self.assertGreater(len(result), 0) - self.assertTrue(INPUT.startswith(result)) + self.assertStartsWith(INPUT, result) self.assertEqual(f.read(), INPUT) def test_peek_bad_args(self): diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 913a60bf9e04e3..d14336f8bac498 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1214,6 +1214,12 @@ def testLdexp(self): self.assertEqual(math.ldexp(NINF, n), NINF) self.assertTrue(math.isnan(math.ldexp(NAN, n))) + @requires_IEEE_754 + def testLdexp_denormal(self): + # Denormal output incorrectly rounded (truncated) + # on some Windows. + self.assertEqual(math.ldexp(6993274598585239, -1126), 1e-323) + def testLog(self): self.assertRaises(TypeError, math.log) self.assertRaises(TypeError, math.log, 1, 2, 3) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 95629ed862d6eb..bb023735e21398 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -54,6 +54,12 @@ def testSeek(self): self.assertEqual(buf[3:], bytesIo.read()) self.assertRaises(TypeError, bytesIo.seek, 0.0) + self.assertEqual(sys.maxsize, bytesIo.seek(sys.maxsize)) + self.assertEqual(self.EOF, bytesIo.read(4)) + + self.assertEqual(sys.maxsize - 2, bytesIo.seek(sys.maxsize - 2)) + self.assertEqual(self.EOF, bytesIo.read(4)) + def testTell(self): buf = self.buftype("1234567890") bytesIo = self.ioclass(buf) @@ -265,8 +271,8 @@ def test_iterator(self): memio = self.ioclass(buf * 10) self.assertEqual(iter(memio), memio) - self.assertTrue(hasattr(memio, '__iter__')) - self.assertTrue(hasattr(memio, '__next__')) + self.assertHasAttr(memio, '__iter__') + self.assertHasAttr(memio, '__next__') i = 0 for line in memio: self.assertEqual(line, buf) @@ -552,6 +558,14 @@ def test_relative_seek(self): memio.seek(1, 1) self.assertEqual(memio.read(), buf[1:]) + def test_issue141311(self): + memio = self.ioclass() + # Seek allows PY_SSIZE_T_MAX, read should handle that. + # Past end of buffer read should always return 0 (EOF). + self.assertEqual(sys.maxsize, memio.seek(sys.maxsize)) + buf = bytearray(2) + self.assertEqual(0, memio.readinto(buf)) + def test_unicode(self): memio = self.ioclass() diff --git a/Lib/test/test_memoryview.py b/Lib/test/test_memoryview.py index 61b068c630c7ce..64f440f180bbf0 100644 --- a/Lib/test/test_memoryview.py +++ b/Lib/test/test_memoryview.py @@ -743,19 +743,21 @@ def test_racing_getbuf_and_releasebuf(self): from multiprocessing.managers import SharedMemoryManager except ImportError: self.skipTest("Test requires multiprocessing") - from threading import Thread + from threading import Thread, Event - n = 100 + start = Event() with SharedMemoryManager() as smm: obj = smm.ShareableList(range(100)) - threads = [] - for _ in range(n): - # Issue gh-127085, the `ShareableList.count` is just a convenient way to mess the `exports` - # counter of `memoryview`, this issue has no direct relation with `ShareableList`. - threads.append(Thread(target=obj.count, args=(1,))) - + def test(): + # Issue gh-127085, the `ShareableList.count` is just a + # convenient way to mess the `exports` counter of `memoryview`, + # this issue has no direct relation with `ShareableList`. + start.wait(support.SHORT_TIMEOUT) + for i in range(10): + obj.count(1) + threads = [Thread(target=test) for _ in range(10)] with threading_helper.start_threads(threads): - pass + start.set() del obj diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index fb57d5e5544c12..c1806b1c133778 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -470,13 +470,31 @@ def test_parse_args(self): self.assertFalse(args.lenient) self.assertEqual(args.type, ["foo.pic"]) + def test_multiple_inputs(self): + result = "\n".join(mimetypes._main(shlex.split("foo.pdf foo.png"))) + self.assertEqual( + result, + "type: application/pdf encoding: None\n" + "type: image/png encoding: None" + ) + + def test_multiple_inputs_error(self): + result = "\n".join(mimetypes._main(shlex.split("foo.pdf foo.bar_ext"))) + self.assertEqual( + result, + "type: application/pdf encoding: None\n" + "error: media type unknown for foo.bar_ext" + ) + + def test_invocation(self): for command, expected in [ ("-l -e image/jpg", ".jpg"), ("-e image/jpeg", ".jpg"), ("-l foo.webp", "type: image/webp encoding: None"), ]: - self.assertEqual(mimetypes._main(shlex.split(command)), expected) + result = "\n".join(mimetypes._main(shlex.split(command))) + self.assertEqual(result, expected) def test_invocation_error(self): for command, expected in [ @@ -484,8 +502,8 @@ def test_invocation_error(self): ("foo.bar_ext", "error: media type unknown for foo.bar_ext"), ]: with self.subTest(command=command): - with self.assertRaisesRegex(SystemExit, expected): - mimetypes._main(shlex.split(command)) + result = "\n".join(mimetypes._main(shlex.split(command))) + self.assertEqual(result, expected) if __name__ == "__main__": diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 4f25e9c2a03cb4..4fa5a4e6768b25 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -2,6 +2,7 @@ import copy import pickle +import time import io from test import support import unittest @@ -173,6 +174,23 @@ def testAppendChild(self): self.assertEqual(dom.documentElement.childNodes[-1].data, "Hello") dom.unlink() + def testAppendChildNoQuadraticComplexity(self): + impl = getDOMImplementation() + + newdoc = impl.createDocument(None, "some_tag", None) + top_element = newdoc.documentElement + children = [newdoc.createElement(f"child-{i}") for i in range(1, 2 ** 15 + 1)] + element = top_element + + start = time.time() + for child in children: + element.appendChild(child) + element = child + end = time.time() + + # This example used to take at least 30 seconds. + self.assertLess(end - start, 1) + def testAppendChildFragment(self): dom, orig, c1, c2, c3, frag = self._create_fragment_test_nodes() dom.documentElement.appendChild(frag) diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index fd4197b7086976..f468dda1d163c4 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -732,7 +732,7 @@ def test_tagname(self): m2.close() m1.close() - with self.assertRaisesRegex(TypeError, 'must be str or None'): + with self.assertRaisesRegex(TypeError, 'tagname'): mmap.mmap(-1, 8, tagname=1) @cpython_only @@ -887,6 +887,10 @@ def test_madvise(self): size = 2 * PAGESIZE m = mmap.mmap(-1, size) + class Number: + def __index__(self): + return 2 + with self.assertRaisesRegex(ValueError, "madvise start out of bounds"): m.madvise(mmap.MADV_NORMAL, size) with self.assertRaisesRegex(ValueError, "madvise start out of bounds"): @@ -895,41 +899,79 @@ def test_madvise(self): m.madvise(mmap.MADV_NORMAL, 0, -1) with self.assertRaisesRegex(OverflowError, "madvise length too large"): m.madvise(mmap.MADV_NORMAL, PAGESIZE, sys.maxsize) + with self.assertRaisesRegex( + TypeError, "'str' object cannot be interpreted as an integer"): + m.madvise(mmap.MADV_NORMAL, PAGESIZE, "Not a Number") self.assertEqual(m.madvise(mmap.MADV_NORMAL), None) self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE), None) self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE, size), None) self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, 2), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, Number()), None) self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, size), None) - @unittest.skipUnless(os.name == 'nt', 'requires Windows') - def test_resize_up_when_mapped_to_pagefile(self): + def test_resize_up_anonymous_mapping(self): """If the mmap is backed by the pagefile ensure a resize up can happen and that the original data is still in place """ start_size = PAGESIZE new_size = 2 * start_size - data = bytes(random.getrandbits(8) for _ in range(start_size)) + data = random.randbytes(start_size) - m = mmap.mmap(-1, start_size) - m[:] = data - m.resize(new_size) - self.assertEqual(len(m), new_size) - self.assertEqual(m[:start_size], data[:start_size]) + with mmap.mmap(-1, start_size) as m: + m[:] = data + if sys.platform.startswith(('linux', 'android')): + # Can't expand a shared anonymous mapping on Linux. + # See https://bugzilla.kernel.org/show_bug.cgi?id=8691 + with self.assertRaises(ValueError): + m.resize(new_size) + else: + try: + m.resize(new_size) + except SystemError: + pass + else: + self.assertEqual(len(m), new_size) + self.assertEqual(m[:start_size], data) + self.assertEqual(m[start_size:], b'\0' * (new_size - start_size)) - @unittest.skipUnless(os.name == 'nt', 'requires Windows') - def test_resize_down_when_mapped_to_pagefile(self): + @unittest.skipUnless(os.name == 'posix', 'requires Posix') + def test_resize_up_private_anonymous_mapping(self): + start_size = PAGESIZE + new_size = 2 * start_size + data = random.randbytes(start_size) + + with mmap.mmap(-1, start_size, flags=mmap.MAP_PRIVATE) as m: + m[:] = data + try: + m.resize(new_size) + except SystemError: + pass + else: + self.assertEqual(len(m), new_size) + self.assertEqual(m[:start_size], data) + self.assertEqual(m[start_size:], b'\0' * (new_size - start_size)) + + def test_resize_down_anonymous_mapping(self): """If the mmap is backed by the pagefile ensure a resize down up can happen and that a truncated form of the original data is still in place """ - start_size = PAGESIZE + start_size = 2 * PAGESIZE new_size = start_size // 2 - data = bytes(random.getrandbits(8) for _ in range(start_size)) + data = random.randbytes(start_size) - m = mmap.mmap(-1, start_size) - m[:] = data - m.resize(new_size) - self.assertEqual(len(m), new_size) - self.assertEqual(m[:new_size], data[:new_size]) + with mmap.mmap(-1, start_size) as m: + m[:] = data + try: + m.resize(new_size) + except SystemError: + pass + else: + self.assertEqual(len(m), new_size) + self.assertEqual(m[:], data[:new_size]) + if sys.platform.startswith(('linux', 'android')): + # Can't expand to its original size. + with self.assertRaises(ValueError): + m.resize(start_size) @unittest.skipUnless(os.name == 'nt', 'requires Windows') def test_resize_fails_if_mapping_held_elsewhere(self): diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py index 263e4e6f394155..de658c597a197d 100644 --- a/Lib/test/test_monitoring.py +++ b/Lib/test/test_monitoring.py @@ -3,6 +3,7 @@ import collections import dis import functools +import inspect import math import operator import sys @@ -11,10 +12,10 @@ import unittest import test.support -from test.support import requires_specialization_ft, script_helper +from test.support import import_helper, requires_specialization_ft, script_helper -_testcapi = test.support.import_helper.import_module("_testcapi") -_testinternalcapi = test.support.import_helper.import_module("_testinternalcapi") +_testcapi = import_helper.import_module("_testcapi") +_testinternalcapi = import_helper.import_module("_testinternalcapi") PAIR = (0,1) @@ -1078,6 +1079,25 @@ def f(): self.assertEqual(events, expected) + # gh-140373 + def test_gen_unwind(self): + def gen(): + yield 1 + + def f(): + g = gen() + next(g) + g.close() + + recorders = ( + UnwindRecorder, + ) + events = self.get_events(f, TEST_TOOL, recorders) + expected = [ + ("unwind", GeneratorExit, "gen"), + ] + self.assertEqual(events, expected) + class LineRecorder: event_type = E.LINE @@ -1709,6 +1729,27 @@ def func(v=1): ('branch right', 'func', 6, 8), ('branch right', 'func', 2, 10)]) + def test_callback_set_frame_lineno(self): + def func(s: str) -> int: + if s.startswith("t"): + return 1 + else: + return 0 + + def callback(code, from_, to): + # try set frame.f_lineno + frame = inspect.currentframe() + while frame and frame.f_code is not code: + frame = frame.f_back + + self.assertIsNotNone(frame) + frame.f_lineno = frame.f_lineno + 1 # run next instruction + + sys.monitoring.set_local_events(TEST_TOOL, func.__code__, E.BRANCH_LEFT) + sys.monitoring.register_callback(TEST_TOOL, E.BRANCH_LEFT, callback) + + self.assertEqual(func("true"), 1) + class TestBranchConsistency(MonitoringTestBase, unittest.TestCase): diff --git a/Lib/test/test_netrc.py b/Lib/test/test_netrc.py index 81e11a293cc4c8..9d720f627102e3 100644 --- a/Lib/test/test_netrc.py +++ b/Lib/test/test_netrc.py @@ -1,11 +1,7 @@ import netrc, os, unittest, sys, textwrap +from test import support from test.support import os_helper -try: - import pwd -except ImportError: - pwd = None - temp_filename = os_helper.TESTFN class NetrcTestCase(unittest.TestCase): @@ -269,9 +265,14 @@ def test_comment_at_end_of_machine_line_pass_has_hash(self): machine bar.domain.com login foo password pass """, '#pass') + @unittest.skipUnless(support.is_wasi, 'WASI only test') + def test_security_on_WASI(self): + self.assertFalse(netrc._can_security_check()) + self.assertEqual(netrc._getpwuid(0), 'uid 0') + self.assertEqual(netrc._getpwuid(123456), 'uid 123456') @unittest.skipUnless(os.name == 'posix', 'POSIX only test') - @unittest.skipIf(pwd is None, 'security check requires pwd module') + @unittest.skipUnless(hasattr(os, 'getuid'), "os.getuid is required") @os_helper.skip_unless_working_chmod def test_security(self): # This test is incomplete since we are normally not run as root and diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index c10387b58e3f9c..9270f3257068d6 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -6,8 +6,9 @@ import sys import unittest import warnings -from test.support import cpython_only, os_helper -from test.support import TestFailed, is_emscripten +from test import support +from test.support import os_helper +from ntpath import ALLOW_MISSING from test.support.os_helper import FakePath from test import test_genericpath from tempfile import TemporaryFile @@ -57,7 +58,7 @@ def tester(fn, wantResult): fn = fn.replace("\\", "\\\\") gotResult = eval(fn) if wantResult != gotResult and _norm(wantResult) != _norm(gotResult): - raise TestFailed("%s should return: %s but returned: %s" \ + raise support.TestFailed("%s should return: %s but returned: %s" \ %(str(fn), str(wantResult), str(gotResult))) # then with bytes @@ -73,10 +74,14 @@ def tester(fn, wantResult): warnings.simplefilter("ignore", DeprecationWarning) gotResult = eval(fn) if _norm(wantResult) != _norm(gotResult): - raise TestFailed("%s should return: %s but returned: %s" \ + raise support.TestFailed("%s should return: %s but returned: %s" \ %(str(fn), str(wantResult), repr(gotResult))) +def _parameterize(*parameters): + return support.subTests('kwargs', parameters, _do_cleanups=True) + + class NtpathTestCase(unittest.TestCase): def assertPathEqual(self, path1, path2): if path1 == path2 or _norm(path1) == _norm(path2): @@ -124,6 +129,22 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) + def test_splitdrive_invalid_paths(self): + splitdrive = ntpath.splitdrive + self.assertEqual(splitdrive('\\\\ser\x00ver\\sha\x00re\\di\x00r'), + ('\\\\ser\x00ver\\sha\x00re', '\\di\x00r')) + self.assertEqual(splitdrive(b'\\\\ser\x00ver\\sha\x00re\\di\x00r'), + (b'\\\\ser\x00ver\\sha\x00re', b'\\di\x00r')) + self.assertEqual(splitdrive("\\\\\udfff\\\udffe\\\udffd"), + ('\\\\\udfff\\\udffe', '\\\udffd')) + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, splitdrive, b'\\\\\xff\\share\\dir') + self.assertRaises(UnicodeDecodeError, splitdrive, b'\\\\server\\\xff\\dir') + self.assertRaises(UnicodeDecodeError, splitdrive, b'\\\\server\\share\\\xff') + else: + self.assertEqual(splitdrive(b'\\\\\xff\\\xfe\\\xfd'), + (b'\\\\\xff\\\xfe', b'\\\xfd')) + def test_splitroot(self): tester("ntpath.splitroot('')", ('', '', '')) tester("ntpath.splitroot('foo')", ('', '', 'foo')) @@ -214,6 +235,22 @@ def test_splitroot(self): tester('ntpath.splitroot(" :/foo")', (" :", "/", "foo")) tester('ntpath.splitroot("/:/foo")', ("", "/", ":/foo")) + def test_splitroot_invalid_paths(self): + splitroot = ntpath.splitroot + self.assertEqual(splitroot('\\\\ser\x00ver\\sha\x00re\\di\x00r'), + ('\\\\ser\x00ver\\sha\x00re', '\\', 'di\x00r')) + self.assertEqual(splitroot(b'\\\\ser\x00ver\\sha\x00re\\di\x00r'), + (b'\\\\ser\x00ver\\sha\x00re', b'\\', b'di\x00r')) + self.assertEqual(splitroot("\\\\\udfff\\\udffe\\\udffd"), + ('\\\\\udfff\\\udffe', '\\', '\udffd')) + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, splitroot, b'\\\\\xff\\share\\dir') + self.assertRaises(UnicodeDecodeError, splitroot, b'\\\\server\\\xff\\dir') + self.assertRaises(UnicodeDecodeError, splitroot, b'\\\\server\\share\\\xff') + else: + self.assertEqual(splitroot(b'\\\\\xff\\\xfe\\\xfd'), + (b'\\\\\xff\\\xfe', b'\\', b'\xfd')) + def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) tester('ntpath.split("\\\\conky\\mountpoint\\foo\\bar")', @@ -226,6 +263,21 @@ def test_split(self): tester('ntpath.split("c:/")', ('c:/', '')) tester('ntpath.split("//conky/mountpoint/")', ('//conky/mountpoint/', '')) + def test_split_invalid_paths(self): + split = ntpath.split + self.assertEqual(split('c:\\fo\x00o\\ba\x00r'), + ('c:\\fo\x00o', 'ba\x00r')) + self.assertEqual(split(b'c:\\fo\x00o\\ba\x00r'), + (b'c:\\fo\x00o', b'ba\x00r')) + self.assertEqual(split('c:\\\udfff\\\udffe'), + ('c:\\\udfff', '\udffe')) + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, split, b'c:\\\xff\\bar') + self.assertRaises(UnicodeDecodeError, split, b'c:\\foo\\\xff') + else: + self.assertEqual(split(b'c:\\\xff\\\xfe'), + (b'c:\\\xff', b'\xfe')) + def test_isabs(self): tester('ntpath.isabs("foo\\bar")', 0) tester('ntpath.isabs("foo/bar")', 0) @@ -333,6 +385,30 @@ def test_join(self): tester("ntpath.join('D:a', './c:b')", 'D:a\\.\\c:b') tester("ntpath.join('D:/a', './c:b')", 'D:\\a\\.\\c:b') + def test_normcase(self): + normcase = ntpath.normcase + self.assertEqual(normcase(''), '') + self.assertEqual(normcase(b''), b'') + self.assertEqual(normcase('ABC'), 'abc') + self.assertEqual(normcase(b'ABC'), b'abc') + self.assertEqual(normcase('\xc4\u0141\u03a8'), '\xe4\u0142\u03c8') + expected = '\u03c9\u2126' if sys.platform == 'win32' else '\u03c9\u03c9' + self.assertEqual(normcase('\u03a9\u2126'), expected) + if sys.platform == 'win32' or sys.getfilesystemencoding() == 'utf-8': + self.assertEqual(normcase('\xc4\u0141\u03a8'.encode()), + '\xe4\u0142\u03c8'.encode()) + self.assertEqual(normcase('\u03a9\u2126'.encode()), + expected.encode()) + + def test_normcase_invalid_paths(self): + normcase = ntpath.normcase + self.assertEqual(normcase('abc\x00def'), 'abc\x00def') + self.assertEqual(normcase(b'abc\x00def'), b'abc\x00def') + self.assertEqual(normcase('\udfff'), '\udfff') + if sys.platform == 'win32': + path = b'ABC' + bytes(range(128, 256)) + self.assertEqual(normcase(path), path.lower()) + def test_normpath(self): tester("ntpath.normpath('A//////././//.//B')", r'A\B') tester("ntpath.normpath('A/./B')", r'A\B') @@ -381,6 +457,21 @@ def test_normpath(self): tester("ntpath.normpath('\\\\')", '\\\\') tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\') + def test_normpath_invalid_paths(self): + normpath = ntpath.normpath + self.assertEqual(normpath('fo\x00o'), 'fo\x00o') + self.assertEqual(normpath(b'fo\x00o'), b'fo\x00o') + self.assertEqual(normpath('fo\x00o\\..\\bar'), 'bar') + self.assertEqual(normpath(b'fo\x00o\\..\\bar'), b'bar') + self.assertEqual(normpath('\udfff'), '\udfff') + self.assertEqual(normpath('\udfff\\..\\foo'), 'foo') + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, normpath, b'\xff') + self.assertRaises(UnicodeDecodeError, normpath, b'\xff\\..\\foo') + else: + self.assertEqual(normpath(b'\xff'), b'\xff') + self.assertEqual(normpath(b'\xff\\..\\foo'), b'foo') + def test_realpath_curdir(self): expected = ntpath.normpath(os.getcwd()) tester("ntpath.realpath('.')", expected) @@ -389,6 +480,27 @@ def test_realpath_curdir(self): tester("ntpath.realpath('.\\.')", expected) tester("ntpath.realpath('\\'.join(['.'] * 100))", expected) + def test_realpath_curdir_strict(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('.', strict=True)", expected) + tester("ntpath.realpath('./.', strict=True)", expected) + tester("ntpath.realpath('/'.join(['.'] * 100), strict=True)", expected) + tester("ntpath.realpath('.\\.', strict=True)", expected) + tester("ntpath.realpath('\\'.join(['.'] * 100), strict=True)", expected) + + def test_realpath_curdir_missing_ok(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('.', strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('./.', strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('/'.join(['.'] * 100), strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('.\\.', strict=ALLOW_MISSING)", + expected) + tester("ntpath.realpath('\\'.join(['.'] * 100), strict=ALLOW_MISSING)", + expected) + def test_realpath_pardir(self): expected = ntpath.normpath(os.getcwd()) tester("ntpath.realpath('..')", ntpath.dirname(expected)) @@ -401,28 +513,59 @@ def test_realpath_pardir(self): tester("ntpath.realpath('\\'.join(['..'] * 50))", ntpath.splitdrive(expected)[0] + '\\') + def test_realpath_pardir_strict(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('..', strict=True)", ntpath.dirname(expected)) + tester("ntpath.realpath('../..', strict=True)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('/'.join(['..'] * 50), strict=True)", + ntpath.splitdrive(expected)[0] + '\\') + tester("ntpath.realpath('..\\..', strict=True)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('\\'.join(['..'] * 50), strict=True)", + ntpath.splitdrive(expected)[0] + '\\') + + def test_realpath_pardir_missing_ok(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('..', strict=ALLOW_MISSING)", + ntpath.dirname(expected)) + tester("ntpath.realpath('../..', strict=ALLOW_MISSING)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('/'.join(['..'] * 50), strict=ALLOW_MISSING)", + ntpath.splitdrive(expected)[0] + '\\') + tester("ntpath.realpath('..\\..', strict=ALLOW_MISSING)", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('\\'.join(['..'] * 50), strict=ALLOW_MISSING)", + ntpath.splitdrive(expected)[0] + '\\') + @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') - def test_realpath_basic(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_basic(self, kwargs): ABSTFN = ntpath.abspath(os_helper.TESTFN) open(ABSTFN, "wb").close() self.addCleanup(os_helper.unlink, ABSTFN) self.addCleanup(os_helper.unlink, ABSTFN + "1") os.symlink(ABSTFN, ABSTFN + "1") - self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) - self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")), + self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN) + self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1"), **kwargs), os.fsencode(ABSTFN)) # gh-88013: call ntpath.realpath with binary drive name may raise a # TypeError. The drive should not exist to reproduce the bug. drives = {f"{c}:\\" for c in string.ascii_uppercase} - set(os.listdrives()) d = drives.pop().encode() - self.assertEqual(ntpath.realpath(d), d) + self.assertEqual(ntpath.realpath(d, strict=False), d) # gh-106242: Embedded nulls and non-strict fallback to abspath - self.assertEqual(ABSTFN + "\0spam", - ntpath.realpath(os_helper.TESTFN + "\0spam", strict=False)) + if kwargs: + with self.assertRaises(OSError): + ntpath.realpath(os_helper.TESTFN + "\0spam", + **kwargs) + else: + self.assertEqual(ABSTFN + "\0spam", + ntpath.realpath(os_helper.TESTFN + "\0spam", **kwargs)) @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @@ -434,19 +577,77 @@ def test_realpath_strict(self): self.addCleanup(os_helper.unlink, ABSTFN) self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN, strict=True) self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN + "2", strict=True) + + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_invalid_paths(self): + realpath = ntpath.realpath + ABSTFN = ntpath.abspath(os_helper.TESTFN) + ABSTFNb = os.fsencode(ABSTFN) + path = ABSTFN + '\x00' + # gh-106242: Embedded nulls and non-strict fallback to abspath + self.assertEqual(realpath(path, strict=False), path) # gh-106242: Embedded nulls should raise OSError (not ValueError) - self.assertRaises(OSError, ntpath.realpath, ABSTFN + "\0spam", strict=True) + self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) + path = ABSTFNb + b'\x00' + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) + path = ABSTFN + '\\nonexistent\\x\x00' + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) + path = ABSTFNb + b'\\nonexistent\\x\x00' + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) + path = ABSTFN + '\x00\\..' + self.assertEqual(realpath(path, strict=False), os.getcwd()) + self.assertEqual(realpath(path, strict=True), os.getcwd()) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwd()) + path = ABSTFNb + b'\x00\\..' + self.assertEqual(realpath(path, strict=False), os.getcwdb()) + self.assertEqual(realpath(path, strict=True), os.getcwdb()) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwdb()) + path = ABSTFN + '\\nonexistent\\x\x00\\..' + self.assertEqual(realpath(path, strict=False), ABSTFN + '\\nonexistent') + self.assertRaises(OSError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFN + '\\nonexistent') + path = ABSTFNb + b'\\nonexistent\\x\x00\\..' + self.assertEqual(realpath(path, strict=False), ABSTFNb + b'\\nonexistent') + self.assertRaises(OSError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFNb + b'\\nonexistent') + + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_invalid_unicode_paths(self, kwargs): + realpath = ntpath.realpath + ABSTFN = ntpath.abspath(os_helper.TESTFN) + ABSTFNb = os.fsencode(ABSTFN) + path = ABSTFNb + b'\xff' + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + path = ABSTFNb + b'\\nonexistent\\\xff' + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + path = ABSTFNb + b'\xff\\..' + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + path = ABSTFNb + b'\\nonexistent\\\xff\\..' + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) + self.assertRaises(UnicodeDecodeError, realpath, path, **kwargs) @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') - def test_realpath_relative(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_relative(self, kwargs): ABSTFN = ntpath.abspath(os_helper.TESTFN) open(ABSTFN, "wb").close() self.addCleanup(os_helper.unlink, ABSTFN) self.addCleanup(os_helper.unlink, ABSTFN + "1") os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1")) - self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) + self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN) @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @@ -598,7 +799,62 @@ def test_realpath_symlink_loops_strict(self): @os_helper.skip_unless_symlink @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') - def test_realpath_symlink_prefix(self): + def test_realpath_symlink_loops_raise(self): + # Symlink loops raise OSError in ALLOW_MISSING mode + ABSTFN = ntpath.abspath(os_helper.TESTFN) + self.addCleanup(os_helper.unlink, ABSTFN) + self.addCleanup(os_helper.unlink, ABSTFN + "1") + self.addCleanup(os_helper.unlink, ABSTFN + "2") + self.addCleanup(os_helper.unlink, ABSTFN + "y") + self.addCleanup(os_helper.unlink, ABSTFN + "c") + self.addCleanup(os_helper.unlink, ABSTFN + "a") + self.addCleanup(os_helper.unlink, ABSTFN + "x") + + os.symlink(ABSTFN, ABSTFN) + self.assertRaises(OSError, ntpath.realpath, ABSTFN, strict=ALLOW_MISSING) + + os.symlink(ABSTFN + "1", ABSTFN + "2") + os.symlink(ABSTFN + "2", ABSTFN + "1") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1", + strict=ALLOW_MISSING) + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "2", + strict=ALLOW_MISSING) + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\x", + strict=ALLOW_MISSING) + + # Windows eliminates '..' components before resolving links; + # realpath is not expected to raise if this removes the loop. + self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\.."), + ntpath.dirname(ABSTFN)) + self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\x"), + ntpath.dirname(ABSTFN) + "\\x") + + os.symlink(ABSTFN + "x", ABSTFN + "y") + self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\" + + ntpath.basename(ABSTFN) + "y"), + ABSTFN + "x") + self.assertRaises( + OSError, ntpath.realpath, + ABSTFN + "1\\..\\" + ntpath.basename(ABSTFN) + "1", + strict=ALLOW_MISSING) + + os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "a", + strict=ALLOW_MISSING) + + os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN)) + + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c") + self.assertRaises(OSError, ntpath.realpath, ABSTFN + "c", + strict=ALLOW_MISSING) + + # Test using relative path as well. + self.assertRaises(OSError, ntpath.realpath, ntpath.basename(ABSTFN), + strict=ALLOW_MISSING) + + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_symlink_prefix(self, kwargs): ABSTFN = ntpath.abspath(os_helper.TESTFN) self.addCleanup(os_helper.unlink, ABSTFN + "3") self.addCleanup(os_helper.unlink, "\\\\?\\" + ABSTFN + "3.") @@ -613,9 +869,9 @@ def test_realpath_symlink_prefix(self): f.write(b'1') os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link") - self.assertPathEqual(ntpath.realpath(ABSTFN + "3link"), + self.assertPathEqual(ntpath.realpath(ABSTFN + "3link", **kwargs), ABSTFN + "3") - self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link"), + self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link", **kwargs), "\\\\?\\" + ABSTFN + "3.") # Resolved paths should be usable to open target files @@ -625,14 +881,17 @@ def test_realpath_symlink_prefix(self): self.assertEqual(f.read(), b'1') # When the prefix is included, it is not stripped - self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"), + self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link", **kwargs), "\\\\?\\" + ABSTFN + "3") - self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"), + self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link", **kwargs), "\\\\?\\" + ABSTFN + "3.") @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_nul(self): tester("ntpath.realpath('NUL')", r'\\.\NUL') + tester("ntpath.realpath('NUL', strict=False)", r'\\.\NUL') + tester("ntpath.realpath('NUL', strict=True)", r'\\.\NUL') + tester("ntpath.realpath('NUL', strict=ALLOW_MISSING)", r'\\.\NUL') @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') @unittest.skipUnless(HAVE_GETSHORTPATHNAME, 'need _getshortpathname') @@ -656,12 +915,20 @@ def test_realpath_cwd(self): self.assertPathEqual(test_file_long, ntpath.realpath(test_file_short)) - with os_helper.change_cwd(test_dir_long): - self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) - with os_helper.change_cwd(test_dir_long.lower()): - self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) - with os_helper.change_cwd(test_dir_short): - self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) + for kwargs in {}, {'strict': True}, {'strict': ALLOW_MISSING}: + with self.subTest(**kwargs): + with os_helper.change_cwd(test_dir_long): + self.assertPathEqual( + test_file_long, + ntpath.realpath("file.txt", **kwargs)) + with os_helper.change_cwd(test_dir_long.lower()): + self.assertPathEqual( + test_file_long, + ntpath.realpath("file.txt", **kwargs)) + with os_helper.change_cwd(test_dir_short): + self.assertPathEqual( + test_file_long, + ntpath.realpath("file.txt", **kwargs)) @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') def test_realpath_permission(self): @@ -682,12 +949,15 @@ def test_realpath_permission(self): # Automatic generation of short names may be disabled on # NTFS volumes for the sake of performance. # They're not supported at all on ReFS and exFAT. - subprocess.run( + p = subprocess.run( # Try to set the short name manually. ['fsutil.exe', 'file', 'setShortName', test_file, 'LONGFI~1.TXT'], creationflags=subprocess.DETACHED_PROCESS ) + if p.returncode: + raise unittest.SkipTest('failed to set short name') + try: self.assertPathEqual(test_file, ntpath.realpath(test_file_short)) except AssertionError: @@ -751,6 +1021,19 @@ def check(value, expected): check('%spam%bar', '%sbar' % nonascii) check('%{}%bar'.format(nonascii), 'ham%sbar' % nonascii) + @support.requires_resource('cpu') + def test_expandvars_large(self): + expandvars = ntpath.expandvars + with os_helper.EnvironmentVarGuard() as env: + env.clear() + env["A"] = "B" + n = 100_000 + self.assertEqual(expandvars('%A%'*n), 'B'*n) + self.assertEqual(expandvars('%A%A'*n), 'BA'*n) + self.assertEqual(expandvars("''"*n + '%%'), "''"*n + '%') + self.assertEqual(expandvars("%%"*n), "%"*n) + self.assertEqual(expandvars("$$"*n), "$"*n) + def test_expanduser(self): tester('ntpath.expanduser("test")', 'test') @@ -812,8 +1095,6 @@ def test_abspath(self): tester('ntpath.abspath("C:/nul")', "\\\\.\\nul") tester('ntpath.abspath("C:\\nul")', "\\\\.\\nul") self.assertTrue(ntpath.isabs(ntpath.abspath("C:spam"))) - self.assertEqual(ntpath.abspath("C:\x00"), ntpath.join(ntpath.abspath("C:"), "\x00")) - self.assertEqual(ntpath.abspath("\x00:spam"), "\x00:\\spam") tester('ntpath.abspath("//..")', "\\\\") tester('ntpath.abspath("//../")', "\\\\..\\") tester('ntpath.abspath("//../..")', "\\\\..\\") @@ -847,6 +1128,26 @@ def test_abspath(self): drive, _ = ntpath.splitdrive(cwd_dir) tester('ntpath.abspath("/abc/")', drive + "\\abc") + def test_abspath_invalid_paths(self): + abspath = ntpath.abspath + if sys.platform == 'win32': + self.assertEqual(abspath("C:\x00"), ntpath.join(abspath("C:"), "\x00")) + self.assertEqual(abspath(b"C:\x00"), ntpath.join(abspath(b"C:"), b"\x00")) + self.assertEqual(abspath("\x00:spam"), "\x00:\\spam") + self.assertEqual(abspath(b"\x00:spam"), b"\x00:\\spam") + self.assertEqual(abspath('c:\\fo\x00o'), 'c:\\fo\x00o') + self.assertEqual(abspath(b'c:\\fo\x00o'), b'c:\\fo\x00o') + self.assertEqual(abspath('c:\\fo\x00o\\..\\bar'), 'c:\\bar') + self.assertEqual(abspath(b'c:\\fo\x00o\\..\\bar'), b'c:\\bar') + self.assertEqual(abspath('c:\\\udfff'), 'c:\\\udfff') + self.assertEqual(abspath('c:\\\udfff\\..\\foo'), 'c:\\foo') + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, abspath, b'c:\\\xff') + self.assertRaises(UnicodeDecodeError, abspath, b'c:\\\xff\\..\\foo') + else: + self.assertEqual(abspath(b'c:\\\xff'), b'c:\\\xff') + self.assertEqual(abspath(b'c:\\\xff\\..\\foo'), b'c:\\foo') + def test_relpath(self): tester('ntpath.relpath("a")', 'a') tester('ntpath.relpath(ntpath.abspath("a"))', 'a') @@ -989,6 +1290,18 @@ def test_ismount(self): self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$")) self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$\\")) + def test_ismount_invalid_paths(self): + ismount = ntpath.ismount + self.assertFalse(ismount("c:\\\udfff")) + if sys.platform == 'win32': + self.assertRaises(ValueError, ismount, "c:\\\x00") + self.assertRaises(ValueError, ismount, b"c:\\\x00") + self.assertRaises(UnicodeDecodeError, ismount, b"c:\\\xff") + else: + self.assertFalse(ismount("c:\\\x00")) + self.assertFalse(ismount(b"c:\\\x00")) + self.assertFalse(ismount(b"c:\\\xff")) + def test_isreserved(self): self.assertFalse(ntpath.isreserved('')) self.assertFalse(ntpath.isreserved('.')) @@ -1095,6 +1408,13 @@ def test_isjunction(self): self.assertFalse(ntpath.isjunction('tmpdir')) self.assertPathEqual(ntpath.realpath('testjunc'), ntpath.realpath('tmpdir')) + def test_isfile_invalid_paths(self): + isfile = ntpath.isfile + self.assertIs(isfile('/tmp\udfffabcds'), False) + self.assertIs(isfile(b'/tmp\xffabcds'), False) + self.assertIs(isfile('/tmp\x00abcds'), False) + self.assertIs(isfile(b'/tmp\x00abcds'), False) + @unittest.skipIf(sys.platform != 'win32', "drive letters are a windows concept") def test_isfile_driveletter(self): drive = os.environ.get('SystemDrive') @@ -1131,7 +1451,7 @@ def test_con_device(self): self.assertTrue(os.path.exists(r"\\.\CON")) @unittest.skipIf(sys.platform != 'win32', "Fast paths are only for win32") - @cpython_only + @support.cpython_only def test_fast_paths_in_use(self): # There are fast paths of these functions implemented in posixmodule.c. # Confirm that they are being used, and not the Python fallbacks in @@ -1195,9 +1515,6 @@ def _check_function(self, func): def test_path_normcase(self): self._check_function(self.path.normcase) - if sys.platform == 'win32': - self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') - self.assertEqual(ntpath.normcase('abc\x00def'), 'abc\x00def') def test_path_isabs(self): self._check_function(self.path.isabs) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index a45aafc63fa697..2e50c1c62b328c 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -567,6 +567,14 @@ def test(default=None): with self.assertRaises(RecursionError): test() + def test_dont_specialize_custom_vectorcall(self): + def f(): + raise Exception("no way") + + _testinternalcapi.set_vectorcall_nop(f) + for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): + f() + def make_deferred_ref_count_obj(): """Create an object that uses deferred reference counting. diff --git a/Lib/test/test_optparse.py b/Lib/test/test_optparse.py index e6ffd2b0ffeb0e..e476e4727803e5 100644 --- a/Lib/test/test_optparse.py +++ b/Lib/test/test_optparse.py @@ -615,9 +615,9 @@ def test_float_default(self): self.parser.add_option( "-p", "--prob", help="blow up with probability PROB [default: %default]") - self.parser.set_defaults(prob=0.43) + self.parser.set_defaults(prob=0.25) expected_help = self.help_prefix + \ - " -p PROB, --prob=PROB blow up with probability PROB [default: 0.43]\n" + " -p PROB, --prob=PROB blow up with probability PROB [default: 0.25]\n" self.assertHelp(self.parser, expected_help) def test_alt_expand(self): diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index 9f131a9110dccb..4204a6a47d2a81 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -147,7 +147,7 @@ def test_fromkeys(self): def test_abc(self): OrderedDict = self.OrderedDict self.assertIsInstance(OrderedDict(), MutableMapping) - self.assertTrue(issubclass(OrderedDict, MutableMapping)) + self.assertIsSubclass(OrderedDict, MutableMapping) def test_clear(self): OrderedDict = self.OrderedDict @@ -314,14 +314,14 @@ def check(dup): check(dup) self.assertIs(dup.x, od.x) self.assertIs(dup.z, od.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') dup = copy.deepcopy(od) check(dup) self.assertEqual(dup.x, od.x) self.assertIsNot(dup.x, od.x) self.assertEqual(dup.z, od.z) self.assertIsNot(dup.z, od.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') # pickle directly pulls the module, so we have to fake it with replaced_module('collections', self.module): for proto in range(pickle.HIGHEST_PROTOCOL + 1): @@ -330,7 +330,7 @@ def check(dup): check(dup) self.assertEqual(dup.x, od.x) self.assertEqual(dup.z, od.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') check(eval(repr(od))) update_test = OrderedDict() update_test.update(od) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 333179a71e3cdc..de3a17fe893170 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -13,7 +13,6 @@ import locale import os import pickle -import platform import select import selectors import shutil @@ -105,7 +104,7 @@ def create_file(filename, content=b'content'): def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class MiscTests(unittest.TestCase): @@ -818,7 +817,7 @@ def test_15261(self): self.assertEqual(ctx.exception.errno, errno.EBADF) def check_file_attributes(self, result): - self.assertTrue(hasattr(result, 'st_file_attributes')) + self.assertHasAttr(result, 'st_file_attributes') self.assertTrue(isinstance(result.st_file_attributes, int)) self.assertTrue(0 <= result.st_file_attributes <= 0xFFFFFFFF) @@ -1920,6 +1919,8 @@ def test_makedir(self): "WASI's umask is a stub." ) def test_mode(self): + # Note: in some cases, the umask might already be 2 in which case this + # will pass even if os.umask is actually broken. with os_helper.temp_umask(0o002): base = os_helper.TESTFN parent = os.path.join(base, 'dir1') @@ -2181,7 +2182,7 @@ def test_getrandom0(self): self.assertEqual(empty, b'') def test_getrandom_random(self): - self.assertTrue(hasattr(os, 'GRND_RANDOM')) + self.assertHasAttr(os, 'GRND_RANDOM') # Don't test os.getrandom(1, os.GRND_RANDOM) to not consume the rare # resource /dev/random @@ -4291,13 +4292,8 @@ def test_eventfd_select(self): @unittest.skipIf(sys.platform == "android", "gh-124873: Test is flaky on Android") @support.requires_linux_version(2, 6, 30) class TimerfdTests(unittest.TestCase): - # 1 ms accuracy is reliably achievable on every platform except Android - # emulators, where we allow 10 ms (gh-108277). - if sys.platform == "android" and platform.android_ver().is_emulator: - CLOCK_RES_PLACES = 2 - else: - CLOCK_RES_PLACES = 3 - + # gh-126112: Use 10 ms to tolerate slow buildbots + CLOCK_RES_PLACES = 2 # 10 ms CLOCK_RES = 10 ** -CLOCK_RES_PLACES CLOCK_RES_NS = 10 ** (9 - CLOCK_RES_PLACES) @@ -5431,8 +5427,8 @@ def test_fsencode_fsdecode(self): def test_pathlike(self): self.assertEqual('#feelthegil', self.fspath(FakePath('#feelthegil'))) - self.assertTrue(issubclass(FakePath, os.PathLike)) - self.assertTrue(isinstance(FakePath('x'), os.PathLike)) + self.assertIsSubclass(FakePath, os.PathLike) + self.assertIsInstance(FakePath('x'), os.PathLike) def test_garbage_in_exception_out(self): vapor = type('blah', (), {}) @@ -5458,8 +5454,8 @@ def test_pathlike_subclasshook(self): # true on abstract implementation. class A(os.PathLike): pass - self.assertFalse(issubclass(FakePath, A)) - self.assertTrue(issubclass(FakePath, os.PathLike)) + self.assertNotIsSubclass(FakePath, A) + self.assertIsSubclass(FakePath, os.PathLike) def test_pathlike_class_getitem(self): self.assertIsInstance(os.PathLike[bytes], types.GenericAlias) @@ -5469,7 +5465,7 @@ class A(os.PathLike): __slots__ = () def __fspath__(self): return '' - self.assertFalse(hasattr(A(), '__dict__')) + self.assertNotHasAttr(A(), '__dict__') def test_fspath_set_to_None(self): class Foo: diff --git a/Lib/test/test_pathlib/test_join.py b/Lib/test/test_pathlib/test_join.py index f1a24204b4c30a..6b51a09e5ac168 100644 --- a/Lib/test/test_pathlib/test_join.py +++ b/Lib/test/test_pathlib/test_join.py @@ -3,6 +3,8 @@ """ import unittest +import threading +from test.support import threading_helper from .support import is_pypi from .support.lexical_path import LexicalPath @@ -158,6 +160,26 @@ def test_parts(self): parts = p.parts self.assertEqual(parts, (sep, 'a', 'b')) + @threading_helper.requires_working_threading() + def test_parts_multithreaded(self): + P = self.cls + + NUM_THREADS = 10 + NUM_ITERS = 10 + + for _ in range(NUM_ITERS): + b = threading.Barrier(NUM_THREADS) + path = P('a') / 'b' / 'c' / 'd' / 'e' + expected = ('a', 'b', 'c', 'd', 'e') + + def check_parts(): + b.wait() + self.assertEqual(path.parts, expected) + + threads = [threading.Thread(target=check_parts) for _ in range(NUM_THREADS)] + with threading_helper.start_threads(threads): + pass + def test_parent(self): # Relative P = self.cls diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 8a313cc4292574..a1ea69a6b906e8 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -17,10 +17,10 @@ from test.support import import_helper from test.support import cpython_only -from test.support import is_emscripten, is_wasi +from test.support import is_emscripten, is_wasi, is_wasm32 from test.support import infinite_recursion from test.support import os_helper -from test.support.os_helper import TESTFN, FakePath +from test.support.os_helper import TESTFN, FS_NONASCII, FakePath try: import fcntl except ImportError: @@ -77,8 +77,8 @@ def needs_symlinks(fn): class UnsupportedOperationTest(unittest.TestCase): def test_is_notimplemented(self): - self.assertTrue(issubclass(pathlib.UnsupportedOperation, NotImplementedError)) - self.assertTrue(isinstance(pathlib.UnsupportedOperation(), NotImplementedError)) + self.assertIsSubclass(pathlib.UnsupportedOperation, NotImplementedError) + self.assertIsInstance(pathlib.UnsupportedOperation(), NotImplementedError) class LazyImportTest(unittest.TestCase): @@ -293,6 +293,12 @@ def test_pickling_common(self): self.assertEqual(hash(pp), hash(p)) self.assertEqual(str(pp), str(p)) + def test_unpicking_3_13(self): + data = (b"\x80\x04\x95'\x00\x00\x00\x00\x00\x00\x00\x8c\x0e" + b"pathlib._local\x94\x8c\rPurePosixPath\x94\x93\x94)R\x94.") + p = pickle.loads(data) + self.assertIsInstance(p, pathlib.PurePosixPath) + def test_repr_common(self): for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): with self.subTest(pathstr=pathstr): @@ -300,8 +306,8 @@ def test_repr_common(self): clsname = p.__class__.__name__ r = repr(p) # The repr() is in the form ClassName("forward-slashes path"). - self.assertTrue(r.startswith(clsname + '('), r) - self.assertTrue(r.endswith(')'), r) + self.assertStartsWith(r, clsname + '(') + self.assertEndsWith(r, ')') inner = r[len(clsname) + 1 : -1] self.assertEqual(eval(inner), p.as_posix()) @@ -770,12 +776,16 @@ def test_as_uri_windows(self): self.assertEqual(self.make_uri(P('c:/')), 'file:///c:/') self.assertEqual(self.make_uri(P('c:/a/b.c')), 'file:///c:/a/b.c') self.assertEqual(self.make_uri(P('c:/a/b%#c')), 'file:///c:/a/b%25%23c') - self.assertEqual(self.make_uri(P('c:/a/b\xe9')), 'file:///c:/a/b%C3%A9') self.assertEqual(self.make_uri(P('//some/share/')), 'file://some/share/') self.assertEqual(self.make_uri(P('//some/share/a/b.c')), 'file://some/share/a/b.c') - self.assertEqual(self.make_uri(P('//some/share/a/b%#c\xe9')), - 'file://some/share/a/b%25%23c%C3%A9') + + from urllib.parse import quote_from_bytes + QUOTED_FS_NONASCII = quote_from_bytes(os.fsencode(FS_NONASCII)) + self.assertEqual(self.make_uri(P('c:/a/b' + FS_NONASCII)), + 'file:///c:/a/b' + QUOTED_FS_NONASCII) + self.assertEqual(self.make_uri(P('//some/share/a/b%#c' + FS_NONASCII)), + 'file://some/share/a/b%25%23c' + QUOTED_FS_NONASCII) @needs_windows def test_ordering_windows(self): @@ -2950,7 +2960,13 @@ def test_glob_dotdot(self): else: # ".." segments are normalized first on Windows, so this path is stat()able. self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) - self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) + if sys.platform == "emscripten": + # Emscripten will return ELOOP if there are 49 or more ..'s. + # Can remove when https://github.com/emscripten-core/emscripten/pull/24591 is merged. + NDOTDOTS = 48 + else: + NDOTDOTS = 50 + self.assertEqual(set(p.glob("/".join([".."] * NDOTDOTS))), { P(self.base, *[".."] * NDOTDOTS)}) def test_glob_inaccessible(self): P = self.cls @@ -3154,7 +3170,7 @@ def test_absolute_posix(self): self.assertEqual(str(P('//a/b').absolute()), '//a/b') @unittest.skipIf( - is_emscripten or is_wasi, + is_wasm32, "umask is not implemented on Emscripten/WASI." ) @needs_posix @@ -3185,7 +3201,7 @@ def test_resolve_root(self): os.chdir(current_directory) @unittest.skipIf( - is_emscripten or is_wasi, + is_wasm32, "umask is not implemented on Emscripten/WASI." ) @needs_posix diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 54797d7898ff33..b1b8c7d49befde 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -3232,6 +3232,37 @@ def test_pdb_issue_gh_127321(): """ +def test_pdb_issue_gh_136057(): + """See GH-136057 + "step" and "next" commands should be able to get over list comprehensions + >>> def test_function(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... lst = [i for i in range(10)] + ... for i in lst: pass + + >>> with PdbTestInput([ # doctest: +NORMALIZE_WHITESPACE + ... 'next', + ... 'next', + ... 'step', + ... 'continue', + ... ]): + ... test_function() + > <doctest test.test_pdb.test_pdb_issue_gh_136057[0]>(2)test_function() + -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) next + > <doctest test.test_pdb.test_pdb_issue_gh_136057[0]>(3)test_function() + -> lst = [i for i in range(10)] + (Pdb) next + > <doctest test.test_pdb.test_pdb_issue_gh_136057[0]>(4)test_function() + -> for i in lst: pass + (Pdb) step + --Return-- + > <doctest test.test_pdb.test_pdb_issue_gh_136057[0]>(4)test_function()->None + -> for i in lst: pass + (Pdb) continue + """ + + def test_pdb_issue_gh_80731(): """See GH-80731 @@ -3974,7 +4005,10 @@ def test_run_module_with_args(self): commands = """ continue """ - self._run_pdb(["calendar", "-m"], commands, expected_returncode=2) + self._run_pdb(["calendar", "-m"], commands, expected_returncode=1) + + _, stderr = self._run_pdb(["-m", "calendar", "-p", "1"], commands) + self.assertIn("unrecognized arguments: -p", stderr) stdout, _ = self._run_pdb(["-m", "calendar", "1"], commands) self.assertIn("December", stdout) @@ -4539,6 +4573,22 @@ def bar(): ])) self.assertIn('break in bar', stdout) + def test_issue_59000(self): + script = """ + def foo(): + pass + + class C: + def foo(self): + pass + """ + commands = """ + break C.foo + quit + """ + stdout, stderr = self.run_pdb_script(script, commands) + self.assertIn("The specified object 'C.foo' is not a function", stdout) + class ChecklineTests(unittest.TestCase): def setUp(self): @@ -4688,6 +4738,28 @@ def foo(): stdout, _ = self._run_script(script, commands) self.assertIn("42", stdout) + def test_readline_not_imported(self): + """GH-138860 + Directly or indirectly importing readline might deadlock a subprocess + if it's launched with process_group=0 or preexec_fn=setpgrp + + It's also a pattern that readline is never imported with just import pdb. + + This test is to ensure that readline is not imported for import pdb. + It's possible that we have a good reason to do that in the future. + """ + + script = textwrap.dedent(""" + import sys + import pdb + if "readline" in sys.modules: + print("readline imported") + """) + commands = "" + stdout, stderr = self._run_script(script, commands) + self.assertNotIn("readline imported", stdout) + self.assertEqual(stderr, "") + @support.force_colorized_test_class class PdbTestColorize(unittest.TestCase): @@ -4749,7 +4821,9 @@ def test_return_from_inline_mode_to_REPL(self): @support.force_not_colorized_test_class @support.requires_subprocess() class PdbTestReadline(unittest.TestCase): - def setUpClass(): + + @classmethod + def setUpClass(cls): # Ensure that the readline module is loaded # If this fails, the test is skipped because SkipTest will be raised readline = import_module('readline') @@ -4848,6 +4922,8 @@ def f(): self.assertIn(b'I love Python', output) + @unittest.skipIf(sys.platform.startswith('freebsd'), + '\\x08 is not interpreted as backspace on FreeBSD') def test_multiline_auto_indent(self): script = textwrap.dedent(""" import pdb; pdb.Pdb().set_trace() @@ -4886,6 +4962,8 @@ def test_multiline_completion(self): self.assertIn(b'42', output) + @unittest.skipIf(sys.platform.startswith('freebsd'), + '\\x08 is not interpreted as backspace on FreeBSD') def test_multiline_indent_completion(self): script = textwrap.dedent(""" import pdb; pdb.Pdb().set_trace() diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 47f51f1979faab..3a232e5ff085fe 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -292,6 +292,7 @@ def test_constant_folding_unaryop(self): ('---x', 'UNARY_NEGATIVE', None, False, None, None), ('~~~x', 'UNARY_INVERT', None, False, None, None), ('+++x', 'CALL_INTRINSIC_1', intrinsic_positive, False, None, None), + ('~True', 'UNARY_INVERT', None, False, None, None), ] for ( @@ -316,7 +317,7 @@ def negzero(): return -(1.0-1.0) for instr in dis.get_instructions(negzero): - self.assertFalse(instr.opname.startswith('UNARY_')) + self.assertNotStartsWith(instr.opname, 'UNARY_') self.check_lnotab(negzero) def test_constant_folding_binop(self): @@ -718,9 +719,9 @@ def format(fmt, *values): self.assertEqual(format('x = %d!', 1234), 'x = 1234!') self.assertEqual(format('x = %x!', 1234), 'x = 4d2!') self.assertEqual(format('x = %f!', 1234), 'x = 1234.000000!') - self.assertEqual(format('x = %s!', 1234.5678901), 'x = 1234.5678901!') - self.assertEqual(format('x = %f!', 1234.5678901), 'x = 1234.567890!') - self.assertEqual(format('x = %d!', 1234.5678901), 'x = 1234!') + self.assertEqual(format('x = %s!', 1234.0000625), 'x = 1234.0000625!') + self.assertEqual(format('x = %f!', 1234.0000625), 'x = 1234.000063!') + self.assertEqual(format('x = %d!', 1234.0000625), 'x = 1234!') self.assertEqual(format('x = %s%% %%%%', 1234), 'x = 1234% %%') self.assertEqual(format('x = %s!', '%% %s'), 'x = %% %s!') self.assertEqual(format('x = %s, y = %d', 12, 34), 'x = 12, y = 34') @@ -2614,6 +2615,90 @@ def test_send(self): ] self.cfg_optimization_test(insts, expected, consts=[None]) + def test_format_simple(self): + # FORMAT_SIMPLE will leave its operand on the stack if it's a unicode + # object. We treat it conservatively and assume that it always leaves + # its operand on the stack. + insts = [ + ("LOAD_FAST", 0, 1), + ("FORMAT_SIMPLE", None, 2), + ("STORE_FAST", 1, 3), + ] + self.check(insts, insts) + + insts = [ + ("LOAD_FAST", 0, 1), + ("FORMAT_SIMPLE", None, 2), + ("POP_TOP", None, 3), + ] + expected = [ + ("LOAD_FAST_BORROW", 0, 1), + ("FORMAT_SIMPLE", None, 2), + ("POP_TOP", None, 3), + ] + self.check(insts, expected) + + def test_set_function_attribute(self): + # SET_FUNCTION_ATTRIBUTE leaves the function on the stack + insts = [ + ("LOAD_CONST", 0, 1), + ("LOAD_FAST", 0, 2), + ("SET_FUNCTION_ATTRIBUTE", 2, 3), + ("STORE_FAST", 1, 4), + ("LOAD_CONST", 0, 5), + ("RETURN_VALUE", None, 6) + ] + self.cfg_optimization_test(insts, insts, consts=[None]) + + insts = [ + ("LOAD_CONST", 0, 1), + ("LOAD_FAST", 0, 2), + ("SET_FUNCTION_ATTRIBUTE", 2, 3), + ("RETURN_VALUE", None, 4) + ] + expected = [ + ("LOAD_CONST", 0, 1), + ("LOAD_FAST_BORROW", 0, 2), + ("SET_FUNCTION_ATTRIBUTE", 2, 3), + ("RETURN_VALUE", None, 4) + ] + self.cfg_optimization_test(insts, expected, consts=[None]) + + def test_get_yield_from_iter(self): + # GET_YIELD_FROM_ITER may leave its operand on the stack + insts = [ + ("LOAD_FAST", 0, 1), + ("GET_YIELD_FROM_ITER", None, 2), + ("LOAD_CONST", 0, 3), + send := self.Label(), + ("SEND", end := self.Label(), 5), + ("YIELD_VALUE", 1, 6), + ("RESUME", 2, 7), + ("JUMP", send, 8), + end, + ("END_SEND", None, 9), + ("LOAD_CONST", 0, 10), + ("RETURN_VALUE", None, 11), + ] + self.cfg_optimization_test(insts, insts, consts=[None]) + + def test_push_exc_info(self): + insts = [ + ("LOAD_FAST", 0, 1), + ("PUSH_EXC_INFO", None, 2), + ] + self.check(insts, insts) + + def test_load_special(self): + # LOAD_SPECIAL may leave self on the stack + insts = [ + ("LOAD_FAST", 0, 1), + ("LOAD_SPECIAL", 0, 2), + ("STORE_FAST", 1, 3), + ] + self.check(insts, insts) + + def test_del_in_finally(self): # This loads `obj` onto the stack, executes `del obj`, then returns the # `obj` from the stack. See gh-133371 for more details. @@ -2630,6 +2715,14 @@ def create_obj(): gc.collect() self.assertEqual(obj, [42]) + def test_format_simple_unicode(self): + # Repro from gh-134889 + def f(): + var = f"{1}" + var = f"{var}" + return var + self.assertEqual(f(), "1") + if __name__ == "__main__": diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py index 1095e7303c188f..aa01a9b8f7ed87 100644 --- a/Lib/test/test_peg_generator/test_c_parser.py +++ b/Lib/test/test_peg_generator/test_c_parser.py @@ -387,10 +387,10 @@ def test_with_stmt_with_paren(self) -> None: test_source = """ stmt = "with (\\n a as b,\\n c as d\\n): pass" the_ast = parse.parse_string(stmt, mode=1) - self.assertTrue(ast_dump(the_ast).startswith( + self.assertStartsWith(ast_dump(the_ast), "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), " "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]" - )) + ) """ self.run_test(grammar_source, test_source) diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index d8606521345390..d912c55812397d 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -91,10 +91,8 @@ def test_gather(self) -> None: """ rules = parse_string(grammar, GrammarParser).rules self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE") - self.assertTrue( - repr(rules["start"]).startswith( - "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" - ) + self.assertStartsWith(repr(rules["start"]), + "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" ) self.assertEqual(str(rules["thing"]), "thing: NUMBER") parser_class = make_parser(grammar) diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index c176e505155b90..1e1b0522787ff3 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -93,9 +93,7 @@ def baz(): perf_line, f"Could not find {expected_symbol} in perf file" ) perf_addr = perf_line.split(" ")[0] - self.assertFalse( - perf_addr.startswith("0x"), "Address should not be prefixed with 0x" - ) + self.assertNotStartsWith(perf_addr, "0x") self.assertTrue( set(perf_addr).issubset(string.hexdigits), "Address should contain only hex characters", @@ -162,6 +160,16 @@ def baz(): self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents) self.assertIn(f"py::baz_fork:{script}", child_perf_file_contents) + # The parent's map should not contain the child's symbols. + self.assertNotIn(f"py::foo_fork:{script}", perf_file_contents) + self.assertNotIn(f"py::bar_fork:{script}", perf_file_contents) + self.assertNotIn(f"py::baz_fork:{script}", perf_file_contents) + + # The child's map should not contain the parent's symbols. + self.assertNotIn(f"py::foo:{script}", child_perf_file_contents) + self.assertNotIn(f"py::bar:{script}", child_perf_file_contents) + self.assertNotIn(f"py::baz:{script}", child_perf_file_contents) + @unittest.skipIf(support.check_bolt_optimized(), "fails on BOLT instrumented binaries") def test_sys_api(self): code = """if 1: @@ -233,6 +241,24 @@ def test_sys_api_get_status(self): """ assert_python_ok("-c", code, PYTHON_JIT="0") + def test_sys_api_perf_jit_backend(self): + code = """if 1: + import sys + sys.activate_stack_trampoline("perf_jit") + assert sys.is_stack_trampoline_active() is True + sys.deactivate_stack_trampoline() + assert sys.is_stack_trampoline_active() is False + """ + assert_python_ok("-c", code, PYTHON_JIT="0") + + def test_sys_api_with_existing_perf_jit_trampoline(self): + code = """if 1: + import sys + sys.activate_stack_trampoline("perf_jit") + sys.activate_stack_trampoline("perf_jit") + """ + assert_python_ok("-c", code, PYTHON_JIT="0") + def is_unwinding_reliable_with_frame_pointers(): cflags = sysconfig.get_config_var("PY_CORE_CFLAGS") @@ -320,6 +346,7 @@ def run_perf(cwd, *args, use_jit=False, **env_vars): stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, + text=True, ) if proc.returncode: print(proc.stderr, file=sys.stderr) @@ -329,10 +356,10 @@ def run_perf(cwd, *args, use_jit=False, **env_vars): jit_output_file = cwd + "/jit_output.dump" command = ("perf", "inject", "-j", "-i", output_file, "-o", jit_output_file) proc = subprocess.run( - command, stderr=subprocess.PIPE, stdout=subprocess.PIPE, env=env + command, stderr=subprocess.PIPE, stdout=subprocess.PIPE, env=env, text=True ) if proc.returncode: - print(proc.stderr) + print(proc.stderr, file=sys.stderr) raise ValueError(f"Perf failed with return code {proc.returncode}") # Copy the jit_output_file to the output_file os.rename(jit_output_file, output_file) @@ -344,10 +371,9 @@ def run_perf(cwd, *args, use_jit=False, **env_vars): stderr=subprocess.PIPE, env=env, check=True, + text=True, ) - return proc.stdout.decode("utf-8", "replace"), proc.stderr.decode( - "utf-8", "replace" - ) + return proc.stdout, proc.stderr class TestPerfProfilerMixin: @@ -508,9 +534,12 @@ def _is_perf_version_at_least(major, minor): # The output of perf --version looks like "perf version 6.7-3" but # it can also be perf version "perf version 5.15.143", or even include # a commit hash in the version string, like "6.12.9.g242e6068fd5c" + # + # PermissionError is raised if perf does not exist on the Windows Subsystem + # for Linux, see #134987 try: output = subprocess.check_output(["perf", "--version"], text=True) - except (subprocess.CalledProcessError, FileNotFoundError): + except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): return False version = output.split()[2] version = version.split("-")[0] diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 742ca8de1bea8c..e2384b33345a45 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -611,10 +611,10 @@ def test_name_mapping(self): with self.subTest(((module3, name3), (module2, name2))): if (module2, name2) == ('exceptions', 'OSError'): attr = getattribute(module3, name3) - self.assertTrue(issubclass(attr, OSError)) + self.assertIsSubclass(attr, OSError) elif (module2, name2) == ('exceptions', 'ImportError'): attr = getattribute(module3, name3) - self.assertTrue(issubclass(attr, ImportError)) + self.assertIsSubclass(attr, ImportError) else: module, name = mapping(module2, name2) if module3[:1] != '_': diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index a178d3353eecdf..cf990874621eae 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -384,13 +384,13 @@ def test_string_without_quotes(self): self.check_dis_error(b'Sabc"\n.', '', "no string quotes around b'abc\"'") self.check_dis_error(b"S'abc\n.", '', - '''strinq quote b"'" not found at both ends of b"'abc"''') + '''string quote b"'" not found at both ends of b"'abc"''') self.check_dis_error(b'S"abc\n.', '', - r"""strinq quote b'"' not found at both ends of b'"abc'""") + r"""string quote b'"' not found at both ends of b'"abc'""") self.check_dis_error(b"S'abc\"\n.", '', - r"""strinq quote b"'" not found at both ends of b'\\'abc"'""") + r"""string quote b"'" not found at both ends of b'\\'abc"'""") self.check_dis_error(b"S\"abc'\n.", '', - r"""strinq quote b'"' not found at both ends of b'"abc\\''""") + r"""string quote b'"' not found at both ends of b'"abc\\''""") def test_binstring(self): self.check_dis(b"T\x03\x00\x00\x00abc.", '''\ diff --git a/Lib/test/test_platform.py b/Lib/test/test_platform.py index 818e807dd3a6fb..e879e48571f3f7 100644 --- a/Lib/test/test_platform.py +++ b/Lib/test/test_platform.py @@ -410,7 +410,7 @@ def test_win32_ver(self): for v in version.split('.'): int(v) # should not fail if csd: - self.assertTrue(csd.startswith('SP'), msg=csd) + self.assertStartsWith(csd, 'SP') if ptype: if os.cpu_count() > 1: self.assertIn('Multiprocessor', ptype) @@ -525,8 +525,10 @@ def test_ios_ver(self): self.assertEqual(override.model, "Whiz") self.assertTrue(override.is_simulator) - @unittest.skipIf(support.is_emscripten, "Does not apply to Emscripten") def test_libc_ver(self): + if support.is_emscripten: + assert platform.libc_ver() == ("emscripten", "4.0.12") + return # check that libc_ver(executable) doesn't raise an exception if os.path.isdir(sys.executable) and \ os.path.exists(sys.executable+'.exe'): @@ -558,6 +560,10 @@ def test_libc_ver(self): # musl uses semver, but we accept some variations anyway: (b'/aports/main/musl/src/musl-12.5', ('musl', '12.5')), (b'/aports/main/musl/src/musl-1.2.5.7', ('musl', '1.2.5.7')), + (b'libc.musl.so.1', ('musl', '1')), + (b'libc.musl-x86_64.so.1.2.5', ('musl', '1.2.5')), + (b'ld-musl.so.1', ('musl', '1')), + (b'ld-musl-x86_64.so.1.2.5', ('musl', '1.2.5')), (b'', ('', '')), ): with open(filename, 'wb') as fp: @@ -576,6 +582,14 @@ def test_libc_ver(self): (b'GLIBC_1.23.4\0GLIBC_1.9\0GLIBC_1.21\0', ('glibc', '1.23.4')), (b'libc.so.2.4\0libc.so.9\0libc.so.23.1\0', ('libc', '23.1')), (b'musl-1.4.1\0musl-2.1.1\0musl-2.0.1\0', ('musl', '2.1.1')), + ( + b'libc.musl-x86_64.so.1.4.1\0libc.musl-x86_64.so.2.1.1\0libc.musl-x86_64.so.2.0.1', + ('musl', '2.1.1'), + ), + ( + b'ld-musl-x86_64.so.1.4.1\0ld-musl-x86_64.so.2.1.1\0ld-musl-x86_64.so.2.0.1', + ('musl', '2.1.1'), + ), (b'no match here, so defaults are used', ('test', '100.1.0')), ): with open(filename, 'wb') as f: diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index a0c76e5dec5ebe..de2a2fd1fc34bf 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -903,8 +903,7 @@ def test_dump_naive_datetime_with_aware_datetime_option(self): class TestBinaryPlistlib(unittest.TestCase): - @staticmethod - def decode(*objects, offset_size=1, ref_size=1): + def build(self, *objects, offset_size=1, ref_size=1): data = [b'bplist00'] offset = 8 offsets = [] @@ -916,7 +915,11 @@ def decode(*objects, offset_size=1, ref_size=1): len(objects), 0, offset) data.extend(offsets) data.append(tail) - return plistlib.loads(b''.join(data), fmt=plistlib.FMT_BINARY) + return b''.join(data) + + def decode(self, *objects, offset_size=1, ref_size=1): + data = self.build(*objects, offset_size=offset_size, ref_size=ref_size) + return plistlib.loads(data, fmt=plistlib.FMT_BINARY) def test_nonstandard_refs_size(self): # Issue #21538: Refs and offsets are 24-bit integers @@ -1024,6 +1027,34 @@ def test_invalid_binary(self): with self.assertRaises(plistlib.InvalidFileException): plistlib.loads(b'bplist00' + data, fmt=plistlib.FMT_BINARY) + def test_truncated_large_data(self): + self.addCleanup(os_helper.unlink, os_helper.TESTFN) + def check(data): + with open(os_helper.TESTFN, 'wb') as f: + f.write(data) + # buffered file + with open(os_helper.TESTFN, 'rb') as f: + with self.assertRaises(plistlib.InvalidFileException): + plistlib.load(f, fmt=plistlib.FMT_BINARY) + # unbuffered file + with open(os_helper.TESTFN, 'rb', buffering=0) as f: + with self.assertRaises(plistlib.InvalidFileException): + plistlib.load(f, fmt=plistlib.FMT_BINARY) + for w in range(20, 64): + s = 1 << w + # data + check(self.build(b'\x4f\x13' + s.to_bytes(8, 'big'))) + # ascii string + check(self.build(b'\x5f\x13' + s.to_bytes(8, 'big'))) + # unicode string + check(self.build(b'\x6f\x13' + s.to_bytes(8, 'big'))) + # array + check(self.build(b'\xaf\x13' + s.to_bytes(8, 'big'))) + # dict + check(self.build(b'\xdf\x13' + s.to_bytes(8, 'big'))) + # number of objects + check(b'bplist00' + struct.pack('>6xBBQQQ', 1, 1, s, 0, 8)) + def test_load_aware_datetime(self): data = (b'bplist003B\x04>\xd0d\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00' b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00' diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 0817d0a87a38b1..aad6fc6b4b619b 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -757,7 +757,7 @@ def test_makedev(self): self.assertRaises((ValueError, OverflowError), posix.makedev, x, minor) self.assertRaises((ValueError, OverflowError), posix.makedev, major, x) - if sys.platform == 'linux': + if sys.platform == 'linux' and not support.linked_to_musl(): NODEV = -1 self.assertEqual(posix.major(NODEV), NODEV) self.assertEqual(posix.minor(NODEV), NODEV) @@ -1107,7 +1107,7 @@ def test_lchmod_dir_symlink(self): def _test_chflags_regular_file(self, chflags_func, target_file, **kwargs): st = os.stat(target_file) - self.assertTrue(hasattr(st, 'st_flags')) + self.assertHasAttr(st, 'st_flags') # ZFS returns EOPNOTSUPP when attempting to set flag UF_IMMUTABLE. flags = st.st_flags | stat.UF_IMMUTABLE @@ -1143,7 +1143,7 @@ def test_lchflags_regular_file(self): def test_lchflags_symlink(self): testfn_st = os.stat(os_helper.TESTFN) - self.assertTrue(hasattr(testfn_st, 'st_flags')) + self.assertHasAttr(testfn_st, 'st_flags') self.addCleanup(os_helper.unlink, _DUMMY_SYMLINK) os.symlink(os_helper.TESTFN, _DUMMY_SYMLINK) @@ -1366,6 +1366,14 @@ def test_sched_param(self): self.assertNotEqual(newparam, param) self.assertEqual(newparam.sched_priority, 0) + @requires_sched + def test_bug_140634(self): + sched_priority = float('inf') # any new reference + param = posix.sched_param(sched_priority) + param.__reduce__() + del sched_priority, param # should not crash + support.gc_collect() # just to be sure + @unittest.skipUnless(hasattr(posix, "sched_rr_get_interval"), "no function") def test_sched_rr_get_interval(self): try: @@ -2036,6 +2044,12 @@ def test_setscheduler_only_param(self): @requires_sched @unittest.skipIf(sys.platform.startswith(('freebsd', 'netbsd')), "bpo-34685: test can fail on BSD") + @unittest.skipIf(platform.libc_ver()[0] == 'glibc' and + os.sched_getscheduler(0) in [ + os.SCHED_BATCH, + os.SCHED_IDLE, + os.SCHED_DEADLINE], + "Skip test due to glibc posix_spawn policy") def test_setscheduler_with_policy(self): policy = os.sched_getscheduler(0) priority = os.sched_get_priority_min(policy) @@ -2218,12 +2232,12 @@ def _verify_available(self, name): def test_pwritev(self): self._verify_available("HAVE_PWRITEV") if self.mac_ver >= (10, 16): - self.assertTrue(hasattr(os, "pwritev"), "os.pwritev is not available") - self.assertTrue(hasattr(os, "preadv"), "os.readv is not available") + self.assertHasAttr(os, "pwritev") + self.assertHasAttr(os, "preadv") else: - self.assertFalse(hasattr(os, "pwritev"), "os.pwritev is available") - self.assertFalse(hasattr(os, "preadv"), "os.readv is available") + self.assertNotHasAttr(os, "pwritev") + self.assertNotHasAttr(os, "preadv") def test_stat(self): self._verify_available("HAVE_FSTATAT") diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index fa19d549c26cfa..21f06712548d88 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -4,12 +4,13 @@ import random import sys import unittest -from posixpath import realpath, abspath, dirname, basename +from functools import partial +from posixpath import realpath, abspath, dirname, basename, ALLOW_MISSING from test import support from test import test_genericpath from test.support import import_helper from test.support import os_helper -from test.support.os_helper import FakePath +from test.support.os_helper import FakePath, TESTFN from unittest import mock try: @@ -21,7 +22,7 @@ # An absolute path to a temporary filename for testing. We can't rely on TESTFN # being an absolute path, so we need this. -ABSTFN = abspath(os_helper.TESTFN) +ABSTFN = abspath(TESTFN) def skip_if_ABSTFN_contains_backslash(test): """ @@ -33,21 +34,16 @@ def skip_if_ABSTFN_contains_backslash(test): msg = "ABSTFN is not a posix path - tests fail" return [test, unittest.skip(msg)(test)][found_backslash] -def safe_rmdir(dirname): - try: - os.rmdir(dirname) - except OSError: - pass + +def _parameterize(*parameters): + return support.subTests('kwargs', parameters) + class PosixPathTest(unittest.TestCase): def setUp(self): - self.tearDown() - - def tearDown(self): for suffix in ["", "1", "2"]: - os_helper.unlink(os_helper.TESTFN + suffix) - safe_rmdir(os_helper.TESTFN + suffix) + self.assertFalse(posixpath.lexists(ABSTFN + suffix)) def test_join(self): fn = posixpath.join @@ -194,25 +190,28 @@ def test_dirname(self): self.assertEqual(posixpath.dirname(b"//foo//bar"), b"//foo") def test_islink(self): - self.assertIs(posixpath.islink(os_helper.TESTFN + "1"), False) - self.assertIs(posixpath.lexists(os_helper.TESTFN + "2"), False) + self.assertIs(posixpath.islink(TESTFN + "1"), False) + self.assertIs(posixpath.lexists(TESTFN + "2"), False) - with open(os_helper.TESTFN + "1", "wb") as f: + self.addCleanup(os_helper.unlink, TESTFN + "1") + with open(TESTFN + "1", "wb") as f: f.write(b"foo") - self.assertIs(posixpath.islink(os_helper.TESTFN + "1"), False) + self.assertIs(posixpath.islink(TESTFN + "1"), False) if os_helper.can_symlink(): - os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN + "2") - self.assertIs(posixpath.islink(os_helper.TESTFN + "2"), True) - os.remove(os_helper.TESTFN + "1") - self.assertIs(posixpath.islink(os_helper.TESTFN + "2"), True) - self.assertIs(posixpath.exists(os_helper.TESTFN + "2"), False) - self.assertIs(posixpath.lexists(os_helper.TESTFN + "2"), True) - - self.assertIs(posixpath.islink(os_helper.TESTFN + "\udfff"), False) - self.assertIs(posixpath.islink(os.fsencode(os_helper.TESTFN) + b"\xff"), False) - self.assertIs(posixpath.islink(os_helper.TESTFN + "\x00"), False) - self.assertIs(posixpath.islink(os.fsencode(os_helper.TESTFN) + b"\x00"), False) + self.addCleanup(os_helper.unlink, TESTFN + "2") + os.symlink(TESTFN + "1", TESTFN + "2") + self.assertIs(posixpath.islink(TESTFN + "2"), True) + os.remove(TESTFN + "1") + self.assertIs(posixpath.islink(TESTFN + "2"), True) + self.assertIs(posixpath.exists(TESTFN + "2"), False) + self.assertIs(posixpath.lexists(TESTFN + "2"), True) + + def test_islink_invalid_paths(self): + self.assertIs(posixpath.islink(TESTFN + "\udfff"), False) + self.assertIs(posixpath.islink(os.fsencode(TESTFN) + b"\xff"), False) + self.assertIs(posixpath.islink(TESTFN + "\x00"), False) + self.assertIs(posixpath.islink(os.fsencode(TESTFN) + b"\x00"), False) def test_ismount(self): self.assertIs(posixpath.ismount("/"), True) @@ -227,8 +226,9 @@ def test_ismount_non_existent(self): os.mkdir(ABSTFN) self.assertIs(posixpath.ismount(ABSTFN), False) finally: - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN) + def test_ismount_invalid_paths(self): self.assertIs(posixpath.ismount('/\udfff'), False) self.assertIs(posixpath.ismount(b'/\xff'), False) self.assertIs(posixpath.ismount('/\x00'), False) @@ -241,7 +241,7 @@ def test_ismount_symlinks(self): os.symlink("/", ABSTFN) self.assertIs(posixpath.ismount(ABSTFN), False) finally: - os.unlink(ABSTFN) + os_helper.unlink(ABSTFN) @unittest.skipIf(posix is None, "Test requires posix module") def test_ismount_different_device(self): @@ -448,32 +448,35 @@ def test_normpath(self): self.assertEqual(result, expected) @skip_if_ABSTFN_contains_backslash - def test_realpath_curdir(self): - self.assertEqual(realpath('.'), os.getcwd()) - self.assertEqual(realpath('./.'), os.getcwd()) - self.assertEqual(realpath('/'.join(['.'] * 100)), os.getcwd()) + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_curdir(self, kwargs): + self.assertEqual(realpath('.', **kwargs), os.getcwd()) + self.assertEqual(realpath('./.', **kwargs), os.getcwd()) + self.assertEqual(realpath('/'.join(['.'] * 100), **kwargs), os.getcwd()) - self.assertEqual(realpath(b'.'), os.getcwdb()) - self.assertEqual(realpath(b'./.'), os.getcwdb()) - self.assertEqual(realpath(b'/'.join([b'.'] * 100)), os.getcwdb()) + self.assertEqual(realpath(b'.', **kwargs), os.getcwdb()) + self.assertEqual(realpath(b'./.', **kwargs), os.getcwdb()) + self.assertEqual(realpath(b'/'.join([b'.'] * 100), **kwargs), os.getcwdb()) @skip_if_ABSTFN_contains_backslash - def test_realpath_pardir(self): - self.assertEqual(realpath('..'), dirname(os.getcwd())) - self.assertEqual(realpath('../..'), dirname(dirname(os.getcwd()))) - self.assertEqual(realpath('/'.join(['..'] * 100)), '/') + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_pardir(self, kwargs): + self.assertEqual(realpath('..', **kwargs), dirname(os.getcwd())) + self.assertEqual(realpath('../..', **kwargs), dirname(dirname(os.getcwd()))) + self.assertEqual(realpath('/'.join(['..'] * 100), **kwargs), '/') - self.assertEqual(realpath(b'..'), dirname(os.getcwdb())) - self.assertEqual(realpath(b'../..'), dirname(dirname(os.getcwdb()))) - self.assertEqual(realpath(b'/'.join([b'..'] * 100)), b'/') + self.assertEqual(realpath(b'..', **kwargs), dirname(os.getcwdb())) + self.assertEqual(realpath(b'../..', **kwargs), dirname(dirname(os.getcwdb()))) + self.assertEqual(realpath(b'/'.join([b'..'] * 100), **kwargs), b'/') @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_basic(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_basic(self, kwargs): # Basic operation. try: os.symlink(ABSTFN+"1", ABSTFN) - self.assertEqual(realpath(ABSTFN), ABSTFN+"1") + self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1") finally: os_helper.unlink(ABSTFN) @@ -489,23 +492,121 @@ def test_realpath_strict(self): finally: os_helper.unlink(ABSTFN) + def test_realpath_invalid_paths(self): + path = '/\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + path = b'/\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + path = '/nonexistent/x\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + path = b'/nonexistent/x\x00' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + path = '/\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + path = b'/\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(ValueError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + + path = '/nonexistent/x\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + path = b'/nonexistent/x\x00/..' + self.assertRaises(ValueError, realpath, path, strict=False) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) + + path = '/\udfff' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), path) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) + path = '/nonexistent/\udfff' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), path) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + path = '/\udfff/..' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), '/') + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/') + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) + path = '/nonexistent/\udfff/..' + if sys.platform == 'win32': + self.assertEqual(realpath(path, strict=False), '/nonexistent') + self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/nonexistent') + else: + self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) + self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + + path = b'/\xff' + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING) + else: + self.assertEqual(realpath(path, strict=False), path) + if support.is_wasi: + self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) + else: + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) + path = b'/nonexistent/\xff' + if sys.platform == 'win32': + self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) + self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING) + else: + self.assertEqual(realpath(path, strict=False), path) + if support.is_wasi: + self.assertRaises(OSError, realpath, path, strict=True) + self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) + else: + self.assertRaises(FileNotFoundError, realpath, path, strict=True) + @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_relative(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_relative(self, kwargs): try: os.symlink(posixpath.relpath(ABSTFN+"1"), ABSTFN) - self.assertEqual(realpath(ABSTFN), ABSTFN+"1") + self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1") finally: os_helper.unlink(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_missing_pardir(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_missing_pardir(self, kwargs): try: - os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN) - self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1") + os.symlink(TESTFN + "1", TESTFN) + self.assertEqual( + realpath("nonexistent/../" + TESTFN, **kwargs), ABSTFN + "1") finally: - os_helper.unlink(os_helper.TESTFN) + os_helper.unlink(TESTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -550,37 +651,38 @@ def test_realpath_symlink_loops(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_symlink_loops_strict(self): + @_parameterize({'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_symlink_loops_strict(self, kwargs): # Bug #43757, raise OSError if we get into an infinite symlink loop in - # strict mode. + # the strict modes. try: os.symlink(ABSTFN, ABSTFN) - self.assertRaises(OSError, realpath, ABSTFN, strict=True) + self.assertRaises(OSError, realpath, ABSTFN, **kwargs) os.symlink(ABSTFN+"1", ABSTFN+"2") os.symlink(ABSTFN+"2", ABSTFN+"1") - self.assertRaises(OSError, realpath, ABSTFN+"1", strict=True) - self.assertRaises(OSError, realpath, ABSTFN+"2", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"1", **kwargs) + self.assertRaises(OSError, realpath, ABSTFN+"2", **kwargs) - self.assertRaises(OSError, realpath, ABSTFN+"1/x", strict=True) - self.assertRaises(OSError, realpath, ABSTFN+"1/..", strict=True) - self.assertRaises(OSError, realpath, ABSTFN+"1/../x", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"1/x", **kwargs) + self.assertRaises(OSError, realpath, ABSTFN+"1/..", **kwargs) + self.assertRaises(OSError, realpath, ABSTFN+"1/../x", **kwargs) os.symlink(ABSTFN+"x", ABSTFN+"y") self.assertRaises(OSError, realpath, - ABSTFN+"1/../" + basename(ABSTFN) + "y", strict=True) + ABSTFN+"1/../" + basename(ABSTFN) + "y", **kwargs) self.assertRaises(OSError, realpath, - ABSTFN+"1/../" + basename(ABSTFN) + "1", strict=True) + ABSTFN+"1/../" + basename(ABSTFN) + "1", **kwargs) os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a") - self.assertRaises(OSError, realpath, ABSTFN+"a", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"a", **kwargs) os.symlink("../" + basename(dirname(ABSTFN)) + "/" + basename(ABSTFN) + "c", ABSTFN+"c") - self.assertRaises(OSError, realpath, ABSTFN+"c", strict=True) + self.assertRaises(OSError, realpath, ABSTFN+"c", **kwargs) # Test using relative path as well. with os_helper.change_cwd(dirname(ABSTFN)): - self.assertRaises(OSError, realpath, basename(ABSTFN), strict=True) + self.assertRaises(OSError, realpath, basename(ABSTFN), **kwargs) finally: os_helper.unlink(ABSTFN) os_helper.unlink(ABSTFN+"1") @@ -591,28 +693,30 @@ def test_realpath_symlink_loops_strict(self): @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_repeated_indirect_symlinks(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_repeated_indirect_symlinks(self, kwargs): # Issue #6975. try: os.mkdir(ABSTFN) os.symlink('../' + basename(ABSTFN), ABSTFN + '/self') os.symlink('self/self/self', ABSTFN + '/link') - self.assertEqual(realpath(ABSTFN + '/link'), ABSTFN) + self.assertEqual(realpath(ABSTFN + '/link', **kwargs), ABSTFN) finally: os_helper.unlink(ABSTFN + '/self') os_helper.unlink(ABSTFN + '/link') - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_deep_recursion(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_deep_recursion(self, kwargs): depth = 10 try: os.mkdir(ABSTFN) for i in range(depth): os.symlink('/'.join(['%d' % i] * 10), ABSTFN + '/%d' % (i + 1)) os.symlink('.', ABSTFN + '/0') - self.assertEqual(realpath(ABSTFN + '/%d' % depth), ABSTFN) + self.assertEqual(realpath(ABSTFN + '/%d' % depth, **kwargs), ABSTFN) # Test using relative path as well. with os_helper.change_cwd(ABSTFN): @@ -620,11 +724,12 @@ def test_realpath_deep_recursion(self): finally: for i in range(depth + 1): os_helper.unlink(ABSTFN + '/%d' % i) - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_resolve_parents(self): + @_parameterize({}, {'strict': ALLOW_MISSING}) + def test_realpath_resolve_parents(self, kwargs): # We also need to resolve any symlinks in the parents of a relative # path passed to realpath. E.g.: current working directory is # /usr/doc with 'doc' being a symlink to /usr/share/doc. We call @@ -635,15 +740,17 @@ def test_realpath_resolve_parents(self): os.symlink(ABSTFN + "/y", ABSTFN + "/k") with os_helper.change_cwd(ABSTFN + "/k"): - self.assertEqual(realpath("a"), ABSTFN + "/y/a") + self.assertEqual(realpath("a", **kwargs), + ABSTFN + "/y/a") finally: os_helper.unlink(ABSTFN + "/k") - safe_rmdir(ABSTFN + "/y") - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN + "/y") + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_resolve_before_normalizing(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_resolve_before_normalizing(self, kwargs): # Bug #990669: Symbolic links should be resolved before we # normalize the path. E.g.: if we have directories 'a', 'k' and 'y' # in the following hierarchy: @@ -658,20 +765,21 @@ def test_realpath_resolve_before_normalizing(self): os.symlink(ABSTFN + "/k/y", ABSTFN + "/link-y") # Absolute path. - self.assertEqual(realpath(ABSTFN + "/link-y/.."), ABSTFN + "/k") + self.assertEqual(realpath(ABSTFN + "/link-y/..", **kwargs), ABSTFN + "/k") # Relative path. with os_helper.change_cwd(dirname(ABSTFN)): - self.assertEqual(realpath(basename(ABSTFN) + "/link-y/.."), + self.assertEqual(realpath(basename(ABSTFN) + "/link-y/..", **kwargs), ABSTFN + "/k") finally: os_helper.unlink(ABSTFN + "/link-y") - safe_rmdir(ABSTFN + "/k/y") - safe_rmdir(ABSTFN + "/k") - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN + "/k/y") + os_helper.rmdir(ABSTFN + "/k") + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash - def test_realpath_resolve_first(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_resolve_first(self, kwargs): # Bug #1213894: The first component of the path, if not absolute, # must be resolved too. @@ -681,12 +789,12 @@ def test_realpath_resolve_first(self): os.symlink(ABSTFN, ABSTFN + "link") with os_helper.change_cwd(dirname(ABSTFN)): base = basename(ABSTFN) - self.assertEqual(realpath(base + "link"), ABSTFN) - self.assertEqual(realpath(base + "link/k"), ABSTFN + "/k") + self.assertEqual(realpath(base + "link", **kwargs), ABSTFN) + self.assertEqual(realpath(base + "link/k", **kwargs), ABSTFN + "/k") finally: os_helper.unlink(ABSTFN + "link") - safe_rmdir(ABSTFN + "/k") - safe_rmdir(ABSTFN) + os_helper.rmdir(ABSTFN + "/k") + os_helper.rmdir(ABSTFN) @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -700,12 +808,67 @@ def test_realpath_unreadable_symlink(self): self.assertEqual(realpath(ABSTFN + '/foo'), ABSTFN + '/foo') self.assertEqual(realpath(ABSTFN + '/../foo'), dirname(ABSTFN) + '/foo') self.assertEqual(realpath(ABSTFN + '/foo/..'), ABSTFN) + finally: + os.chmod(ABSTFN, 0o755, follow_symlinks=False) + os_helper.unlink(ABSTFN) + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash + @unittest.skipIf(os.chmod not in os.supports_follow_symlinks, "Can't set symlink permissions") + @unittest.skipIf(sys.platform != "darwin", "only macOS requires read permission to readlink()") + @_parameterize({'strict': True}, {'strict': ALLOW_MISSING}) + def test_realpath_unreadable_symlink_strict(self, kwargs): + try: + os.symlink(ABSTFN+"1", ABSTFN) + os.chmod(ABSTFN, 0o000, follow_symlinks=False) + with self.assertRaises(PermissionError): + realpath(ABSTFN, **kwargs) + with self.assertRaises(PermissionError): + realpath(ABSTFN + '/foo', **kwargs), with self.assertRaises(PermissionError): - realpath(ABSTFN, strict=True) + realpath(ABSTFN + '/../foo', **kwargs) + with self.assertRaises(PermissionError): + realpath(ABSTFN + '/foo/..', **kwargs) finally: os.chmod(ABSTFN, 0o755, follow_symlinks=False) os.unlink(ABSTFN) + @skip_if_ABSTFN_contains_backslash + @os_helper.skip_unless_symlink + def test_realpath_unreadable_directory(self): + try: + os.mkdir(ABSTFN) + os.mkdir(ABSTFN + '/k') + os.chmod(ABSTFN, 0o000) + self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN) + self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN) + self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN) + + try: + os.stat(ABSTFN) + except PermissionError: + pass + else: + self.skipTest('Cannot block permissions') + + self.assertEqual(realpath(ABSTFN + '/k', strict=False), + ABSTFN + '/k') + self.assertRaises(PermissionError, realpath, ABSTFN + '/k', + strict=True) + self.assertRaises(PermissionError, realpath, ABSTFN + '/k', + strict=ALLOW_MISSING) + + self.assertEqual(realpath(ABSTFN + '/missing', strict=False), + ABSTFN + '/missing') + self.assertRaises(PermissionError, realpath, ABSTFN + '/missing', + strict=True) + self.assertRaises(PermissionError, realpath, ABSTFN + '/missing', + strict=ALLOW_MISSING) + finally: + os.chmod(ABSTFN, 0o755) + os_helper.rmdir(ABSTFN + '/k') + os_helper.rmdir(ABSTFN) + @skip_if_ABSTFN_contains_backslash def test_realpath_nonterminal_file(self): try: @@ -713,14 +876,27 @@ def test_realpath_nonterminal_file(self): f.write('test_posixpath wuz ere') self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN) self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN) + self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN) + self.assertEqual(realpath(ABSTFN + "/", strict=False), ABSTFN) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/.", strict=False), ABSTFN) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/..", strict=False), dirname(ABSTFN)) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/subdir", strict=False), ABSTFN + "/subdir") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", + strict=ALLOW_MISSING) finally: os_helper.unlink(ABSTFN) @@ -733,16 +909,30 @@ def test_realpath_nonterminal_symlink_to_file(self): os.symlink(ABSTFN + "1", ABSTFN) self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN + "1") self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN + "1") + self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN + "1") + self.assertEqual(realpath(ABSTFN + "/", strict=False), ABSTFN + "1") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/.", strict=False), ABSTFN + "1") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/..", strict=False), dirname(ABSTFN)) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/subdir", strict=False), ABSTFN + "1/subdir") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", + strict=ALLOW_MISSING) finally: os_helper.unlink(ABSTFN) + os_helper.unlink(ABSTFN + "1") @os_helper.skip_unless_symlink @skip_if_ABSTFN_contains_backslash @@ -754,16 +944,31 @@ def test_realpath_nonterminal_symlink_to_symlinks_to_file(self): os.symlink(ABSTFN + "1", ABSTFN) self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN + "2") self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN + "2") + self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN + "2") + self.assertEqual(realpath(ABSTFN + "/", strict=False), ABSTFN + "2") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/.", strict=False), ABSTFN + "2") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/.", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/..", strict=False), dirname(ABSTFN)) self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/..", + strict=ALLOW_MISSING) + self.assertEqual(realpath(ABSTFN + "/subdir", strict=False), ABSTFN + "2/subdir") self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", strict=True) + self.assertRaises(NotADirectoryError, realpath, ABSTFN + "/subdir", + strict=ALLOW_MISSING) finally: os_helper.unlink(ABSTFN) + os_helper.unlink(ABSTFN + "1") + os_helper.unlink(ABSTFN + "2") def test_relpath(self): (real_getcwd, os.getcwd) = (os.getcwd, lambda: r"/home/user/bar") @@ -889,8 +1094,8 @@ class PathLikeTests(unittest.TestCase): path = posixpath def setUp(self): - self.file_name = os_helper.TESTFN - self.file_path = FakePath(os_helper.TESTFN) + self.file_name = TESTFN + self.file_path = FakePath(TESTFN) self.addCleanup(os_helper.unlink, self.file_name) with open(self.file_name, 'xb', 0) as file: file.write(b"test_posixpath.PathLikeTests") @@ -947,9 +1152,12 @@ def test_path_normpath(self): def test_path_abspath(self): self.assertPathEqual(self.path.abspath) - def test_path_realpath(self): + @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) + def test_path_realpath(self, kwargs): self.assertPathEqual(self.path.realpath) + self.assertPathEqual(partial(self.path.realpath, **kwargs)) + def test_path_relpath(self): self.assertPathEqual(self.path.relpath) diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py index f68996f72b15b5..403d2e9008489e 100644 --- a/Lib/test/test_pprint.py +++ b/Lib/test/test_pprint.py @@ -380,7 +380,7 @@ def __new__(cls, celsius_degrees): return super().__new__(Temperature, celsius_degrees) def __repr__(self): kelvin_degrees = self + 273.15 - return f"{kelvin_degrees}°K" + return f"{kelvin_degrees:.2f}°K" self.assertEqual(pprint.pformat(Temperature(1000)), '1273.15°K') def test_sorted_dict(self): diff --git a/Lib/test/test_property.py b/Lib/test/test_property.py index cea241b0f200d0..26aefdbf0421dd 100644 --- a/Lib/test/test_property.py +++ b/Lib/test/test_property.py @@ -87,8 +87,8 @@ def test_property_decorator_baseclass(self): self.assertEqual(base.spam, 10) self.assertEqual(base._spam, 10) delattr(base, "spam") - self.assertTrue(not hasattr(base, "spam")) - self.assertTrue(not hasattr(base, "_spam")) + self.assertNotHasAttr(base, "spam") + self.assertNotHasAttr(base, "_spam") base.spam = 20 self.assertEqual(base.spam, 20) self.assertEqual(base._spam, 20) diff --git a/Lib/test/test_pty.py b/Lib/test/test_pty.py index c1728f5019d042..b5407642f823e5 100644 --- a/Lib/test/test_pty.py +++ b/Lib/test/test_pty.py @@ -1,14 +1,13 @@ import unittest from test.support import ( - is_android, is_apple_mobile, is_emscripten, is_wasi, reap_children, verbose + is_android, is_apple_mobile, is_wasm32, reap_children, verbose ) from test.support.import_helper import import_module -from test.support.os_helper import TESTFN, unlink # Skip these tests if termios is not available import_module('termios') -if is_android or is_apple_mobile or is_emscripten or is_wasi: +if is_android or is_apple_mobile or is_wasm32: raise unittest.SkipTest("pty is not available on this platform") import errno @@ -297,26 +296,27 @@ def test_master_read(self): self.assertEqual(data, b"") def test_spawn_doesnt_hang(self): - self.addCleanup(unlink, TESTFN) - with open(TESTFN, 'wb') as f: - STDOUT_FILENO = 1 - dup_stdout = os.dup(STDOUT_FILENO) - os.dup2(f.fileno(), STDOUT_FILENO) - buf = b'' - def master_read(fd): - nonlocal buf - data = os.read(fd, 1024) - buf += data - return data + # gh-140482: Do the test in a pty.fork() child to avoid messing + # with the interactive test runner's terminal settings. + pid, fd = pty.fork() + if pid == pty.CHILD: + pty.spawn([sys.executable, '-c', 'print("hi there")']) + os._exit(0) + + try: + buf = bytearray() try: - pty.spawn([sys.executable, '-c', 'print("hi there")'], - master_read) - finally: - os.dup2(dup_stdout, STDOUT_FILENO) - os.close(dup_stdout) - self.assertEqual(buf, b'hi there\r\n') - with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), b'hi there\r\n') + while (data := os.read(fd, 1024)) != b'': + buf.extend(data) + except OSError as e: + if e.errno != errno.EIO: + raise + + (pid, status) = os.waitpid(pid, 0) + self.assertEqual(status, 0) + self.assertEqual(bytes(buf), b"hi there\r\n") + finally: + os.close(fd) class SmallPtyTests(unittest.TestCase): """These tests don't spawn children or hang.""" diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py index 6dc51e4371d0f6..3c8ed251acaa4d 100644 --- a/Lib/test/test_pulldom.py +++ b/Lib/test/test_pulldom.py @@ -46,7 +46,7 @@ def test_parse_semantics(self): items = pulldom.parseString(SMALL_SAMPLE) evt, node = next(items) # Just check the node is a Document: - self.assertTrue(hasattr(node, "createElement")) + self.assertHasAttr(node, "createElement") self.assertEqual(pulldom.START_DOCUMENT, evt) evt, node = next(items) self.assertEqual(pulldom.START_ELEMENT, evt) @@ -192,7 +192,7 @@ def _test_thorough(self, pd, before_root=True): evt, node = next(pd) self.assertEqual(pulldom.START_DOCUMENT, evt) # Just check the node is a Document: - self.assertTrue(hasattr(node, "createElement")) + self.assertHasAttr(node, "createElement") if before_root: evt, node = next(pd) diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index df05cd07d7e249..3062f4eb9be557 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -103,7 +103,7 @@ def ismethod(oclass, obj, name): for name, value in dict.items(): if name in ignore: continue - self.assertHasAttr(module, name, ignore) + self.assertHasAttr(module, name) py_item = getattr(module, name) if isinstance(value, pyclbr.Function): self.assertIsInstance(py_item, (FunctionType, BuiltinFunctionType)) @@ -254,7 +254,7 @@ def test_others(self): 'pdb', # pyclbr does not handle elegantly `typing` or properties ignore=('Union', '_ModuleTarget', '_ScriptTarget', '_ZipTarget', 'curframe_locals', - '_InteractState'), + '_InteractState', 'rlcompleter'), ) cm('pydoc', ignore=('input', 'output',)) # properties diff --git a/Lib/test/test_pydoc/pydocfodder.py b/Lib/test/test_pydoc/pydocfodder.py index 3cc2d5bd57fe5b..412aa3743e430b 100644 --- a/Lib/test/test_pydoc/pydocfodder.py +++ b/Lib/test/test_pydoc/pydocfodder.py @@ -87,6 +87,8 @@ def B_classmethod(cls, x): object_repr = object.__repr__ get = {}.get # same name dict_get = {}.get + from math import sin + B.B_classmethod_ref = B.B_classmethod @@ -186,3 +188,4 @@ def __call__(self, inst): object_repr = object.__repr__ get = {}.get # same name dict_get = {}.get +from math import sin # noqa: F401 diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index ac88b3c6f13d5e..5de99b8329bf74 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -11,7 +11,6 @@ import _pickle import pkgutil import re -import stat import tempfile import test.support import time @@ -33,7 +32,7 @@ assert_python_failure, spawn_python) from test.support import threading_helper from test.support import (reap_children, captured_stdout, - captured_stderr, is_emscripten, is_wasi, + captured_stderr, is_wasm32, requires_docstrings, MISSING_C_DOCSTRINGS) from test.support.os_helper import (TESTFN, rmtree, unlink) from test.test_pydoc import pydoc_mod @@ -1300,7 +1299,6 @@ def test_apropos_with_unreadable_dir(self): self.assertEqual(out.getvalue(), '') self.assertEqual(err.getvalue(), '') - @os_helper.skip_unless_working_chmod def test_apropos_empty_doc(self): pkgdir = os.path.join(TESTFN, 'walkpkg') os.mkdir(pkgdir) @@ -1308,14 +1306,9 @@ def test_apropos_empty_doc(self): init_path = os.path.join(pkgdir, '__init__.py') with open(init_path, 'w') as fobj: fobj.write("foo = 1") - current_mode = stat.S_IMODE(os.stat(pkgdir).st_mode) - try: - os.chmod(pkgdir, current_mode & ~stat.S_IEXEC) - with self.restrict_walk_packages(path=[TESTFN]), captured_stdout() as stdout: - pydoc.apropos('') - self.assertIn('walkpkg', stdout.getvalue()) - finally: - os.chmod(pkgdir, current_mode) + with self.restrict_walk_packages(path=[TESTFN]), captured_stdout() as stdout: + pydoc.apropos('') + self.assertIn('walkpkg', stdout.getvalue()) def test_url_search_package_error(self): # URL handler search should cope with packages that raise exceptions @@ -1380,7 +1373,7 @@ def test_modules_search_builtin(self): helper('modules garbage') result = help_io.getvalue() - self.assertTrue(result.startswith(expected)) + self.assertStartsWith(result, expected) def test_importfile(self): try: @@ -1946,9 +1939,11 @@ def test_text_doc_routines_in_class(self, cls=pydocfodder.B): if not support.MISSING_C_DOCSTRINGS: self.assertIn(' | get(key, default=None, /) method of builtins.dict instance', lines) self.assertIn(' | dict_get = get(key, default=None, /) method of builtins.dict instance', lines) + self.assertIn(' | sin(x, /)', lines) else: self.assertIn(' | get(...) method of builtins.dict instance', lines) self.assertIn(' | dict_get = get(...) method of builtins.dict instance', lines) + self.assertIn(' | sin(object, /)', lines) lines = self.getsection(result, f' | Class methods {where}:', ' | ' + '-'*70) self.assertIn(' | B_classmethod(x)', lines) @@ -2034,6 +2029,11 @@ def test_text_doc_routines_in_module(self): self.assertIn(' __repr__(...) unbound builtins.object method', lines) self.assertIn(' object_repr = __repr__(...) unbound builtins.object method', lines) + # builtin functions + if not support.MISSING_C_DOCSTRINGS: + self.assertIn(' sin(x, /)', lines) + else: + self.assertIn(' sin(object, /)', lines) def test_html_doc_routines_in_module(self): doc = pydoc.HTMLDoc() @@ -2074,9 +2074,15 @@ def test_html_doc_routines_in_module(self): self.assertIn(' __repr__(...) unbound builtins.object method', lines) self.assertIn(' object_repr = __repr__(...) unbound builtins.object method', lines) + # builtin functions + if not support.MISSING_C_DOCSTRINGS: + self.assertIn(' sin(x, /)', lines) + else: + self.assertIn(' sin(object, /)', lines) + @unittest.skipIf( - is_emscripten or is_wasi, + is_wasm32, "Socket server not available on Emscripten/WASI." ) class PydocServerTest(unittest.TestCase): diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 1d56ccd71cf962..daeaa38a3c5085 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -1,14 +1,18 @@ # XXX TypeErrors on calling handlers, or on bad return values from a # handler, are obscure and unhelpful. +import abc +import functools import os +import re import sys import sysconfig +import textwrap import unittest import traceback from io import BytesIO from test import support -from test.support import os_helper +from test.support import import_helper, os_helper from xml.parsers import expat from xml.parsers.expat import errors @@ -668,6 +672,23 @@ def test_change_size_2(self): parser.Parse(xml2, True) self.assertEqual(self.n, 4) +class ElementDeclHandlerTest(unittest.TestCase): + def test_trigger_leak(self): + # Unfixed, this test would leak the memory of the so-called + # "content model" in function ``my_ElementDeclHandler`` of pyexpat. + # See https://github.com/python/cpython/issues/140593. + data = textwrap.dedent('''\ + <!DOCTYPE quotations SYSTEM "quotations.dtd" [ + <!ELEMENT root ANY> + ]> + <root/> + ''').encode('UTF-8') + + parser = expat.ParserCreate() + parser.NotStandaloneHandler = lambda: 1.234 # arbitrary float + parser.ElementDeclHandler = lambda _1, _2: None + self.assertRaises(TypeError, parser.Parse, data, True) + class MalformedInputTest(unittest.TestCase): def test1(self): xml = b"\0\r\n" @@ -755,6 +776,42 @@ def resolve_entity(context, base, system_id, public_id): self.assertEqual(handler_call_args, [("bar", "baz")]) +class ParentParserLifetimeTest(unittest.TestCase): + """ + Subparsers make use of their parent XML_Parser inside of Expat. + As a result, parent parsers need to outlive subparsers. + + See https://github.com/python/cpython/issues/139400. + """ + + def test_parent_parser_outlives_its_subparsers__single(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + + # Now try to cause garbage collection of the parent parser + # while it's still being referenced by a related subparser. + del parser + + def test_parent_parser_outlives_its_subparsers__multiple(self): + parser = expat.ParserCreate() + subparser_one = parser.ExternalEntityParserCreate(None) + subparser_two = parser.ExternalEntityParserCreate(None) + + # Now try to cause garbage collection of the parent parser + # while it's still being referenced by a related subparser. + del parser + + def test_parent_parser_outlives_its_subparsers__chain(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + subsubparser = subparser.ExternalEntityParserCreate(None) + + # Now try to cause garbage collection of the parent parsers + # while they are still being referenced by a related subparser. + del parser + del subparser + + class ReparseDeferralTest(unittest.TestCase): def test_getter_setter_round_trip(self): parser = expat.ParserCreate() @@ -809,5 +866,199 @@ def start_element(name, _): self.assertEqual(started, ['doc']) +class AttackProtectionTestBase(abc.ABC): + """ + Base class for testing protections against XML payloads with + disproportionate amplification. + + The protections being tested should detect and prevent attacks + that leverage disproportionate amplification from small inputs. + """ + + @staticmethod + def exponential_expansion_payload(*, nrows, ncols, text='.'): + """Create a billion laughs attack payload. + + Be careful: the number of total items is pow(n, k), thereby + requiring at least pow(ncols, nrows) * sizeof(text) memory! + """ + template = textwrap.dedent(f"""\ + <?xml version="1.0"?> + <!DOCTYPE doc [ + <!ENTITY row0 "{text}"> + <!ELEMENT doc (#PCDATA)> + {{body}} + ]> + <doc>&row{nrows};</doc> + """).rstrip() + + body = '\n'.join( + f'<!ENTITY row{i + 1} "{f"&row{i};" * ncols}">' + for i in range(nrows) + ) + body = textwrap.indent(body, ' ' * 4) + return template.format(body=body) + + def test_payload_generation(self): + # self-test for exponential_expansion_payload() + payload = self.exponential_expansion_payload(nrows=2, ncols=3) + self.assertEqual(payload, textwrap.dedent("""\ + <?xml version="1.0"?> + <!DOCTYPE doc [ + <!ENTITY row0 "."> + <!ELEMENT doc (#PCDATA)> + <!ENTITY row1 "&row0;&row0;&row0;"> + <!ENTITY row2 "&row1;&row1;&row1;"> + ]> + <doc>&row2;</doc> + """).rstrip()) + + def assert_root_parser_failure(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) is invalid for a sub-parser.""" + msg = "parser must be a root parser" + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + @abc.abstractmethod + def assert_rejected(self, func, /, *args, **kwargs): + """Assert that func(*args, **kwargs) triggers the attack protection. + + Note: this method must ensure that the attack protection being tested + is the one that is actually triggered at runtime, e.g., by matching + the exact error message. + """ + + @abc.abstractmethod + def set_activation_threshold(self, parser, threshold): + """Set the activation threshold for the tested protection.""" + + @abc.abstractmethod + def set_maximum_amplification(self, parser, max_factor): + """Set the maximum amplification factor for the tested protection.""" + + @abc.abstractmethod + def test_set_activation_threshold__threshold_reached(self): + """Test when the activation threshold is exceeded.""" + + @abc.abstractmethod + def test_set_activation_threshold__threshold_not_reached(self): + """Test when the activation threshold is not exceeded.""" + + def test_set_activation_threshold__invalid_threshold_type(self): + parser = expat.ParserCreate() + setter = functools.partial(self.set_activation_threshold, parser) + + self.assertRaises(TypeError, setter, 1.0) + self.assertRaises(TypeError, setter, -1.5) + self.assertRaises(ValueError, setter, -5) + + def test_set_activation_threshold__invalid_threshold_range(self): + _testcapi = import_helper.import_module("_testcapi") + parser = expat.ParserCreate() + setter = functools.partial(self.set_activation_threshold, parser) + + self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1) + + def test_set_activation_threshold__fail_for_subparser(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + setter = functools.partial(self.set_activation_threshold, subparser) + self.assert_root_parser_failure(setter, 12345) + + @abc.abstractmethod + def test_set_maximum_amplification__amplification_exceeded(self): + """Test when the amplification factor is exceeded.""" + + @abc.abstractmethod + def test_set_maximum_amplification__amplification_not_exceeded(self): + """Test when the amplification factor is not exceeded.""" + + def test_set_maximum_amplification__infinity(self): + inf = float('inf') # an 'inf' threshold is allowed by Expat + parser = expat.ParserCreate() + self.assertIsNone(self.set_maximum_amplification(parser, inf)) + + def test_set_maximum_amplification__invalid_max_factor_type(self): + parser = expat.ParserCreate() + setter = functools.partial(self.set_maximum_amplification, parser) + + self.assertRaises(TypeError, setter, None) + self.assertRaises(TypeError, setter, 'abc') + + def test_set_maximum_amplification__invalid_max_factor_range(self): + parser = expat.ParserCreate() + setter = functools.partial(self.set_maximum_amplification, parser) + + msg = re.escape("'max_factor' must be at least 1.0") + self.assertRaisesRegex(expat.ExpatError, msg, setter, float('nan')) + self.assertRaisesRegex(expat.ExpatError, msg, setter, 0.99) + + def test_set_maximum_amplification__fail_for_subparser(self): + parser = expat.ParserCreate() + subparser = parser.ExternalEntityParserCreate(None) + setter = functools.partial(self.set_maximum_amplification, subparser) + self.assert_root_parser_failure(setter, 123.45) + + +@unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2") +class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase): + + # NOTE: with the default Expat configuration, the billion laughs protection + # may hit before the allocation limiter if exponential_expansion_payload() + # is not carefully parametrized. As such, the payloads should be chosen so + # that either the allocation limiter is hit before other protections are + # triggered or no protection at all is triggered. + + def assert_rejected(self, func, /, *args, **kwargs): + """Check that func(*args, **kwargs) hits the allocation limit.""" + msg = r"out of memory: line \d+, column \d+" + self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs) + + def set_activation_threshold(self, parser, threshold): + return parser.SetAllocTrackerActivationThreshold(threshold) + + def set_maximum_amplification(self, parser, max_factor): + return parser.SetAllocTrackerMaximumAmplification(max_factor) + + def test_set_activation_threshold__threshold_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be always reached. + self.set_activation_threshold(parser, 3) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_activation_threshold__threshold_not_reached(self): + parser = expat.ParserCreate() + # Choose a threshold expected to be never reached. + self.set_activation_threshold(parser, pow(10, 5)) + # Check that the threshold is reached by choosing a small factor + # and a payload whose peak amplification factor exceeds it. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + payload = self.exponential_expansion_payload(ncols=10, nrows=4) + self.assertIsNotNone(parser.Parse(payload, True)) + + def test_set_maximum_amplification__amplification_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to always be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1.0)) + # Craft a payload for which the peak amplification factor is > 1.0. + payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assert_rejected(parser.Parse, payload, True) + + def test_set_maximum_amplification__amplification_not_exceeded(self): + parser = expat.ParserCreate() + # Unconditionally enable maximum activation factor. + self.set_activation_threshold(parser, 0) + # Choose a max amplification factor expected to never be exceeded. + self.assertIsNone(self.set_maximum_amplification(parser, 1e4)) + # Craft a payload for which the peak amplification factor is < 1e4. + payload = self.exponential_expansion_payload(ncols=1, nrows=2) + self.assertIsNotNone(parser.Parse(payload, True)) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_pyrepl/__init__.py b/Lib/test/test_pyrepl/__init__.py index 8359d9844623c2..2f37bff6df8b4a 100644 --- a/Lib/test/test_pyrepl/__init__.py +++ b/Lib/test/test_pyrepl/__init__.py @@ -1,14 +1,10 @@ import os import sys -from test.support import requires, load_package_tests -from test.support.import_helper import import_module +from test.support import import_helper, load_package_tests + if sys.platform != "win32": - # On non-Windows platforms, testing pyrepl currently requires that the - # 'curses' resource be given on the regrtest command line using the -u - # option. Additionally, we need to attempt to import curses and readline. - requires("curses") - curses = import_module("curses") + import_helper.import_module("termios") def load_tests(*args): diff --git a/Lib/test/test_pyrepl/eio_test_script.py b/Lib/test/test_pyrepl/eio_test_script.py new file mode 100644 index 00000000000000..e3ea6caef58e80 --- /dev/null +++ b/Lib/test/test_pyrepl/eio_test_script.py @@ -0,0 +1,94 @@ +import errno +import fcntl +import os +import pty +import signal +import sys +import termios + + +def handler(sig, f): + pass + + +def create_eio_condition(): + # SIGINT handler used to produce an EIO. + # See https://github.com/python/cpython/issues/135329. + try: + master_fd, slave_fd = pty.openpty() + child_pid = os.fork() + if child_pid == 0: + try: + os.setsid() + fcntl.ioctl(slave_fd, termios.TIOCSCTTY, 0) + child_process_group_id = os.getpgrp() + grandchild_pid = os.fork() + if grandchild_pid == 0: + os.setpgid(0, 0) # set process group for grandchild + os.dup2(slave_fd, 0) # redirect stdin + if slave_fd > 2: + os.close(slave_fd) + # Fork grandchild for terminal control manipulation + if os.fork() == 0: + sys.exit(0) # exit the child process that was just obtained + else: + try: + os.tcsetpgrp(0, child_process_group_id) + except OSError: + pass + sys.exit(0) + else: + # Back to child + try: + os.setpgid(grandchild_pid, grandchild_pid) + except ProcessLookupError: + pass + os.tcsetpgrp(slave_fd, grandchild_pid) + if slave_fd > 2: + os.close(slave_fd) + os.waitpid(grandchild_pid, 0) + # Manipulate terminal control to create EIO condition + os.tcsetpgrp(master_fd, child_process_group_id) + # Now try to read from master - this might cause EIO + try: + os.read(master_fd, 1) + except OSError as e: + if e.errno == errno.EIO: + print(f"Setup created EIO condition: {e}", file=sys.stderr) + sys.exit(0) + except Exception as setup_e: + print(f"Setup error: {setup_e}", file=sys.stderr) + sys.exit(1) + else: + # Parent process + os.close(slave_fd) + os.waitpid(child_pid, 0) + # Now replace stdin with master_fd and try to read + os.dup2(master_fd, 0) + os.close(master_fd) + # This should now trigger EIO + print(f"Unexpectedly got input: {input()!r}", file=sys.stderr) + sys.exit(0) + except OSError as e: + if e.errno == errno.EIO: + print(f"Got EIO: {e}", file=sys.stderr) + sys.exit(1) + elif e.errno == errno.ENXIO: + print(f"Got ENXIO (no such device): {e}", file=sys.stderr) + sys.exit(1) # Treat ENXIO as success too + else: + print(f"Got other OSError: errno={e.errno} {e}", file=sys.stderr) + sys.exit(2) + except EOFError as e: + print(f"Got EOFError: {e}", file=sys.stderr) + sys.exit(3) + except Exception as e: + print(f"Got unexpected error: {type(e).__name__}: {e}", file=sys.stderr) + sys.exit(4) + + +if __name__ == "__main__": + # Set up signal handler for coordination + signal.signal(signal.SIGUSR1, lambda *a: create_eio_condition()) + print("READY", flush=True) + signal.pause() diff --git a/Lib/test/test_pyrepl/test_eventqueue.py b/Lib/test/test_pyrepl/test_eventqueue.py index edfe6ac4748f33..69d9612b70dc77 100644 --- a/Lib/test/test_pyrepl/test_eventqueue.py +++ b/Lib/test/test_pyrepl/test_eventqueue.py @@ -3,6 +3,8 @@ from unittest.mock import patch from test import support +from _pyrepl import terminfo + try: from _pyrepl.console import Event from _pyrepl import base_eventqueue @@ -172,17 +174,22 @@ def _push(keys): self.assertEqual(eq.get(), _event("key", "a")) +class EmptyTermInfo(terminfo.TermInfo): + def get(self, cap: str) -> bytes: + return b"" + + @unittest.skipIf(support.MS_WINDOWS, "No Unix event queue on Windows") class TestUnixEventQueue(EventQueueTestBase, unittest.TestCase): def setUp(self): - self.enterContext(patch("_pyrepl.curses.tigetstr", lambda x: b"")) self.file = tempfile.TemporaryFile() def tearDown(self) -> None: self.file.close() def make_eventqueue(self) -> base_eventqueue.BaseEventQueue: - return unix_eventqueue.EventQueue(self.file.fileno(), "utf-8") + ti = EmptyTermInfo("ansi") + return unix_eventqueue.EventQueue(self.file.fileno(), "utf-8", ti) @unittest.skipUnless(support.MS_WINDOWS, "No Windows event queue on Unix") diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index a20719033fc9b7..f0837ee94e9beb 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -1,7 +1,7 @@ import contextlib import io -import unittest import warnings +import unittest from unittest.mock import patch from textwrap import dedent @@ -293,7 +293,7 @@ def f(): """) with warnings.catch_warnings(record=True) as caught: - warnings.simplefilter("default") + warnings.simplefilter("always") console.runsource(code) count = sum("'return' in a 'finally' block" in str(w.message) diff --git a/Lib/test/test_pyrepl/test_pyrepl.py b/Lib/test/test_pyrepl/test_pyrepl.py index 93029ab6e080ba..c1485af524aebc 100644 --- a/Lib/test/test_pyrepl/test_pyrepl.py +++ b/Lib/test/test_pyrepl/test_pyrepl.py @@ -1,3 +1,4 @@ +import importlib import io import itertools import os @@ -8,10 +9,11 @@ import subprocess import sys import tempfile -from unittest import TestCase, skipUnless, skipIf +from pkgutil import ModuleInfo +from unittest import TestCase, skipUnless, skipIf, SkipTest from unittest.mock import patch -from test.support import force_not_colorized, make_clean_env -from test.support import SHORT_TIMEOUT, STDLIB_DIR +from test.support import force_not_colorized, make_clean_env, Py_DEBUG +from test.support import has_subprocess_support, SHORT_TIMEOUT, STDLIB_DIR from test.support.import_helper import import_module from test.support.os_helper import EnvironmentVarGuard, unlink @@ -25,9 +27,16 @@ code_to_events, ) from _pyrepl.console import Event -from _pyrepl._module_completer import ImportParser, ModuleCompleter -from _pyrepl.readline import (ReadlineAlikeReader, ReadlineConfig, - _ReadlineWrapper) +from _pyrepl._module_completer import ( + ImportParser, + ModuleCompleter, + HARDCODED_SUBMODULES, +) +from _pyrepl.readline import ( + ReadlineAlikeReader, + ReadlineConfig, + _ReadlineWrapper, +) from _pyrepl.readline import multiline_input as readline_multiline_input try: @@ -37,6 +46,10 @@ class ReplTestCase(TestCase): + def setUp(self): + if not has_subprocess_support: + raise SkipTest("test module requires subprocess") + def run_repl( self, repl_input: str | list[str], @@ -46,6 +59,7 @@ def run_repl( cwd: str | None = None, skip: bool = False, timeout: float = SHORT_TIMEOUT, + exit_on_output: str | None = None, ) -> tuple[str, int]: temp_dir = None if cwd is None: @@ -59,6 +73,7 @@ def run_repl( cwd=cwd, skip=skip, timeout=timeout, + exit_on_output=exit_on_output, ) finally: if temp_dir is not None: @@ -73,6 +88,7 @@ def _run_repl( cwd: str, skip: bool, timeout: float, + exit_on_output: str | None, ) -> tuple[str, int]: assert pty master_fd, slave_fd = pty.openpty() @@ -118,6 +134,11 @@ def _run_repl( except OSError: break output.append(data) + if exit_on_output is not None: + output = ["".join(output)] + if exit_on_output in output[0]: + process.kill() + break else: os.close(master_fd) process.kill() @@ -452,6 +473,11 @@ def test_auto_indent_default(self): ) # fmt: on + events = code_to_events(input_code) + reader = self.prepare_reader(events) + output = multiline_input(reader) + self.assertEqual(output, output_code) + def test_auto_indent_continuation(self): # auto indenting according to previous user indentation # fmt: off @@ -912,7 +938,13 @@ def test_func(self): class TestPyReplModuleCompleter(TestCase): def setUp(self): + # Make iter_modules() search only the standard library. + # This makes the test more reliable in case there are + # other user packages/scripts on PYTHONPATH which can + # interfere with the completions. + lib_path = os.path.dirname(importlib.__path__[0]) self._saved_sys_path = sys.path + sys.path = [lib_path] def tearDown(self): sys.path = self._saved_sys_path @@ -920,19 +952,12 @@ def tearDown(self): def prepare_reader(self, events, namespace): console = FakeConsole(events) config = ReadlineConfig() + config.module_completer = ModuleCompleter(namespace) config.readline_completer = rlcompleter.Completer(namespace).complete reader = ReadlineAlikeReader(console=console, config=config) return reader def test_import_completions(self): - import importlib - # Make iter_modules() search only the standard library. - # This makes the test more reliable in case there are - # other user packages/scripts on PYTHONPATH which can - # intefere with the completions. - lib_path = os.path.dirname(importlib.__path__[0]) - sys.path = [lib_path] - cases = ( ("import path\t\n", "import pathlib"), ("import importlib.\t\tres\t\n", "import importlib.resources"), @@ -954,10 +979,50 @@ def test_import_completions(self): output = reader.readline() self.assertEqual(output, expected) - def test_relative_import_completions(self): + @patch("pkgutil.iter_modules", lambda: [ModuleInfo(None, "public", True), + ModuleInfo(None, "_private", True)]) + @patch("sys.builtin_module_names", ()) + def test_private_completions(self): + cases = ( + # Return public methods by default + ("import \t\n", "import public"), + ("from \t\n", "from public"), + # Return private methods if explicitly specified + ("import _\t\n", "import _private"), + ("from _\t\n", "from _private"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + + @patch( + "_pyrepl._module_completer.ModuleCompleter.iter_submodules", + lambda *_: [ + ModuleInfo(None, "public", True), + ModuleInfo(None, "_private", True), + ], + ) + def test_sub_module_private_completions(self): + cases = ( + # Return public methods by default + ("from foo import \t\n", "from foo import public"), + # Return private methods if explicitly specified + ("from foo import _\t\n", "from foo import _private"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + + def test_builtin_completion_top_level(self): cases = ( - ("from .readl\t\n", "from .readline"), - ("from . import readl\t\n", "from . import readline"), + ("import bui\t\n", "import builtins"), + ("from bui\t\n", "from builtins"), ) for code, expected in cases: with self.subTest(code=code): @@ -966,8 +1031,22 @@ def test_relative_import_completions(self): output = reader.readline() self.assertEqual(output, expected) - @patch("pkgutil.iter_modules", lambda: [(None, 'valid_name', None), - (None, 'invalid-name', None)]) + def test_relative_import_completions(self): + cases = ( + (None, "from .readl\t\n", "from .readl"), + (None, "from . import readl\t\n", "from . import readl"), + ("_pyrepl", "from .readl\t\n", "from .readline"), + ("_pyrepl", "from . import readl\t\n", "from . import readline"), + ) + for package, code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={"__package__": package}) + output = reader.readline() + self.assertEqual(output, expected) + + @patch("pkgutil.iter_modules", lambda: [ModuleInfo(None, "valid_name", True), + ModuleInfo(None, "invalid-name", True)]) def test_invalid_identifiers(self): # Make sure modules which are not valid identifiers # are not suggested as those cannot be imported via 'import'. @@ -983,6 +1062,45 @@ def test_invalid_identifiers(self): output = reader.readline() self.assertEqual(output, expected) + def test_no_fallback_on_regular_completion(self): + cases = ( + ("import pri\t\n", "import pri"), + ("from pri\t\n", "from pri"), + ("from typing import Na\t\n", "from typing import Na"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + + def test_hardcoded_stdlib_submodules(self): + cases = ( + ("import collections.\t\n", "import collections.abc"), + ("from os import \t\n", "from os import path"), + ("import xml.parsers.expat.\t\te\t\n\n", "import xml.parsers.expat.errors"), + ("from xml.parsers.expat import \t\tm\t\n\n", "from xml.parsers.expat import model"), + ) + for code, expected in cases: + with self.subTest(code=code): + events = code_to_events(code) + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, expected) + + def test_hardcoded_stdlib_submodules_not_proposed_if_local_import(self): + with tempfile.TemporaryDirectory() as _dir: + dir = pathlib.Path(_dir) + (dir / "collections").mkdir() + (dir / "collections" / "__init__.py").touch() + (dir / "collections" / "foo.py").touch() + with patch.object(sys, "path", [dir, *sys.path]): + events = code_to_events("import collections.\t\n") + reader = self.prepare_reader(events, namespace={}) + output = reader.readline() + self.assertEqual(output, "import collections.foo") + def test_get_path_and_prefix(self): cases = ( ('', ('', '')), @@ -1050,11 +1168,15 @@ def test_parse(self): self.assertEqual(actual, parsed) # The parser should not get tripped up by any # other preceding statements - code = f'import xyz\n{code}' - with self.subTest(code=code): + _code = f'import xyz\n{code}' + parser = ImportParser(_code) + actual = parser.parse() + with self.subTest(code=_code): self.assertEqual(actual, parsed) - code = f'import xyz;{code}' - with self.subTest(code=code): + _code = f'import xyz;{code}' + parser = ImportParser(_code) + actual = parser.parse() + with self.subTest(code=_code): self.assertEqual(actual, parsed) def test_parse_error(self): @@ -1107,6 +1229,19 @@ def test_parse_error(self): with self.subTest(code=code): self.assertEqual(actual, None) + +class TestHardcodedSubmodules(TestCase): + def test_hardcoded_stdlib_submodules_are_importable(self): + for parent_path, submodules in HARDCODED_SUBMODULES.items(): + for module_name in submodules: + path = f"{parent_path}.{module_name}" + with self.subTest(path=path): + # We can't use importlib.util.find_spec here, + # since some hardcoded submodules parents are + # not proper packages + importlib.import_module(path) + + class TestPasteEvent(TestCase): def prepare_reader(self, events): console = FakeConsole(events) @@ -1271,6 +1406,9 @@ class TestDumbTerminal(ReplTestCase): def test_dumb_terminal_exits_cleanly(self): env = os.environ.copy() env.pop('PYTHON_BASIC_REPL', None) + # Ignore PYTHONSTARTUP to not pollute the output + # with an unrelated traceback. See GH-137568. + env.pop('PYTHONSTARTUP', None) env.update({"TERM": "dumb"}) output, exit_code = self.run_repl("exit()\n", env=env) self.assertEqual(exit_code, 0) @@ -1286,6 +1424,7 @@ def setUp(self): # Cleanup from PYTHON* variables to isolate from local # user settings, see #121359. Such variables should be # added later in test methods to patched os.environ. + super().setUp() patcher = patch('os.environ', new=make_clean_env()) self.addCleanup(patcher.stop) patcher.start() @@ -1327,7 +1466,7 @@ def _assertMatchOK( ) @force_not_colorized - def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False): + def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False, pythonstartup=False): clean_env = make_clean_env() clean_env["NO_COLOR"] = "1" # force_not_colorized doesn't touch subprocesses @@ -1336,9 +1475,13 @@ def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False blue.mkdir() mod = blue / "calx.py" mod.write_text("FOO = 42", encoding="utf-8") + startup = blue / "startup.py" + startup.write_text("BAR = 64", encoding="utf-8") commands = [ "print(f'^{" + var + "=}')" for var in expectations ] + ["exit()"] + if pythonstartup: + clean_env["PYTHONSTARTUP"] = str(startup) if as_file and as_module: self.fail("as_file and as_module are mutually exclusive") elif as_file: @@ -1357,7 +1500,13 @@ def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False skip=True, ) else: - self.fail("Choose one of as_file or as_module") + output, exit_code = self.run_repl( + commands, + cmdline_args=[], + env=clean_env, + cwd=td, + skip=True, + ) self.assertEqual(exit_code, 0) for var, expected in expectations.items(): @@ -1370,6 +1519,23 @@ def _run_repl_globals_test(self, expectations, *, as_file=False, as_module=False self.assertNotIn("Exception", output) self.assertNotIn("Traceback", output) + def test_globals_initialized_as_default(self): + expectations = { + "__name__": "'__main__'", + "__package__": "None", + # "__file__" is missing in -i, like in the basic REPL + } + self._run_repl_globals_test(expectations) + + def test_globals_initialized_from_pythonstartup(self): + expectations = { + "BAR": "64", + "__name__": "'__main__'", + "__package__": "None", + # "__file__" is missing in -i, like in the basic REPL + } + self._run_repl_globals_test(expectations, pythonstartup=True) + def test_inspect_keeps_globals_from_inspected_file(self): expectations = { "FOO": "42", @@ -1379,6 +1545,16 @@ def test_inspect_keeps_globals_from_inspected_file(self): } self._run_repl_globals_test(expectations, as_file=True) + def test_inspect_keeps_globals_from_inspected_file_with_pythonstartup(self): + expectations = { + "FOO": "42", + "BAR": "64", + "__name__": "'__main__'", + "__package__": "None", + # "__file__" is missing in -i, like in the basic REPL + } + self._run_repl_globals_test(expectations, as_file=True, pythonstartup=True) + def test_inspect_keeps_globals_from_inspected_module(self): expectations = { "FOO": "42", @@ -1388,26 +1564,32 @@ def test_inspect_keeps_globals_from_inspected_module(self): } self._run_repl_globals_test(expectations, as_module=True) + def test_inspect_keeps_globals_from_inspected_module_with_pythonstartup(self): + expectations = { + "FOO": "42", + "BAR": "64", + "__name__": "'__main__'", + "__package__": "'blue'", + "__file__": re.compile(r"^'.*calx.py'$"), + } + self._run_repl_globals_test(expectations, as_module=True, pythonstartup=True) + @force_not_colorized def test_python_basic_repl(self): env = os.environ.copy() - commands = ("from test.support import initialized_with_pyrepl\n" - "initialized_with_pyrepl()\n" - "exit()\n") - + pyrepl_commands = "clear\nexit()\n" env.pop("PYTHON_BASIC_REPL", None) - output, exit_code = self.run_repl(commands, env=env, skip=True) + output, exit_code = self.run_repl(pyrepl_commands, env=env, skip=True) self.assertEqual(exit_code, 0) - self.assertIn("True", output) - self.assertNotIn("False", output) self.assertNotIn("Exception", output) + self.assertNotIn("NameError", output) self.assertNotIn("Traceback", output) + basic_commands = "help\nexit()\n" env["PYTHON_BASIC_REPL"] = "1" - output, exit_code = self.run_repl(commands, env=env) + output, exit_code = self.run_repl(basic_commands, env=env) self.assertEqual(exit_code, 0) - self.assertIn("False", output) - self.assertNotIn("True", output) + self.assertIn("Type help() for interactive help", output) self.assertNotIn("Exception", output) self.assertNotIn("Traceback", output) @@ -1544,6 +1726,17 @@ def test_null_byte(self): self.assertEqual(exit_code, 0) self.assertNotIn("TypeError", output) + @force_not_colorized + def test_non_string_suggestion_candidates(self): + commands = ("import runpy\n" + "runpy._run_module_code('blech', {0: '', 'bluch': ''}, '')\n" + "exit()\n") + + output, exit_code = self.run_repl(commands) + self.assertEqual(exit_code, 0) + self.assertNotIn("all elements in 'candidates' must be strings", output) + self.assertIn("bluch", output) + def test_readline_history_file(self): # skip, if readline module is not available readline = import_module('readline') @@ -1574,18 +1767,24 @@ def test_history_survive_crash(self): commands = "1\n2\n3\nexit()\n" output, exit_code = self.run_repl(commands, env=env, skip=True) + self.assertEqual(exit_code, 0) - commands = "spam\nimport time\ntime.sleep(1000)\nquit\n" - try: - self.run_repl(commands, env=env, timeout=3) - except AssertionError: - pass + # Run until "0xcafe" is printed (as "51966") and then kill the + # process to simulate a crash. Note that the output also includes + # the echoed input commands. + commands = "spam\nimport time\n0xcafe\ntime.sleep(1000)\nquit\n" + output, exit_code = self.run_repl(commands, env=env, + exit_on_output="51966") + self.assertNotEqual(exit_code, 0) history = pathlib.Path(hfile.name).read_text() self.assertIn("2", history) self.assertIn("exit()", history) self.assertIn("spam", history) self.assertIn("import time", history) + # History is written after each command's output is printed to the + # console, so depending on how quickly the process is killed, + # the last command may or may not be written to the history file. self.assertNotIn("sleep", history) self.assertNotIn("quit", history) @@ -1605,3 +1804,100 @@ def test_prompt_after_help(self): # Extra stuff (newline and `exit` rewrites) are necessary # because of how run_repl works. self.assertNotIn(">>> \n>>> >>>", cleaned_output) + + @skipUnless(Py_DEBUG, '-X showrefcount requires a Python debug build') + def test_showrefcount(self): + env = os.environ.copy() + env.pop("PYTHON_BASIC_REPL", "") + output, _ = self.run_repl("1\n1+2\nexit()\n", cmdline_args=['-Xshowrefcount'], env=env) + matches = re.findall(r'\[-?\d+ refs, \d+ blocks\]', output) + self.assertEqual(len(matches), 3) + + env["PYTHON_BASIC_REPL"] = "1" + output, _ = self.run_repl("1\n1+2\nexit()\n", cmdline_args=['-Xshowrefcount'], env=env) + matches = re.findall(r'\[-?\d+ refs, \d+ blocks\]', output) + self.assertEqual(len(matches), 3) + + +class TestPyReplCtrlD(TestCase): + """Test Ctrl+D behavior in _pyrepl to match old pre-3.13 REPL behavior. + + Ctrl+D should: + - Exit on empty buffer (raises EOFError) + - Delete character when cursor is in middle of line + - Perform no operation when cursor is at end of line without newline + - Exit multiline mode when cursor is at end with trailing newline + - Run code up to that point when pressed on blank line with preceding lines + """ + def prepare_reader(self, events): + console = FakeConsole(events) + config = ReadlineConfig(readline_completer=None) + reader = ReadlineAlikeReader(console=console, config=config) + return reader + + def test_ctrl_d_empty_line(self): + """Test that pressing Ctrl+D on empty line exits the program""" + events = [ + Event(evt="key", data="\x04", raw=bytearray(b"\x04")), # Ctrl+D + ] + reader = self.prepare_reader(events) + with self.assertRaises(EOFError): + multiline_input(reader) + + def test_ctrl_d_multiline_with_new_line(self): + """Test that pressing Ctrl+D in multiline mode with trailing newline exits multiline mode""" + events = itertools.chain( + code_to_events("def f():\n pass\n"), # Enter multiline mode with trailing newline + [ + Event(evt="key", data="\x04", raw=bytearray(b"\x04")), # Ctrl+D + ], + ) + reader, _ = handle_all_events(events) + self.assertTrue(reader.finished) + self.assertEqual("def f():\n pass\n", "".join(reader.buffer)) + + def test_ctrl_d_multiline_middle_of_line(self): + """Test that pressing Ctrl+D in multiline mode with cursor in middle deletes character""" + events = itertools.chain( + code_to_events("def f():\n hello world"), # Enter multiline mode + [ + Event(evt="key", data="left", raw=bytearray(b"\x1bOD")) + ] * 5, # move cursor to 'w' in "world" + [ + Event(evt="key", data="\x04", raw=bytearray(b"\x04")) + ], # Ctrl+D should delete 'w' + ) + reader, _ = handle_all_events(events) + self.assertFalse(reader.finished) + self.assertEqual("def f():\n hello orld", "".join(reader.buffer)) + + def test_ctrl_d_multiline_end_of_line_no_newline(self): + """Test that pressing Ctrl+D at end of line without newline performs no operation""" + events = itertools.chain( + code_to_events("def f():\n hello"), # Enter multiline mode, no trailing newline + [ + Event(evt="key", data="\x04", raw=bytearray(b"\x04")) + ], # Ctrl+D should be no-op + ) + reader, _ = handle_all_events(events) + self.assertFalse(reader.finished) + self.assertEqual("def f():\n hello", "".join(reader.buffer)) + + def test_ctrl_d_single_line_middle_of_line(self): + """Test that pressing Ctrl+D in single line mode deletes current character""" + events = itertools.chain( + code_to_events("hello"), + [Event(evt="key", data="left", raw=bytearray(b"\x1bOD"))], # move left + [Event(evt="key", data="\x04", raw=bytearray(b"\x04"))], # Ctrl+D + ) + reader, _ = handle_all_events(events) + self.assertEqual("hell", "".join(reader.buffer)) + + def test_ctrl_d_single_line_end_no_newline(self): + """Test that pressing Ctrl+D at end of single line without newline does nothing""" + events = itertools.chain( + code_to_events("hello"), # cursor at end of line + [Event(evt="key", data="\x04", raw=bytearray(b"\x04"))], # Ctrl+D + ) + reader, _ = handle_all_events(events) + self.assertEqual("hello", "".join(reader.buffer)) diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 4ee320a5a4dabb..b1b6ae16a1e592 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -375,8 +375,10 @@ def funct(case: str = sys.platform) -> None: ) match case: case "emscripten": print("on the web") - case "ios" | "android": print("on the phone") + case "ios" | "android": + print("on the phone") case _: print('arms around', match.group(1)) + type type = type[type] """ ) expected = dedent( @@ -393,8 +395,10 @@ def funct(case: str = sys.platform) -> None: {o}){z} {K}match{z} case{o}:{z} {K}case{z} {s}"emscripten"{z}{o}:{z} {b}print{z}{o}({z}{s}"on the web"{z}{o}){z} - {K}case{z} {s}"ios"{z} {o}|{z} {s}"android"{z}{o}:{z} {b}print{z}{o}({z}{s}"on the phone"{z}{o}){z} + {K}case{z} {s}"ios"{z} {o}|{z} {s}"android"{z}{o}:{z} + {b}print{z}{o}({z}{s}"on the phone"{z}{o}){z} {K}case{z} {K}_{z}{o}:{z} {b}print{z}{o}({z}{s}'arms around'{z}{o},{z} match{o}.{z}group{o}({z}{n}1{z}{o}){z}{o}){z} + {K}type{z} {b}type{z} {o}={z} {b}type{z}{o}[{z}{b}type{z}{o}]{z} """ ) expected_sync = expected.format(a="", **colors) @@ -402,14 +406,14 @@ def funct(case: str = sys.platform) -> None: reader, _ = handle_all_events(events) self.assert_screen_equal(reader, code, clean=True) self.assert_screen_equal(reader, expected_sync) - self.assertEqual(reader.pos, 2**7 + 2**8) - self.assertEqual(reader.cxy, (0, 14)) + self.assertEqual(reader.pos, 419) + self.assertEqual(reader.cxy, (0, 16)) async_msg = "{k}async{z} ".format(**colors) expected_async = expected.format(a=async_msg, **colors) more_events = itertools.chain( code_to_events(code), - [Event(evt="key", data="up", raw=bytearray(b"\x1bOA"))] * 13, + [Event(evt="key", data="up", raw=bytearray(b"\x1bOA"))] * 15, code_to_events("async "), ) reader, _ = handle_all_events(more_events) @@ -497,6 +501,57 @@ def unfinished_function(): self.assert_screen_equal(reader, code, clean=True) self.assert_screen_equal(reader, expected) + def test_syntax_highlighting_indentation_error(self): + code = dedent( + """\ + def unfinished_function(): + var = 1 + oops + """ + ) + expected = dedent( + """\ + {k}def{z} {d}unfinished_function{z}{o}({z}{o}){z}{o}:{z} + var {o}={z} {n}1{z} + oops + """ + ).format(**colors) + events = code_to_events(code) + reader, _ = handle_all_events(events) + self.assert_screen_equal(reader, code, clean=True) + self.assert_screen_equal(reader, expected) + + def test_syntax_highlighting_literal_brace_in_fstring_or_tstring(self): + code = dedent( + """\ + f"{{" + f"}}" + f"a{{b" + f"a}}b" + f"a{{b}}c" + t"a{{b}}c" + f"{{{0}}}" + f"{ {0} }" + """ + ) + expected = dedent( + """\ + {s}f"{z}{s}<<{z}{s}"{z} + {s}f"{z}{s}>>{z}{s}"{z} + {s}f"{z}{s}a<<{z}{s}b{z}{s}"{z} + {s}f"{z}{s}a>>{z}{s}b{z}{s}"{z} + {s}f"{z}{s}a<<{z}{s}b>>{z}{s}c{z}{s}"{z} + {s}t"{z}{s}a<<{z}{s}b>>{z}{s}c{z}{s}"{z} + {s}f"{z}{s}<<{z}{o}<{z}{n}0{z}{o}>{z}{s}>>{z}{s}"{z} + {s}f"{z}{o}<{z} {o}<{z}{n}0{z}{o}>{z} {o}>{z}{s}"{z} + """ + ).format(**colors).replace("<", "{").replace(">", "}") + events = code_to_events(code) + reader, _ = handle_all_events(events) + self.assert_screen_equal(reader, code, clean=True) + self.maxDiff=None + self.assert_screen_equal(reader, expected) + def test_control_characters(self): code = 'flag = "🏳️‍🌈"' events = code_to_events(code) diff --git a/Lib/test/test_pyrepl/test_terminfo.py b/Lib/test/test_pyrepl/test_terminfo.py new file mode 100644 index 00000000000000..562cf5c905bd67 --- /dev/null +++ b/Lib/test/test_pyrepl/test_terminfo.py @@ -0,0 +1,651 @@ +"""Tests comparing PyREPL's pure Python curses implementation with the standard curses module.""" + +import json +import os +import subprocess +import sys +import unittest +from test.support import requires, has_subprocess_support +from textwrap import dedent + +# Only run these tests if curses is available +requires("curses") + +try: + import _curses +except ImportError: + try: + import curses as _curses + except ImportError: + _curses = None + +from _pyrepl import terminfo + + +ABSENT_STRING = terminfo.ABSENT_STRING +CANCELLED_STRING = terminfo.CANCELLED_STRING + + +class TestCursesCompatibility(unittest.TestCase): + """Test that PyREPL's curses implementation matches the standard curses behavior. + + Python's `curses` doesn't allow calling `setupterm()` again with a different + $TERM in the same process, so we subprocess all `curses` tests to get correctly + set up terminfo.""" + + @classmethod + def setUpClass(cls): + if _curses is None: + raise unittest.SkipTest( + "`curses` capability provided to regrtest but `_curses` not importable" + ) + + if not has_subprocess_support: + raise unittest.SkipTest("test module requires subprocess") + + # we need to ensure there's a terminfo database on the system and that + # `infocmp` works + cls.infocmp("dumb") + + def setUp(self): + self.original_term = os.environ.get("TERM", None) + + def tearDown(self): + if self.original_term is not None: + os.environ["TERM"] = self.original_term + elif "TERM" in os.environ: + del os.environ["TERM"] + + @classmethod + def infocmp(cls, term) -> list[str]: + all_caps = [] + try: + result = subprocess.run( + ["infocmp", "-l1", term], + capture_output=True, + text=True, + check=True, + ) + except Exception: + raise unittest.SkipTest("calling `infocmp` failed on the system") + + for line in result.stdout.splitlines(): + line = line.strip() + if line.startswith("#"): + if "terminfo" not in line and "termcap" in line: + # PyREPL terminfo doesn't parse termcap databases + raise unittest.SkipTest( + "curses using termcap.db: no terminfo database on" + " the system" + ) + elif "=" in line: + cap_name = line.split("=")[0] + all_caps.append(cap_name) + + return all_caps + + def test_setupterm_basic(self): + """Test basic setupterm functionality.""" + # Test with explicit terminal type + test_terms = ["xterm", "xterm-256color", "vt100", "ansi"] + + for term in test_terms: + with self.subTest(term=term): + ncurses_code = dedent( + f""" + import _curses + import json + try: + _curses.setupterm({repr(term)}, 1) + print(json.dumps({{"success": True}})) + except Exception as e: + print(json.dumps({{"success": False, "error": str(e)}})) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + ncurses_data = json.loads(result.stdout) + std_success = ncurses_data["success"] + + # Set up with PyREPL curses + try: + terminfo.TermInfo(term, fallback=False) + pyrepl_success = True + except Exception as e: + pyrepl_success = False + pyrepl_error = e + + # Both should succeed or both should fail + if std_success: + self.assertTrue( + pyrepl_success, + f"Standard curses succeeded but PyREPL failed for {term}", + ) + else: + # If standard curses failed, PyREPL might still succeed with fallback + # This is acceptable as PyREPL has hardcoded fallbacks + pass + + def test_setupterm_none(self): + """Test setupterm with None (uses TERM from environment).""" + # Test with current TERM + ncurses_code = dedent( + """ + import _curses + import json + try: + _curses.setupterm(None, 1) + print(json.dumps({"success": True})) + except Exception as e: + print(json.dumps({"success": False, "error": str(e)})) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + ncurses_data = json.loads(result.stdout) + std_success = ncurses_data["success"] + + try: + terminfo.TermInfo(None, fallback=False) + pyrepl_success = True + except Exception: + pyrepl_success = False + + # Both should have same result + if std_success: + self.assertTrue( + pyrepl_success, + "Standard curses succeeded but PyREPL failed for None", + ) + + def test_tigetstr_common_capabilities(self): + """Test tigetstr for common terminal capabilities.""" + # Test with a known terminal type + term = "xterm" + + # Get ALL capabilities from infocmp + all_caps = self.infocmp(term) + + ncurses_code = dedent( + f""" + import _curses + import json + _curses.setupterm({repr(term)}, 1) + results = {{}} + for cap in {repr(all_caps)}: + try: + val = _curses.tigetstr(cap) + if val is None: + results[cap] = None + elif val == -1: + results[cap] = -1 + else: + results[cap] = list(val) + except BaseException: + results[cap] = "error" + print(json.dumps(results)) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + self.assertEqual( + result.returncode, 0, f"Failed to run ncurses: {result.stderr}" + ) + + ncurses_data = json.loads(result.stdout) + + ti = terminfo.TermInfo(term, fallback=False) + + # Test every single capability + for cap in all_caps: + if cap not in ncurses_data or ncurses_data[cap] == "error": + continue + + with self.subTest(capability=cap): + ncurses_val = ncurses_data[cap] + if isinstance(ncurses_val, list): + ncurses_val = bytes(ncurses_val) + + pyrepl_val = ti.get(cap) + + self.assertEqual( + pyrepl_val, + ncurses_val, + f"Capability {cap}: ncurses={repr(ncurses_val)}, " + f"pyrepl={repr(pyrepl_val)}", + ) + + def test_tigetstr_input_types(self): + """Test tigetstr with different input types.""" + term = "xterm" + cap = "cup" + + # Test standard curses behavior with string in subprocess + ncurses_code = dedent( + f""" + import _curses + import json + _curses.setupterm({repr(term)}, 1) + + # Test with string input + try: + std_str_result = _curses.tigetstr({repr(cap)}) + std_accepts_str = True + if std_str_result is None: + std_str_val = None + elif std_str_result == -1: + std_str_val = -1 + else: + std_str_val = list(std_str_result) + except TypeError: + std_accepts_str = False + std_str_val = None + + print(json.dumps({{ + "accepts_str": std_accepts_str, + "str_result": std_str_val + }})) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + ncurses_data = json.loads(result.stdout) + + # PyREPL setup + ti = terminfo.TermInfo(term, fallback=False) + + # PyREPL behavior with string + try: + pyrepl_str_result = ti.get(cap) + pyrepl_accepts_str = True + except TypeError: + pyrepl_accepts_str = False + + # PyREPL should also only accept strings for compatibility + with self.assertRaises(TypeError): + ti.get(cap.encode("ascii")) + + # Both should accept string input + self.assertEqual( + pyrepl_accepts_str, + ncurses_data["accepts_str"], + "PyREPL and standard curses should have same string handling", + ) + self.assertTrue( + pyrepl_accepts_str, "PyREPL should accept string input" + ) + + def test_tparm_basic(self): + """Test basic tparm functionality.""" + term = "xterm" + ti = terminfo.TermInfo(term, fallback=False) + + # Test cursor positioning (cup) + cup = ti.get("cup") + if cup and cup not in {ABSENT_STRING, CANCELLED_STRING}: + # Test various parameter combinations + test_cases = [ + (0, 0), # Top-left + (5, 10), # Arbitrary position + (23, 79), # Bottom-right of standard terminal + (999, 999), # Large values + ] + + # Get ncurses results in subprocess + ncurses_code = dedent( + f""" + import _curses + import json + _curses.setupterm({repr(term)}, 1) + + # Get cup capability + cup = _curses.tigetstr('cup') + results = {{}} + + for row, col in {repr(test_cases)}: + try: + result = _curses.tparm(cup, row, col) + results[f"{{row}},{{col}}"] = list(result) + except Exception as e: + results[f"{{row}},{{col}}"] = {{"error": str(e)}} + + print(json.dumps(results)) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + self.assertEqual( + result.returncode, 0, f"Failed to run ncurses: {result.stderr}" + ) + ncurses_data = json.loads(result.stdout) + + for row, col in test_cases: + with self.subTest(row=row, col=col): + # Standard curses tparm from subprocess + key = f"{row},{col}" + if ( + isinstance(ncurses_data[key], dict) + and "error" in ncurses_data[key] + ): + self.fail( + f"ncurses tparm failed: {ncurses_data[key]['error']}" + ) + std_result = bytes(ncurses_data[key]) + + # PyREPL curses tparm + pyrepl_result = terminfo.tparm(cup, row, col) + + # Results should be identical + self.assertEqual( + pyrepl_result, + std_result, + f"tparm(cup, {row}, {col}): " + f"std={repr(std_result)}, pyrepl={repr(pyrepl_result)}", + ) + else: + raise unittest.SkipTest( + "test_tparm_basic() requires the `cup` capability" + ) + + def test_tparm_multiple_params(self): + """Test tparm with capabilities using multiple parameters.""" + term = "xterm" + ti = terminfo.TermInfo(term, fallback=False) + + # Test capabilities that take parameters + param_caps = { + "cub": 1, # cursor_left with count + "cuf": 1, # cursor_right with count + "cuu": 1, # cursor_up with count + "cud": 1, # cursor_down with count + "dch": 1, # delete_character with count + "ich": 1, # insert_character with count + } + + # Get all capabilities from PyREPL first + pyrepl_caps = {} + for cap in param_caps: + cap_value = ti.get(cap) + if cap_value and cap_value not in { + ABSENT_STRING, + CANCELLED_STRING, + }: + pyrepl_caps[cap] = cap_value + + if not pyrepl_caps: + self.skipTest("No parametrized capabilities found") + + # Get ncurses results in subprocess + ncurses_code = dedent( + f""" + import _curses + import json + _curses.setupterm({repr(term)}, 1) + + param_caps = {repr(param_caps)} + test_values = [1, 5, 10, 99] + results = {{}} + + for cap in param_caps: + cap_value = _curses.tigetstr(cap) + if cap_value and cap_value != -1: + for value in test_values: + try: + result = _curses.tparm(cap_value, value) + results[f"{{cap}},{{value}}"] = list(result) + except Exception as e: + results[f"{{cap}},{{value}}"] = {{"error": str(e)}} + + print(json.dumps(results)) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + self.assertEqual( + result.returncode, 0, f"Failed to run ncurses: {result.stderr}" + ) + ncurses_data = json.loads(result.stdout) + + for cap, cap_value in pyrepl_caps.items(): + with self.subTest(capability=cap): + # Test with different parameter values + for value in [1, 5, 10, 99]: + key = f"{cap},{value}" + if key in ncurses_data: + if ( + isinstance(ncurses_data[key], dict) + and "error" in ncurses_data[key] + ): + self.fail( + f"ncurses tparm failed: {ncurses_data[key]['error']}" + ) + std_result = bytes(ncurses_data[key]) + + pyrepl_result = terminfo.tparm(cap_value, value) + self.assertEqual( + pyrepl_result, + std_result, + f"tparm({cap}, {value}): " + f"std={repr(std_result)}, pyrepl={repr(pyrepl_result)}", + ) + + def test_tparm_null_handling(self): + """Test tparm with None/null input.""" + term = "xterm" + + ncurses_code = dedent( + f""" + import _curses + import json + _curses.setupterm({repr(term)}, 1) + + # Test with None + try: + _curses.tparm(None) + raises_typeerror = False + except TypeError: + raises_typeerror = True + except Exception as e: + raises_typeerror = False + error_type = type(e).__name__ + + print(json.dumps({{"raises_typeerror": raises_typeerror}})) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + ncurses_data = json.loads(result.stdout) + + # PyREPL setup + ti = terminfo.TermInfo(term, fallback=False) + + # Test with None - both should raise TypeError + if ncurses_data["raises_typeerror"]: + with self.assertRaises(TypeError): + terminfo.tparm(None) + else: + # If ncurses doesn't raise TypeError, PyREPL shouldn't either + try: + terminfo.tparm(None) + except TypeError: + self.fail("PyREPL raised TypeError but ncurses did not") + + def test_special_terminals(self): + """Test with special terminal types.""" + special_terms = [ + "dumb", # Minimal terminal + "unknown", # Should fall back to defaults + "linux", # Linux console + "screen", # GNU Screen + "tmux", # tmux + ] + + # Get all string capabilities from ncurses + for term in special_terms: + with self.subTest(term=term): + all_caps = self.infocmp(term) + ncurses_code = dedent( + f""" + import _curses + import json + import sys + + try: + _curses.setupterm({repr(term)}, 1) + results = {{}} + for cap in {repr(all_caps)}: + try: + val = _curses.tigetstr(cap) + if val is None: + results[cap] = None + elif val == -1: + results[cap] = -1 + else: + # Convert bytes to list of ints for JSON + results[cap] = list(val) + except BaseException: + results[cap] = "error" + print(json.dumps(results)) + except Exception as e: + print(json.dumps({{"error": str(e)}})) + """ + ) + + # Get ncurses results + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + if result.returncode != 0: + self.fail( + f"Failed to get ncurses data for {term}: {result.stderr}" + ) + + try: + ncurses_data = json.loads(result.stdout) + except json.JSONDecodeError: + self.fail( + f"Failed to parse ncurses output for {term}: {result.stdout}" + ) + + if "error" in ncurses_data and len(ncurses_data) == 1: + # ncurses failed to setup this terminal + # PyREPL should still work with fallback + ti = terminfo.TermInfo(term, fallback=True) + continue + + ti = terminfo.TermInfo(term, fallback=False) + + # Compare all capabilities + for cap in all_caps: + if cap not in ncurses_data: + continue + + with self.subTest(term=term, capability=cap): + ncurses_val = ncurses_data[cap] + if isinstance(ncurses_val, list): + # Convert back to bytes + ncurses_val = bytes(ncurses_val) + + pyrepl_val = ti.get(cap) + + # Both should return the same value + self.assertEqual( + pyrepl_val, + ncurses_val, + f"Capability {cap} for {term}: " + f"ncurses={repr(ncurses_val)}, " + f"pyrepl={repr(pyrepl_val)}", + ) + + def test_terminfo_fallback(self): + """Test that PyREPL falls back gracefully when terminfo is not found.""" + # Use a non-existent terminal type + fake_term = "nonexistent-terminal-type-12345" + + # Check if standard curses can setup this terminal in subprocess + ncurses_code = dedent( + f""" + import _curses + import json + try: + _curses.setupterm({repr(fake_term)}, 1) + print(json.dumps({{"success": True}})) + except _curses.error: + print(json.dumps({{"success": False, "error": "curses.error"}})) + except Exception as e: + print(json.dumps({{"success": False, "error": str(e)}})) + """ + ) + + result = subprocess.run( + [sys.executable, "-c", ncurses_code], + capture_output=True, + text=True, + ) + ncurses_data = json.loads(result.stdout) + + if ncurses_data["success"]: + # If it succeeded, skip this test as we can't test fallback + self.skipTest( + f"System unexpectedly has terminfo for '{fake_term}'" + ) + + # PyREPL should succeed with fallback + try: + ti = terminfo.TermInfo(fake_term, fallback=True) + pyrepl_ok = True + except Exception: + pyrepl_ok = False + + self.assertTrue( + pyrepl_ok, "PyREPL should fall back for unknown terminals" + ) + + # Should still be able to get basic capabilities + bel = ti.get("bel") + self.assertIsNotNone( + bel, "PyREPL should provide basic capabilities after fallback" + ) + + def test_invalid_terminal_names(self): + cases = [ + (42, TypeError), + ("", ValueError), + ("w\x00t", ValueError), + (f"..{os.sep}name", ValueError), + ] + + for term, exc in cases: + with self.subTest(term=term): + with self.assertRaises(exc): + terminfo._validate_terminal_name_or_raise(term) diff --git a/Lib/test/test_pyrepl/test_unix_console.py b/Lib/test/test_pyrepl/test_unix_console.py index c447b310c49a06..f4fb9237ffdfd0 100644 --- a/Lib/test/test_pyrepl/test_unix_console.py +++ b/Lib/test/test_pyrepl/test_unix_console.py @@ -1,12 +1,18 @@ +import errno import itertools import os +import signal +import subprocess import sys +import threading import unittest from functools import partial +from test import support from test.support import os_helper, force_not_colorized_test_class +from test.support import script_helper, threading_helper from unittest import TestCase -from unittest.mock import MagicMock, call, patch, ANY +from unittest.mock import MagicMock, call, patch, ANY, Mock from .support import handle_all_events, code_to_events @@ -16,10 +22,15 @@ except ImportError: pass +from _pyrepl.terminfo import _TERMINAL_CAPABILITIES + +TERM_CAPABILITIES = _TERMINAL_CAPABILITIES["ansi"] + def unix_console(events, **kwargs): - console = UnixConsole() + console = UnixConsole(term="xterm") console.get_event = MagicMock(side_effect=events) + console.getpending = MagicMock(return_value=Event("key", "")) height = kwargs.get("height", 25) width = kwargs.get("width", 80) @@ -49,41 +60,11 @@ def unix_console(events, **kwargs): ) -TERM_CAPABILITIES = { - "bel": b"\x07", - "civis": b"\x1b[?25l", - "clear": b"\x1b[H\x1b[2J", - "cnorm": b"\x1b[?12l\x1b[?25h", - "cub": b"\x1b[%p1%dD", - "cub1": b"\x08", - "cud": b"\x1b[%p1%dB", - "cud1": b"\n", - "cuf": b"\x1b[%p1%dC", - "cuf1": b"\x1b[C", - "cup": b"\x1b[%i%p1%d;%p2%dH", - "cuu": b"\x1b[%p1%dA", - "cuu1": b"\x1b[A", - "dch1": b"\x1b[P", - "dch": b"\x1b[%p1%dP", - "el": b"\x1b[K", - "hpa": b"\x1b[%i%p1%dG", - "ich": b"\x1b[%p1%d@", - "ich1": None, - "ind": b"\n", - "pad": None, - "ri": b"\x1bM", - "rmkx": b"\x1b[?1l\x1b>", - "smkx": b"\x1b[?1h\x1b=", -} - - @unittest.skipIf(sys.platform == "win32", "No Unix event queue on Windows") -@patch("_pyrepl.curses.tigetstr", lambda s: TERM_CAPABILITIES.get(s)) @patch( - "_pyrepl.curses.tparm", + "_pyrepl.terminfo.tparm", lambda s, *args: s + b":" + b",".join(str(i).encode() for i in args), ) -@patch("_pyrepl.curses.setupterm", lambda a, b: None) @patch( "termios.tcgetattr", lambda _: [ @@ -320,7 +301,7 @@ def same_console(events): def test_getheightwidth_with_invalid_environ(self, _os_write): # gh-128636 - console = UnixConsole() + console = UnixConsole(term="xterm") with os_helper.EnvironmentVarGuard() as env: env["LINES"] = "" self.assertIsInstance(console.getheightwidth(), tuple) @@ -328,3 +309,80 @@ def test_getheightwidth_with_invalid_environ(self, _os_write): self.assertIsInstance(console.getheightwidth(), tuple) os.environ = [] self.assertIsInstance(console.getheightwidth(), tuple) + + @unittest.skipUnless(sys.platform == "darwin", "requires macOS") + def test_restore_with_invalid_environ_on_macos(self, _os_write): + # gh-128636 for macOS + console = UnixConsole(term="xterm") + with os_helper.EnvironmentVarGuard(): + os.environ = [] + console.prepare() # needed to call restore() + console.restore() # this should succeed + + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_restore_in_thread(self, _os_write): + # gh-139391: ensure that console.restore() silently suppresses + # exceptions when calling signal.signal() from a non-main thread. + console = unix_console([]) + console.old_sigwinch = signal.SIG_DFL + thread = threading.Thread(target=console.restore) + thread.start() + thread.join() # this should not raise + + +@unittest.skipIf(sys.platform == "win32", "No Unix console on Windows") +class TestUnixConsoleEIOHandling(TestCase): + + @patch('_pyrepl.unix_console.tcsetattr') + @patch('_pyrepl.unix_console.tcgetattr') + def test_eio_error_handling_in_restore(self, mock_tcgetattr, mock_tcsetattr): + + import termios + mock_termios = Mock() + mock_termios.iflag = 0 + mock_termios.oflag = 0 + mock_termios.cflag = 0 + mock_termios.lflag = 0 + mock_termios.cc = [0] * 32 + mock_termios.copy.return_value = mock_termios + mock_tcgetattr.return_value = mock_termios + + console = UnixConsole(term="xterm") + console.prepare() + + mock_tcsetattr.side_effect = termios.error(errno.EIO, "Input/output error") + + # EIO error should be handled gracefully in restore() + console.restore() + + @unittest.skipUnless(sys.platform == "linux", "Only valid on Linux") + def test_repl_eio(self): + # Use the pty-based approach to simulate EIO error + script_path = os.path.join(os.path.dirname(__file__), "eio_test_script.py") + + proc = script_helper.spawn_python( + "-S", script_path, + stderr=subprocess.PIPE, + text=True + ) + + ready_line = proc.stdout.readline().strip() + if ready_line != "READY" or proc.poll() is not None: + self.fail("Child process failed to start properly") + + os.kill(proc.pid, signal.SIGUSR1) + # sleep for pty to settle + _, err = proc.communicate(timeout=support.LONG_TIMEOUT) + self.assertEqual( + proc.returncode, + 1, + f"Expected EIO/ENXIO error, got return code {proc.returncode}", + ) + self.assertTrue( + ( + "Got EIO:" in err + or "Got ENXIO:" in err + ), + f"Expected EIO/ENXIO error message in stderr: {err}", + ) diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 8ce1e5371386f0..656a1e441e0e47 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -1,14 +1,33 @@ from unittest import TestCase -from _pyrepl.utils import str_width, wlen, prev_next_window +from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors class TestUtils(TestCase): def test_str_width(self): - characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9'] + characters = [ + 'a', + '1', + '_', + '!', + '\x1a', + '\u263A', + '\uffb9', + '\N{LATIN SMALL LETTER E WITH ACUTE}', # é + '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ + '\u00ad', + ] for c in characters: self.assertEqual(str_width(c), 1) + zero_width_characters = [ + '\N{COMBINING ACUTE ACCENT}', + '\N{ZERO WIDTH JOINER}', + ] + for c in zero_width_characters: + with self.subTest(character=c): + self.assertEqual(str_width(c), 0) + characters = [chr(99989), chr(99999)] for c in characters: self.assertEqual(str_width(c), 2) @@ -25,6 +44,8 @@ def test_wlen(self): self.assertEqual(wlen('hello'), 5) self.assertEqual(wlen('hello' + '\x1a'), 7) + self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1) + self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2) def test_prev_next_window(self): def gen_normal(): @@ -60,3 +81,25 @@ def gen_raise(): self.assertEqual(next(pnw), (3, 4, None)) with self.assertRaises(ZeroDivisionError): next(pnw) + + def test_gen_colors_keyword_highlighting(self): + cases = [ + # no highlights + ("a.set", [(".", "op")]), + ("obj.list", [(".", "op")]), + ("obj.match", [(".", "op")]), + ("b. \\\n format", [(".", "op")]), + # highlights + ("set", [("set", "builtin")]), + ("list", [("list", "builtin")]), + (" \n dict", [("dict", "builtin")]), + ] + for code, expected_highlights in cases: + with self.subTest(code=code): + colors = list(gen_colors(code)) + # Extract (text, tag) pairs for comparison + actual_highlights = [] + for color in colors: + span_text = code[color.span.start:color.span.end + 1] + actual_highlights.append((span_text, color.tag)) + self.assertEqual(actual_highlights, expected_highlights) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index e7bab226b31ddf..f9607e02c604ff 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -35,6 +35,7 @@ class WindowsConsoleTests(TestCase): def console(self, events, **kwargs) -> Console: console = WindowsConsole() console.get_event = MagicMock(side_effect=events) + console.getpending = MagicMock(return_value=Event("key", "")) console.wait = MagicMock() console._scroll = MagicMock() console._hide_cursor = MagicMock() @@ -385,6 +386,7 @@ def get_event(self, input_records, **kwargs) -> Console: self.console._read_input = self.mock self.console._WindowsConsole__vt_support = kwargs.get("vt_support", False) + self.console.wait = MagicMock(return_value=True) event = self.console.get_event(block=False) return event diff --git a/Lib/test/test_queue.py b/Lib/test/test_queue.py index 7f4fe357034b71..c855fb8fe2b05a 100644 --- a/Lib/test/test_queue.py +++ b/Lib/test/test_queue.py @@ -6,7 +6,7 @@ import time import unittest import weakref -from test.support import gc_collect +from test.support import gc_collect, bigmemtest from test.support import import_helper from test.support import threading_helper @@ -963,33 +963,33 @@ def test_order(self): # One producer, one consumer => results appended in well-defined order self.assertEqual(results, inputs) - def test_many_threads(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads(self, size): # Test multiple concurrent put() and get() - N = 50 q = self.q inputs = list(range(10000)) - results = self.run_threads(N, q, inputs, self.feed, self.consume) + results = self.run_threads(size, q, inputs, self.feed, self.consume) # Multiple consumers without synchronization append the # results in random order self.assertEqual(sorted(results), inputs) - def test_many_threads_nonblock(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads_nonblock(self, size): # Test multiple concurrent put() and get(block=False) - N = 50 q = self.q inputs = list(range(10000)) - results = self.run_threads(N, q, inputs, + results = self.run_threads(size, q, inputs, self.feed, self.consume_nonblock) self.assertEqual(sorted(results), inputs) - def test_many_threads_timeout(self): + @bigmemtest(size=50, memuse=100*2**20, dry_run=False) + def test_many_threads_timeout(self, size): # Test multiple concurrent put() and get(timeout=...) - N = 50 q = self.q inputs = list(range(1000)) - results = self.run_threads(N, q, inputs, + results = self.run_threads(size, q, inputs, self.feed, self.consume_timeout) self.assertEqual(sorted(results), inputs) diff --git a/Lib/test/test_raise.py b/Lib/test/test_raise.py index dcf0753bc828f3..645ef291a58499 100644 --- a/Lib/test/test_raise.py +++ b/Lib/test/test_raise.py @@ -186,18 +186,14 @@ def test_class_cause(self): self.fail("No exception raised") def test_class_cause_nonexception_result(self): - class ConstructsNone(BaseException): - @classmethod + # See https://github.com/python/cpython/issues/140530. + class ConstructMortal(BaseException): def __new__(*args, **kwargs): - return None - try: - raise IndexError from ConstructsNone - except TypeError as e: - self.assertIn("should have returned an instance of BaseException", str(e)) - except IndexError: - self.fail("Wrong kind of exception raised") - else: - self.fail("No exception raised") + return ["mortal value"] + + msg = ".*should have returned an instance of BaseException.*" + with self.assertRaisesRegex(TypeError, msg): + raise IndexError from ConstructMortal def test_instance_cause(self): cause = KeyError() diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 43957f525f10c0..0217ebd132b110 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -14,6 +14,15 @@ from fractions import Fraction from collections import abc, Counter + +class MyIndex: + def __init__(self, value): + self.value = value + + def __index__(self): + return self.value + + class TestBasicOps: # Superclass with tests common to all generators. # Subclasses must arrange for self.gen to retrieve the Random instance @@ -142,6 +151,7 @@ def test_sample(self): # Exception raised if size of sample exceeds that of population self.assertRaises(ValueError, self.gen.sample, population, N+1) self.assertRaises(ValueError, self.gen.sample, [], -1) + self.assertRaises(TypeError, self.gen.sample, population, 1.0) def test_sample_distribution(self): # For the entire allowable range of 0 <= k <= N, validate that @@ -259,6 +269,7 @@ def test_choices(self): choices(data, range(4), k=5), choices(k=5, population=data, weights=range(4)), choices(k=5, population=data, cum_weights=range(4)), + choices(data, k=MyIndex(5)), ]: self.assertEqual(len(sample), 5) self.assertEqual(type(sample), list) @@ -369,118 +380,40 @@ def test_gauss(self): self.assertEqual(x1, x2) self.assertEqual(y1, y2) + @support.requires_IEEE_754 + def test_53_bits_per_float(self): + span = 2 ** 53 + cum = 0 + for i in range(100): + cum |= int(self.gen.random() * span) + self.assertEqual(cum, span-1) + def test_getrandbits(self): + getrandbits = self.gen.getrandbits # Verify ranges for k in range(1, 1000): - self.assertTrue(0 <= self.gen.getrandbits(k) < 2**k) - self.assertEqual(self.gen.getrandbits(0), 0) + self.assertTrue(0 <= getrandbits(k) < 2**k) + self.assertEqual(getrandbits(0), 0) # Verify all bits active - getbits = self.gen.getrandbits for span in [1, 2, 3, 4, 31, 32, 32, 52, 53, 54, 119, 127, 128, 129]: all_bits = 2**span-1 cum = 0 cpl_cum = 0 for i in range(100): - v = getbits(span) + v = getrandbits(span) cum |= v cpl_cum |= all_bits ^ v self.assertEqual(cum, all_bits) self.assertEqual(cpl_cum, all_bits) # Verify argument checking - self.assertRaises(TypeError, self.gen.getrandbits) - self.assertRaises(TypeError, self.gen.getrandbits, 1, 2) - self.assertRaises(ValueError, self.gen.getrandbits, -1) - self.assertRaises(TypeError, self.gen.getrandbits, 10.1) - - def test_pickling(self): - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - state = pickle.dumps(self.gen, proto) - origseq = [self.gen.random() for i in range(10)] - newgen = pickle.loads(state) - restoredseq = [newgen.random() for i in range(10)] - self.assertEqual(origseq, restoredseq) - - def test_bug_1727780(self): - # verify that version-2-pickles can be loaded - # fine, whether they are created on 32-bit or 64-bit - # platforms, and that version-3-pickles load fine. - files = [("randv2_32.pck", 780), - ("randv2_64.pck", 866), - ("randv3.pck", 343)] - for file, value in files: - with open(support.findfile(file),"rb") as f: - r = pickle.load(f) - self.assertEqual(int(r.random()*1000), value) - - def test_bug_9025(self): - # Had problem with an uneven distribution in int(n*random()) - # Verify the fix by checking that distributions fall within expectations. - n = 100000 - randrange = self.gen.randrange - k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n)) - self.assertTrue(0.30 < k/n < .37, (k/n)) - - def test_randbytes(self): - # Verify ranges - for n in range(1, 10): - data = self.gen.randbytes(n) - self.assertEqual(type(data), bytes) - self.assertEqual(len(data), n) - - self.assertEqual(self.gen.randbytes(0), b'') - - # Verify argument checking - self.assertRaises(TypeError, self.gen.randbytes) - self.assertRaises(TypeError, self.gen.randbytes, 1, 2) - self.assertRaises(ValueError, self.gen.randbytes, -1) - self.assertRaises(TypeError, self.gen.randbytes, 1.0) - - def test_mu_sigma_default_args(self): - self.assertIsInstance(self.gen.normalvariate(), float) - self.assertIsInstance(self.gen.gauss(), float) - - -try: - random.SystemRandom().random() -except NotImplementedError: - SystemRandom_available = False -else: - SystemRandom_available = True - -@unittest.skipUnless(SystemRandom_available, "random.SystemRandom not available") -class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase): - gen = random.SystemRandom() - - def test_autoseed(self): - # Doesn't need to do anything except not fail - self.gen.seed() - - def test_saverestore(self): - self.assertRaises(NotImplementedError, self.gen.getstate) - self.assertRaises(NotImplementedError, self.gen.setstate, None) - - def test_seedargs(self): - # Doesn't need to do anything except not fail - self.gen.seed(100) - - def test_gauss(self): - self.gen.gauss_next = None - self.gen.seed(100) - self.assertEqual(self.gen.gauss_next, None) - - def test_pickling(self): - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - self.assertRaises(NotImplementedError, pickle.dumps, self.gen, proto) - - def test_53_bits_per_float(self): - # This should pass whenever a C double has 53 bit precision. - span = 2 ** 53 - cum = 0 - for i in range(100): - cum |= int(self.gen.random() * span) - self.assertEqual(cum, span-1) + self.assertRaises(TypeError, getrandbits) + self.assertRaises(TypeError, getrandbits, 1, 2) + self.assertRaises(ValueError, getrandbits, -1) + self.assertRaises(OverflowError, getrandbits, 1<<1000) + self.assertRaises(ValueError, getrandbits, -1<<1000) + self.assertRaises(TypeError, getrandbits, 10.1) def test_bigrand(self): # The randrange routine should build-up the required number of bits @@ -559,6 +492,10 @@ def test_randrange_step(self): randrange(1000, step=100) with self.assertRaises(TypeError): randrange(1000, None, step=100) + with self.assertRaises(TypeError): + randrange(1000, step=MyIndex(1)) + with self.assertRaises(TypeError): + randrange(1000, None, step=MyIndex(1)) def test_randbelow_logic(self, _log=log, int=int): # check bitcount transition points: 2**i and 2**(i+1)-1 @@ -581,6 +518,116 @@ def test_randbelow_logic(self, _log=log, int=int): self.assertEqual(k, numbits) # note the stronger assertion self.assertTrue(2**k > n > 2**(k-1)) # note the stronger assertion + def test_randrange_index(self): + randrange = self.gen.randrange + self.assertIn(randrange(MyIndex(5)), range(5)) + self.assertIn(randrange(MyIndex(2), MyIndex(7)), range(2, 7)) + self.assertIn(randrange(MyIndex(5), MyIndex(15), MyIndex(2)), range(5, 15, 2)) + + def test_randint(self): + randint = self.gen.randint + self.assertIn(randint(2, 5), (2, 3, 4, 5)) + self.assertEqual(randint(2, 2), 2) + self.assertIn(randint(MyIndex(2), MyIndex(5)), (2, 3, 4, 5)) + self.assertEqual(randint(MyIndex(2), MyIndex(2)), 2) + + self.assertRaises(ValueError, randint, 5, 2) + self.assertRaises(TypeError, randint) + self.assertRaises(TypeError, randint, 2) + self.assertRaises(TypeError, randint, 2, 5, 1) + self.assertRaises(TypeError, randint, 2.0, 5) + self.assertRaises(TypeError, randint, 2, 5.0) + + def test_pickling(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + state = pickle.dumps(self.gen, proto) + origseq = [self.gen.random() for i in range(10)] + newgen = pickle.loads(state) + restoredseq = [newgen.random() for i in range(10)] + self.assertEqual(origseq, restoredseq) + + def test_bug_1727780(self): + # verify that version-2-pickles can be loaded + # fine, whether they are created on 32-bit or 64-bit + # platforms, and that version-3-pickles load fine. + files = [("randv2_32.pck", 780), + ("randv2_64.pck", 866), + ("randv3.pck", 343)] + for file, value in files: + with open(support.findfile(file),"rb") as f: + r = pickle.load(f) + self.assertEqual(int(r.random()*1000), value) + + def test_bug_9025(self): + # Had problem with an uneven distribution in int(n*random()) + # Verify the fix by checking that distributions fall within expectations. + n = 100000 + randrange = self.gen.randrange + k = sum(randrange(6755399441055744) % 3 == 2 for i in range(n)) + self.assertTrue(0.30 < k/n < .37, (k/n)) + + def test_randrange_bug_1590891(self): + start = 1000000000000 + stop = -100000000000000000000 + step = -200 + x = self.gen.randrange(start, stop, step) + self.assertTrue(stop < x <= start) + self.assertEqual((x+stop)%step, 0) + + def test_randbytes(self): + # Verify ranges + for n in range(1, 10): + data = self.gen.randbytes(n) + self.assertEqual(type(data), bytes) + self.assertEqual(len(data), n) + + self.assertEqual(self.gen.randbytes(0), b'') + + # Verify argument checking + self.assertRaises(TypeError, self.gen.randbytes) + self.assertRaises(TypeError, self.gen.randbytes, 1, 2) + self.assertRaises(ValueError, self.gen.randbytes, -1) + self.assertRaises(OverflowError, self.gen.randbytes, 1<<1000) + self.assertRaises((ValueError, OverflowError), self.gen.randbytes, -1<<1000) + self.assertRaises(TypeError, self.gen.randbytes, 1.0) + + def test_mu_sigma_default_args(self): + self.assertIsInstance(self.gen.normalvariate(), float) + self.assertIsInstance(self.gen.gauss(), float) + + +try: + random.SystemRandom().random() +except NotImplementedError: + SystemRandom_available = False +else: + SystemRandom_available = True + +@unittest.skipUnless(SystemRandom_available, "random.SystemRandom not available") +class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase): + gen = random.SystemRandom() + + def test_autoseed(self): + # Doesn't need to do anything except not fail + self.gen.seed() + + def test_saverestore(self): + self.assertRaises(NotImplementedError, self.gen.getstate) + self.assertRaises(NotImplementedError, self.gen.setstate, None) + + def test_seedargs(self): + # Doesn't need to do anything except not fail + self.gen.seed(100) + + def test_gauss(self): + self.gen.gauss_next = None + self.gen.seed(100) + self.assertEqual(self.gen.gauss_next, None) + + def test_pickling(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + self.assertRaises(NotImplementedError, pickle.dumps, self.gen, proto) + class TestRawMersenneTwister(unittest.TestCase): @test.support.cpython_only @@ -766,38 +813,6 @@ def test_long_seed(self): seed = (1 << (10000 * 8)) - 1 # about 10K bytes self.gen.seed(seed) - def test_53_bits_per_float(self): - # This should pass whenever a C double has 53 bit precision. - span = 2 ** 53 - cum = 0 - for i in range(100): - cum |= int(self.gen.random() * span) - self.assertEqual(cum, span-1) - - def test_bigrand(self): - # The randrange routine should build-up the required number of bits - # in stages so that all bit positions are active. - span = 2 ** 500 - cum = 0 - for i in range(100): - r = self.gen.randrange(span) - self.assertTrue(0 <= r < span) - cum |= r - self.assertEqual(cum, span-1) - - def test_bigrand_ranges(self): - for i in [40,80, 160, 200, 211, 250, 375, 512, 550]: - start = self.gen.randrange(2 ** (i-2)) - stop = self.gen.randrange(2 ** i) - if stop <= start: - continue - self.assertTrue(start <= self.gen.randrange(start, stop) < stop) - - def test_rangelimits(self): - for start, stop in [(-2,0), (-(2**60)-2,-(2**60)), (2**60,2**60+2)]: - self.assertEqual(set(range(start,stop)), - set([self.gen.randrange(start,stop) for i in range(100)])) - def test_getrandbits(self): super().test_getrandbits() @@ -805,6 +820,25 @@ def test_getrandbits(self): self.gen.seed(1234567) self.assertEqual(self.gen.getrandbits(100), 97904845777343510404718956115) + self.gen.seed(1234567) + self.assertEqual(self.gen.getrandbits(MyIndex(100)), + 97904845777343510404718956115) + + def test_getrandbits_2G_bits(self): + size = 2**31 + self.gen.seed(1234567) + x = self.gen.getrandbits(size) + self.assertEqual(x.bit_length(), size) + self.assertEqual(x & (2**100-1), 890186470919986886340158459475) + self.assertEqual(x >> (size-100), 1226514312032729439655761284440) + + @support.bigmemtest(size=2**32, memuse=1/8+2/15, dry_run=False) + def test_getrandbits_4G_bits(self, size): + self.gen.seed(1234568) + x = self.gen.getrandbits(size) + self.assertEqual(x.bit_length(), size) + self.assertEqual(x & (2**100-1), 287241425661104632871036099814) + self.assertEqual(x >> (size-100), 739728759900339699429794460738) def test_randrange_uses_getrandbits(self): # Verify use of getrandbits by randrange @@ -816,27 +850,6 @@ def test_randrange_uses_getrandbits(self): self.assertEqual(self.gen.randrange(2**99), 97904845777343510404718956115) - def test_randbelow_logic(self, _log=log, int=int): - # check bitcount transition points: 2**i and 2**(i+1)-1 - # show that: k = int(1.001 + _log(n, 2)) - # is equal to or one greater than the number of bits in n - for i in range(1, 1000): - n = 1 << i # check an exact power of two - numbits = i+1 - k = int(1.00001 + _log(n, 2)) - self.assertEqual(k, numbits) - self.assertEqual(n, 2**(k-1)) - - n += n - 1 # check 1 below the next power of two - k = int(1.00001 + _log(n, 2)) - self.assertIn(k, [numbits, numbits+1]) - self.assertTrue(2**k > n > 2**(k-2)) - - n -= n >> 15 # check a little farther below the next power of two - k = int(1.00001 + _log(n, 2)) - self.assertEqual(k, numbits) # note the stronger assertion - self.assertTrue(2**k > n > 2**(k-1)) # note the stronger assertion - def test_randbelow_without_getrandbits(self): # Random._randbelow() can only use random() when the built-in one # has been overridden but no new getrandbits() method was supplied. @@ -871,14 +884,6 @@ def test_randbelow_without_getrandbits(self): self.gen._randbelow_without_getrandbits(n, maxsize=maxsize) self.assertEqual(random_mock.call_count, 2) - def test_randrange_bug_1590891(self): - start = 1000000000000 - stop = -100000000000000000000 - step = -200 - x = self.gen.randrange(start, stop, step) - self.assertTrue(stop < x <= start) - self.assertEqual((x+stop)%step, 0) - def test_choices_algorithms(self): # The various ways of specifying weights should produce the same results choices = self.gen.choices @@ -962,6 +967,14 @@ def test_randbytes_getrandbits(self): self.assertEqual(self.gen.randbytes(n), gen2.getrandbits(n * 8).to_bytes(n, 'little')) + @support.bigmemtest(size=2**29, memuse=1+16/15, dry_run=False) + def test_randbytes_256M(self, size): + self.gen.seed(2849427419) + x = self.gen.randbytes(size) + self.assertEqual(len(x), size) + self.assertEqual(x[:12].hex(), 'f6fd9ae63855ab91ea238b4f') + self.assertEqual(x[-12:].hex(), '0e7af69a84ee99bf4a11becc') + def test_sample_counts_equivalence(self): # Test the documented strong equivalence to a sample with repeated elements. # We run this test on random.Random() which makes deterministic selections @@ -1415,27 +1428,27 @@ class CommandLineTest(unittest.TestCase): def test_parse_args(self): args, help_text = random._parse_args(shlex.split("--choice a b c")) self.assertEqual(args.choice, ["a", "b", "c"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("--integer 5")) self.assertEqual(args.integer, 5) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("--float 2.5")) self.assertEqual(args.float, 2.5) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("a b c")) self.assertEqual(args.input, ["a", "b", "c"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("5")) self.assertEqual(args.input, ["5"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") args, help_text = random._parse_args(shlex.split("2.5")) self.assertEqual(args.input, ["2.5"]) - self.assertTrue(help_text.startswith("usage: ")) + self.assertStartsWith(help_text, "usage: ") def test_main(self): for command, expected in [ diff --git a/Lib/test/test_range.py b/Lib/test/test_range.py index 3870b153688b25..2c9c290e8906b7 100644 --- a/Lib/test/test_range.py +++ b/Lib/test/test_range.py @@ -470,6 +470,16 @@ def test_iterator_setstate(self): it.__setstate__(2**64 - 7) self.assertEqual(list(it), [12, 10]) + def test_iterator_invalid_setstate(self): + for invalid_value in (1.0, ""): + ranges = (('rangeiter', range(10, 100, 2)), + ('longrangeiter', range(10, 2**65, 2))) + for rng_name, rng in ranges: + with self.subTest(invalid_value=invalid_value, range=rng_name): + it = iter(rng) + with self.assertRaises(TypeError): + it.__setstate__(invalid_value) + def test_odd_bug(self): # This used to raise a "SystemError: NULL result without error" # because the range validation step was eating the exception diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f79a6149078996..f6e797b3785dbe 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2178,6 +2178,8 @@ def test_bug_20998(self): self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390") + @unittest.skipIf(sys.platform.startswith("sunos"), + "test doesn't work on Solaris, gh-91214") def test_locale_caching(self): # Issue #22410 oldlocale = locale.setlocale(locale.LC_CTYPE) @@ -2215,6 +2217,8 @@ def check_en_US_utf8(self): self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) @unittest.skipIf(linked_to_musl(), "musl libc issue, bpo-46390") + @unittest.skipIf(sys.platform.startswith("sunos"), + "test doesn't work on Solaris, gh-91214") def test_locale_compiled(self): oldlocale = locale.setlocale(locale.LC_CTYPE) self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) @@ -2868,11 +2872,11 @@ def test_long_pattern(self): pattern = 'Very %spattern' % ('long ' * 1000) r = repr(re.compile(pattern)) self.assertLess(len(r), 300) - self.assertEqual(r[:30], "re.compile('Very long long lon") + self.assertStartsWith(r, "re.compile('Very long long lon") r = repr(re.compile(pattern, re.I)) self.assertLess(len(r), 300) - self.assertEqual(r[:30], "re.compile('Very long long lon") - self.assertEqual(r[-16:], ", re.IGNORECASE)") + self.assertStartsWith(r, "re.compile('Very long long lon") + self.assertEndsWith(r, ", re.IGNORECASE)") def test_flags_repr(self): self.assertEqual(repr(re.I), "re.IGNORECASE") @@ -2951,7 +2955,7 @@ def test_deprecated_modules(self): self.assertEqual(mod.__name__, name) self.assertEqual(mod.__package__, '') for attr in deprecated[name]: - self.assertTrue(hasattr(mod, attr)) + self.assertHasAttr(mod, attr) del sys.modules[name] @cpython_only diff --git a/Lib/test/test_readline.py b/Lib/test/test_readline.py index b9d082b3597f13..45192fe508270d 100644 --- a/Lib/test/test_readline.py +++ b/Lib/test/test_readline.py @@ -1,6 +1,7 @@ """ Very minimal unittests for parts of the readline module. """ +import codecs import locale import os import sys @@ -231,6 +232,13 @@ def test_nonascii(self): # writing and reading non-ASCII bytes into/from a TTY works, but # readline or ncurses ignores non-ASCII bytes on read. self.skipTest(f"the LC_CTYPE locale is {loc!r}") + if sys.flags.utf8_mode: + encoding = locale.getencoding() + encoding = codecs.lookup(encoding).name # normalize the name + if encoding != "utf-8": + # gh-133711: The Python UTF-8 Mode ignores the LC_CTYPE locale + # and always use the UTF-8 encoding. + self.skipTest(f"the LC_CTYPE encoding is {encoding!r}") try: readline.add_history("\xEB\xEF") diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 7e317d5ab943ff..220b1ebcd7bc1f 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -182,6 +182,22 @@ def test_randomize(self): self.assertTrue(regrtest.randomize) self.assertIsInstance(regrtest.random_seed, int) + def test_no_randomize(self): + ns = self.parse_args([]) + self.assertIs(ns.randomize, False) + + ns = self.parse_args(["--randomize"]) + self.assertIs(ns.randomize, True) + + ns = self.parse_args(["--no-randomize"]) + self.assertIs(ns.randomize, False) + + ns = self.parse_args(["--randomize", "--no-randomize"]) + self.assertIs(ns.randomize, False) + + ns = self.parse_args(["--no-randomize", "--randomize"]) + self.assertIs(ns.randomize, False) + def test_randseed(self): ns = self.parse_args(['--randseed', '12345']) self.assertEqual(ns.random_seed, 12345) @@ -189,6 +205,10 @@ def test_randseed(self): self.checkError(['--randseed'], 'expected one argument') self.checkError(['--randseed', 'foo'], 'invalid int value') + ns = self.parse_args(['--randseed', '12345', '--no-randomize']) + self.assertEqual(ns.random_seed, 12345) + self.assertFalse(ns.randomize) + def test_fromfile(self): for opt in '-f', '--fromfile': with self.subTest(opt=opt): @@ -428,15 +448,17 @@ def create_regrtest(self, args): return regrtest - def check_ci_mode(self, args, use_resources, rerun=True): + def check_ci_mode(self, args, use_resources, + *, rerun=True, randomize=True, output_on_failure=True): regrtest = self.create_regrtest(args) self.assertEqual(regrtest.num_workers, -1) self.assertEqual(regrtest.want_rerun, rerun) - self.assertTrue(regrtest.randomize) + self.assertEqual(regrtest.fail_rerun, False) + self.assertEqual(regrtest.randomize, randomize) self.assertIsInstance(regrtest.random_seed, int) self.assertTrue(regrtest.fail_env_changed) self.assertTrue(regrtest.print_slowest) - self.assertTrue(regrtest.output_on_failure) + self.assertEqual(regrtest.output_on_failure, output_on_failure) self.assertEqual(sorted(regrtest.use_resources), sorted(use_resources)) return regrtest @@ -463,12 +485,29 @@ def test_fast_ci_resource(self): use_resources.remove('network') self.check_ci_mode(args, use_resources) + def test_fast_ci_verbose(self): + args = ['--fast-ci', '--verbose'] + use_resources = sorted(cmdline.ALL_RESOURCES) + use_resources.remove('cpu') + regrtest = self.check_ci_mode(args, use_resources, + output_on_failure=False) + self.assertEqual(regrtest.verbose, True) + def test_slow_ci(self): args = ['--slow-ci'] use_resources = sorted(cmdline.ALL_RESOURCES) regrtest = self.check_ci_mode(args, use_resources) self.assertEqual(regrtest.timeout, 20 * 60) + def test_ci_no_randomize(self): + all_resources = set(cmdline.ALL_RESOURCES) + self.check_ci_mode( + ["--slow-ci", "--no-randomize"], all_resources, randomize=False + ) + self.check_ci_mode( + ["--fast-ci", "--no-randomize"], all_resources - {'cpu'}, randomize=False + ) + def test_dont_add_python_opts(self): args = ['--dont-add-python-opts'] ns = cmdline._parse_args(args) @@ -768,13 +807,16 @@ def run_command(self, args, input=None, exitcode=0, **kw): self.fail(msg) return proc - def run_python(self, args, **kw): + def run_python(self, args, isolated=True, **kw): extraargs = [] if 'uops' in sys._xoptions: # Pass -X uops along extraargs.extend(['-X', 'uops']) - args = [sys.executable, *extraargs, '-X', 'faulthandler', '-I', *args] - proc = self.run_command(args, **kw) + cmd = [sys.executable, *extraargs, '-X', 'faulthandler'] + if isolated: + cmd.append('-I') + cmd.extend(args) + proc = self.run_command(cmd, **kw) return proc.stdout @@ -831,8 +873,8 @@ def check_output(self, output): self.check_executed_tests(output, self.tests, randomize=True, stats=len(self.tests)) - def run_tests(self, args, env=None): - output = self.run_python(args, env=env) + def run_tests(self, args, env=None, isolated=True): + output = self.run_python(args, env=env, isolated=isolated) self.check_output(output) def test_script_regrtest(self): @@ -874,7 +916,10 @@ def test_script_autotest(self): self.run_tests(args) def run_batch(self, *args): - proc = self.run_command(args) + proc = self.run_command(args, + # gh-133711: cmd.exe uses the OEM code page + # to display the non-ASCII current directory + errors="backslashreplace") self.check_output(proc.stdout) @unittest.skipUnless(sysconfig.is_python_build(), @@ -2273,7 +2318,6 @@ def test_pass(self): def test_xml(self): code = textwrap.dedent(r""" import unittest - from test import support class VerboseTests(unittest.TestCase): def test_failed(self): @@ -2308,6 +2352,50 @@ def test_failed(self): for out in testcase.iter('system-out'): self.assertEqual(out.text, r"abc \x1b def") + def test_nonascii(self): + code = textwrap.dedent(r""" + import unittest + + class NonASCIITests(unittest.TestCase): + def test_docstring(self): + '''docstring:\u20ac''' + + def test_subtest(self): + with self.subTest(param='subtest:\u20ac'): + pass + + def test_skip(self): + self.skipTest('skipped:\u20ac') + """) + testname = self.create_test(code=code) + + env = dict(os.environ) + env['PYTHONIOENCODING'] = 'ascii' + + def check(output): + self.check_executed_tests(output, testname, stats=TestStats(3, 0, 1)) + self.assertIn(r'docstring:\u20ac', output) + self.assertIn(r'skipped:\u20ac', output) + + # Run sequentially + output = self.run_tests('-v', testname, env=env, isolated=False) + check(output) + + # Run in parallel + output = self.run_tests('-j1', '-v', testname, env=env, isolated=False) + check(output) + + def test_pgo_exclude(self): + # Get PGO tests + output = self.run_tests('--pgo', '--list-tests') + pgo_tests = output.strip().split() + + # Exclude test_re + output = self.run_tests('--pgo', '--list-tests', '-x', 'test_re') + tests = output.strip().split() + self.assertNotIn('test_re', tests) + self.assertEqual(len(tests), len(pgo_tests) - 1) + class TestUtils(unittest.TestCase): def test_format_duration(self): diff --git a/Lib/test/test_remote_pdb.py b/Lib/test/test_remote_pdb.py index aef8a6b0129092..ec11e41678849b 100644 --- a/Lib/test/test_remote_pdb.py +++ b/Lib/test/test_remote_pdb.py @@ -1,5 +1,4 @@ import io -import time import itertools import json import os @@ -8,16 +7,13 @@ import socket import subprocess import sys -import tempfile import textwrap -import threading import unittest import unittest.mock from contextlib import closing, contextmanager, redirect_stdout, redirect_stderr, ExitStack -from pathlib import Path -from test.support import is_wasi, cpython_only, force_color, requires_subprocess, SHORT_TIMEOUT -from test.support.os_helper import temp_dir, TESTFN, unlink -from typing import Dict, List, Optional, Tuple, Union, Any +from test.support import is_wasi, cpython_only, force_color, requires_subprocess, SHORT_TIMEOUT, subTests +from test.support.os_helper import TESTFN, unlink +from typing import List import pdb from pdb import _PdbServer, _PdbClient @@ -283,37 +279,50 @@ def test_handling_other_message(self): expected_stdout="Some message.\n", ) - def test_handling_help_for_command(self): - """Test handling a request to display help for a command.""" + @unittest.skipIf(sys.flags.optimize >= 2, "Help not available for -OO") + @subTests( + "help_request,expected_substring", + [ + # a request to display help for a command + ({"help": "ll"}, "Usage: ll | longlist"), + # a request to display a help overview + ({"help": ""}, "type help <topic>"), + # a request to display the full PDB manual + ({"help": "pdb"}, ">>> import pdb"), + ], + ) + def test_handling_help_when_available(self, help_request, expected_substring): + """Test handling help requests when help is available.""" incoming = [ - ("server", {"help": "ll"}), + ("server", help_request), ] self.do_test( incoming=incoming, expected_outgoing=[], - expected_stdout_substring="Usage: ll | longlist", + expected_stdout_substring=expected_substring, ) - def test_handling_help_without_a_specific_topic(self): - """Test handling a request to display a help overview.""" + @unittest.skipIf(sys.flags.optimize < 2, "Needs -OO") + @subTests( + "help_request,expected_substring", + [ + # a request to display help for a command + ({"help": "ll"}, "No help for 'll'"), + # a request to display a help overview + ({"help": ""}, "Undocumented commands"), + # a request to display the full PDB manual + ({"help": "pdb"}, "No help for 'pdb'"), + ], + ) + def test_handling_help_when_not_available(self, help_request, expected_substring): + """Test handling help requests when help is not available.""" incoming = [ - ("server", {"help": ""}), + ("server", help_request), ] self.do_test( incoming=incoming, expected_outgoing=[], - expected_stdout_substring="type help <topic>", - ) - - def test_handling_help_pdb(self): - """Test handling a request to display the full PDB manual.""" - incoming = [ - ("server", {"help": "pdb"}), - ] - self.do_test( - incoming=incoming, - expected_outgoing=[], - expected_stdout_substring=">>> import pdb", + expected_stdout_substring=expected_substring, ) def test_handling_pdb_prompts(self): @@ -1434,7 +1443,6 @@ def test_multi_line_commands(self): def _supports_remote_attaching(): - from contextlib import suppress PROCESS_VM_READV_SUPPORTED = False try: @@ -1531,6 +1539,9 @@ def do_integration_test(self, client_stdin): redirect_stdout(client_stdout), redirect_stderr(client_stderr), unittest.mock.patch("sys.argv", ["pdb", "-p", str(process.pid)]), + unittest.mock.patch( + "pdb.exit_with_permission_help_text", side_effect=PermissionError + ), ): try: pdb.main() diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py index 228b326699e75f..58bd2b5d916858 100644 --- a/Lib/test/test_repl.py +++ b/Lib/test/test_repl.py @@ -5,6 +5,7 @@ import subprocess import sys import unittest +from functools import partial from textwrap import dedent from test import support from test.support import ( @@ -27,7 +28,7 @@ raise unittest.SkipTest("test module requires subprocess") -def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw): +def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, custom=False, **kw): """Run the Python REPL with the given arguments. kw is extra keyword args to pass to subprocess.Popen. Returns a Popen @@ -41,7 +42,11 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw): # path may be used by Py_GetPath() to build the default module search # path. stdin_fname = os.path.join(os.path.dirname(sys.executable), "<stdin>") - cmd_line = [stdin_fname, '-I', '-i'] + cmd_line = [stdin_fname, '-I'] + # Don't re-run the built-in REPL from interactive mode + # if we're testing a custom REPL (such as the asyncio REPL). + if not custom: + cmd_line.append('-i') cmd_line.extend(args) # Set TERM=vt100, for the rationale see the comments in spawn_python() of @@ -55,6 +60,10 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw): stdout=stdout, stderr=stderr, **kw) + +spawn_asyncio_repl = partial(spawn_repl, "-m", "asyncio", custom=True) + + def run_on_interactive_mode(source): """Spawn a new Python interpreter, pass the given input source code from the stdin and return the @@ -188,6 +197,68 @@ def foo(x): ] self.assertEqual(traceback_lines, expected_lines) + def test_pythonstartup_error_reporting(self): + # errors based on https://github.com/python/cpython/issues/137576 + + def make_repl(env): + return subprocess.Popen( + [os.path.join(os.path.dirname(sys.executable), '<stdin>'), "-i"], + executable=sys.executable, + text=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=env, + ) + + # case 1: error in user input, but PYTHONSTARTUP is fine + with os_helper.temp_dir() as tmpdir: + script = os.path.join(tmpdir, "pythonstartup.py") + with open(script, "w") as f: + f.write("print('from pythonstartup')" + os.linesep) + + env = os.environ.copy() + env['PYTHONSTARTUP'] = script + env["PYTHON_HISTORY"] = os.path.join(tmpdir, ".pythonhist") + p = make_repl(env) + p.stdin.write("1/0") + output = kill_python(p) + expected = dedent(""" + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + 1/0 + ~^~ + ZeroDivisionError: division by zero + """) + self.assertIn("from pythonstartup", output) + self.assertIn(expected, output) + + # case 2: error in PYTHONSTARTUP triggered by user input + with os_helper.temp_dir() as tmpdir: + script = os.path.join(tmpdir, "pythonstartup.py") + with open(script, "w") as f: + f.write("def foo():\n 1/0\n") + + env = os.environ.copy() + env['PYTHONSTARTUP'] = script + env["PYTHON_HISTORY"] = os.path.join(tmpdir, ".pythonhist") + p = make_repl(env) + p.stdin.write('foo()') + output = kill_python(p) + expected = dedent(""" + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + foo() + ~~~^^ + File "%s", line 2, in foo + 1/0 + ~^~ + ZeroDivisionError: division by zero + """) % script + self.assertIn(expected, output) + + + def test_runsource_show_syntax_error_location(self): user_input = dedent("""def f(x, x): ... """) @@ -297,7 +368,7 @@ def f(): class TestAsyncioREPL(unittest.TestCase): def test_multiple_statements_fail_early(self): user_input = "1 / 0; print(f'afterwards: {1+1}')" - p = spawn_repl("-m", "asyncio") + p = spawn_asyncio_repl() p.stdin.write(user_input) output = kill_python(p) self.assertIn("ZeroDivisionError", output) @@ -309,7 +380,7 @@ def test_toplevel_contextvars_sync(self): var = ContextVar("var", default="failed") var.set("ok") """) - p = spawn_repl("-m", "asyncio") + p = spawn_asyncio_repl() p.stdin.write(user_input) user_input2 = dedent(""" print(f"toplevel contextvar test: {var.get()}") @@ -325,7 +396,7 @@ def test_toplevel_contextvars_async(self): from contextvars import ContextVar var = ContextVar('var', default='failed') """) - p = spawn_repl("-m", "asyncio") + p = spawn_asyncio_repl() p.stdin.write(user_input) user_input2 = "async def set_var(): var.set('ok')\n" p.stdin.write(user_input2) diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index ffad35092f9916..22a55b57c076eb 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -151,14 +151,38 @@ def test_frozenset(self): eq(r(frozenset({1, 2, 3, 4, 5, 6, 7})), "frozenset({1, 2, 3, 4, 5, 6, ...})") def test_numbers(self): - eq = self.assertEqual - eq(r(123), repr(123)) - eq(r(123), repr(123)) - eq(r(1.0/3), repr(1.0/3)) - - n = 10**100 - expected = repr(n)[:18] + "..." + repr(n)[-19:] - eq(r(n), expected) + for x in [123, 1.0 / 3]: + self.assertEqual(r(x), repr(x)) + + max_digits = sys.get_int_max_str_digits() + for k in [100, max_digits - 1]: + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + expected = repr(n)[:18] + "..." + repr(n)[-19:] + self.assertEqual(r(n), expected) + + def re_msg(n, d): + return (rf'<{n.__class__.__name__} instance with roughly {d} ' + rf'digits \(limit at {max_digits}\) at 0x[a-f0-9]+>') + + k = max_digits + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) + + for k in [max_digits + 1, 2 * max_digits]: + self.assertGreater(k, 100) + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) + with self.subTest(f'10 ** {k} - 1', k=k): + n = 10 ** k - 1 + # Here, since math.log10(n) == math.log10(n-1), + # the number of digits of n - 1 is overestimated. + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) def test_instance(self): eq = self.assertEqual @@ -173,13 +197,13 @@ def test_instance(self): eq(r(i3), ("<ClassWithFailingRepr instance at %#x>"%id(i3))) s = r(ClassWithFailingRepr) - self.assertTrue(s.startswith("<class ")) - self.assertTrue(s.endswith(">")) + self.assertStartsWith(s, "<class ") + self.assertEndsWith(s, ">") self.assertIn(s.find("..."), [12, 13]) def test_lambda(self): r = repr(lambda x: x) - self.assertTrue(r.startswith("<function ReprTests.test_lambda.<locals>.<lambda"), r) + self.assertStartsWith(r, "<function ReprTests.test_lambda.<locals>.<lambda") # XXX anonymous functions? see func_repr def test_builtin_function(self): @@ -187,8 +211,8 @@ def test_builtin_function(self): # Functions eq(repr(hash), '<built-in function hash>') # Methods - self.assertTrue(repr(''.split).startswith( - '<built-in method split of str object at 0x')) + self.assertStartsWith(repr(''.split), + '<built-in method split of str object at 0x') def test_range(self): eq = self.assertEqual @@ -373,20 +397,20 @@ def test_valid_indent(self): 'object': { 1: 'two', b'three': [ - (4.5, 6.7), + (4.5, 6.25), [set((8, 9)), frozenset((10, 11))], ], }, 'tests': ( (dict(indent=None), '''\ - {1: 'two', b'three': [(4.5, 6.7), [{8, 9}, frozenset({10, 11})]]}'''), + {1: 'two', b'three': [(4.5, 6.25), [{8, 9}, frozenset({10, 11})]]}'''), (dict(indent=False), '''\ { 1: 'two', b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -406,7 +430,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -426,7 +450,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -446,7 +470,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -466,7 +490,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -494,7 +518,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -514,7 +538,7 @@ def test_valid_indent(self): -->b'three': [ -->-->( -->-->-->4.5, - -->-->-->6.7, + -->-->-->6.25, -->-->), -->-->[ -->-->-->{ @@ -534,7 +558,7 @@ def test_valid_indent(self): ....b'three': [ ........( ............4.5, - ............6.7, + ............6.25, ........), ........[ ............{ @@ -730,8 +754,8 @@ class baz: importlib.invalidate_caches() from areallylongpackageandmodulenametotestreprtruncation.areallylongpackageandmodulenametotestreprtruncation import baz ibaz = baz.baz() - self.assertTrue(repr(ibaz).startswith( - "<%s.baz object at 0x" % baz.__name__)) + self.assertStartsWith(repr(ibaz), + "<%s.baz object at 0x" % baz.__name__) def test_method(self): self._check_path_limitations('qux') @@ -744,13 +768,13 @@ def amethod(self): pass from areallylongpackageandmodulenametotestreprtruncation.areallylongpackageandmodulenametotestreprtruncation import qux # Unbound methods first r = repr(qux.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.amethod) - self.assertTrue(r.startswith('<function aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.amethod'), r) + self.assertStartsWith(r, '<function aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.amethod') # Bound method next iqux = qux.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() r = repr(iqux.amethod) - self.assertTrue(r.startswith( + self.assertStartsWith(r, '<bound method aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.amethod of <%s.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa object at 0x' \ - % (qux.__name__,) ), r) + % (qux.__name__,) ) @unittest.skip('needs a built-in function with a really long name') def test_builtin_function(self): diff --git a/Lib/test/test_resource.py b/Lib/test/test_resource.py index d23d3623235f38..fe05224828bd27 100644 --- a/Lib/test/test_resource.py +++ b/Lib/test/test_resource.py @@ -14,89 +14,154 @@ class ResourceTest(unittest.TestCase): def test_args(self): self.assertRaises(TypeError, resource.getrlimit) - self.assertRaises(TypeError, resource.getrlimit, 42, 42) + self.assertRaises(TypeError, resource.getrlimit, 0, 42) + self.assertRaises(OverflowError, resource.getrlimit, 2**1000) + self.assertRaises(OverflowError, resource.getrlimit, -2**1000) + self.assertRaises(TypeError, resource.getrlimit, '0') self.assertRaises(TypeError, resource.setrlimit) - self.assertRaises(TypeError, resource.setrlimit, 42, 42, 42) + self.assertRaises(TypeError, resource.setrlimit, 0) + self.assertRaises(TypeError, resource.setrlimit, 0, 42) + self.assertRaises(TypeError, resource.setrlimit, 0, 42, 42) + self.assertRaises(OverflowError, resource.setrlimit, 2**1000, (42, 42)) + self.assertRaises(OverflowError, resource.setrlimit, -2**1000, (42, 42)) + self.assertRaises(ValueError, resource.setrlimit, 0, (42,)) + self.assertRaises(ValueError, resource.setrlimit, 0, (42, 42, 42)) + self.assertRaises(TypeError, resource.setrlimit, '0', (42, 42)) + self.assertRaises(TypeError, resource.setrlimit, 0, ('42', 42)) + self.assertRaises(TypeError, resource.setrlimit, 0, (42, '42')) @unittest.skipIf(sys.platform == "vxworks", "setting RLIMIT_FSIZE is not supported on VxWorks") + @unittest.skipUnless(hasattr(resource, 'RLIMIT_FSIZE'), 'requires resource.RLIMIT_FSIZE') def test_fsize_ismax(self): - try: - (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) - except AttributeError: - pass - else: - # RLIMIT_FSIZE should be RLIM_INFINITY, which will be a really big - # number on a platform with large file support. On these platforms, - # we need to test that the get/setrlimit functions properly convert - # the number to a C long long and that the conversion doesn't raise - # an error. - self.assertEqual(resource.RLIM_INFINITY, max) - resource.setrlimit(resource.RLIMIT_FSIZE, (cur, max)) + (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) + # RLIMIT_FSIZE should be RLIM_INFINITY, which will be a really big + # number on a platform with large file support. On these platforms, + # we need to test that the get/setrlimit functions properly convert + # the number to a C long long and that the conversion doesn't raise + # an error. + self.assertEqual(resource.RLIM_INFINITY, max) + resource.setrlimit(resource.RLIMIT_FSIZE, (cur, max)) + @unittest.skipIf(sys.platform == "vxworks", + "setting RLIMIT_FSIZE is not supported on VxWorks") + @unittest.skipUnless(hasattr(resource, 'RLIMIT_FSIZE'), 'requires resource.RLIMIT_FSIZE') def test_fsize_enforced(self): + (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) + # Check to see what happens when the RLIMIT_FSIZE is small. Some + # versions of Python were terminated by an uncaught SIGXFSZ, but + # pythonrun.c has been fixed to ignore that exception. If so, the + # write() should return EFBIG when the limit is exceeded. + + # At least one platform has an unlimited RLIMIT_FSIZE and attempts + # to change it raise ValueError instead. try: - (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) - except AttributeError: - pass - else: - # Check to see what happens when the RLIMIT_FSIZE is small. Some - # versions of Python were terminated by an uncaught SIGXFSZ, but - # pythonrun.c has been fixed to ignore that exception. If so, the - # write() should return EFBIG when the limit is exceeded. - - # At least one platform has an unlimited RLIMIT_FSIZE and attempts - # to change it raise ValueError instead. try: + resource.setrlimit(resource.RLIMIT_FSIZE, (1024, max)) + limit_set = True + except ValueError: + limit_set = False + f = open(os_helper.TESTFN, "wb") + try: + f.write(b"X" * 1024) try: - resource.setrlimit(resource.RLIMIT_FSIZE, (1024, max)) - limit_set = True - except ValueError: - limit_set = False - f = open(os_helper.TESTFN, "wb") - try: - f.write(b"X" * 1024) - try: - f.write(b"Y") + f.write(b"Y") + f.flush() + # On some systems (e.g., Ubuntu on hppa) the flush() + # doesn't always cause the exception, but the close() + # does eventually. Try flushing several times in + # an attempt to ensure the file is really synced and + # the exception raised. + for i in range(5): + time.sleep(.1) f.flush() - # On some systems (e.g., Ubuntu on hppa) the flush() - # doesn't always cause the exception, but the close() - # does eventually. Try flushing several times in - # an attempt to ensure the file is really synced and - # the exception raised. - for i in range(5): - time.sleep(.1) - f.flush() - except OSError: - if not limit_set: - raise - if limit_set: - # Close will attempt to flush the byte we wrote - # Restore limit first to avoid getting a spurious error - resource.setrlimit(resource.RLIMIT_FSIZE, (cur, max)) - finally: - f.close() - finally: + except OSError: + if not limit_set: + raise if limit_set: + # Close will attempt to flush the byte we wrote + # Restore limit first to avoid getting a spurious error resource.setrlimit(resource.RLIMIT_FSIZE, (cur, max)) - os_helper.unlink(os_helper.TESTFN) + finally: + f.close() + finally: + if limit_set: + resource.setrlimit(resource.RLIMIT_FSIZE, (cur, max)) + os_helper.unlink(os_helper.TESTFN) - def test_fsize_toobig(self): + @unittest.skipIf(sys.platform == "vxworks", + "setting RLIMIT_FSIZE is not supported on VxWorks") + @unittest.skipUnless(hasattr(resource, 'RLIMIT_FSIZE'), 'requires resource.RLIMIT_FSIZE') + def test_fsize_too_big(self): # Be sure that setrlimit is checking for really large values too_big = 10**50 + (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) + try: + resource.setrlimit(resource.RLIMIT_FSIZE, (too_big, max)) + except (OverflowError, ValueError): + pass try: - (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) - except AttributeError: + resource.setrlimit(resource.RLIMIT_FSIZE, (max, too_big)) + except (OverflowError, ValueError): pass + + @unittest.skipIf(sys.platform == "vxworks", + "setting RLIMIT_FSIZE is not supported on VxWorks") + @unittest.skipUnless(hasattr(resource, 'RLIMIT_FSIZE'), 'requires resource.RLIMIT_FSIZE') + def test_fsize_not_too_big(self): + (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) + self.addCleanup(resource.setrlimit, resource.RLIMIT_FSIZE, (cur, max)) + + def expected(cur): + if resource.RLIM_INFINITY < 0: + return [(cur, max), (resource.RLIM_INFINITY, max)] + elif resource.RLIM_INFINITY < cur: + return [(resource.RLIM_INFINITY, max)] + else: + return [(cur, max)] + + resource.setrlimit(resource.RLIMIT_FSIZE, (2**31-5, max)) + self.assertEqual(resource.getrlimit(resource.RLIMIT_FSIZE), (2**31-5, max)) + + try: + resource.setrlimit(resource.RLIMIT_FSIZE, (2**32, max)) + except OverflowError: + resource.setrlimit(resource.RLIMIT_FSIZE, (2**31, max)) + self.assertIn(resource.getrlimit(resource.RLIMIT_FSIZE), expected(2**31)) + resource.setrlimit(resource.RLIMIT_FSIZE, (2**32-5, max)) + self.assertIn(resource.getrlimit(resource.RLIMIT_FSIZE), expected(2**32-5)) else: + self.assertIn(resource.getrlimit(resource.RLIMIT_FSIZE), expected(2**32)) + resource.setrlimit(resource.RLIMIT_FSIZE, (2**31, max)) + self.assertEqual(resource.getrlimit(resource.RLIMIT_FSIZE), (2**31, max)) + resource.setrlimit(resource.RLIMIT_FSIZE, (2**32-5, max)) + self.assertEqual(resource.getrlimit(resource.RLIMIT_FSIZE), (2**32-5, max)) + + resource.setrlimit(resource.RLIMIT_FSIZE, (2**63-5, max)) + self.assertIn(resource.getrlimit(resource.RLIMIT_FSIZE), expected(2**63-5)) try: - resource.setrlimit(resource.RLIMIT_FSIZE, (too_big, max)) - except (OverflowError, ValueError): - pass - try: - resource.setrlimit(resource.RLIMIT_FSIZE, (max, too_big)) - except (OverflowError, ValueError): + resource.setrlimit(resource.RLIMIT_FSIZE, (2**63, max)) + except ValueError: + # There is a hard limit on macOS. pass + else: + self.assertIn(resource.getrlimit(resource.RLIMIT_FSIZE), expected(2**63)) + resource.setrlimit(resource.RLIMIT_FSIZE, (2**64-5, max)) + self.assertIn(resource.getrlimit(resource.RLIMIT_FSIZE), expected(2**64-5)) + + @unittest.skipIf(sys.platform == "vxworks", + "setting RLIMIT_FSIZE is not supported on VxWorks") + @unittest.skipUnless(hasattr(resource, 'RLIMIT_FSIZE'), 'requires resource.RLIMIT_FSIZE') + def test_fsize_negative(self): + (cur, max) = resource.getrlimit(resource.RLIMIT_FSIZE) + for value in -5, -2**31, -2**32-5, -2**63, -2**64-5, -2**1000: + with self.subTest(value=value): + # This test assumes that the values don't map to RLIM_INFINITY, + # though Posix doesn't guarantee it. + self.assertNotEqual(value, resource.RLIM_INFINITY) + + self.assertRaises(ValueError, resource.setrlimit, resource.RLIMIT_FSIZE, (value, max)) + self.assertRaises(ValueError, resource.setrlimit, resource.RLIMIT_FSIZE, (cur, value)) @unittest.skipUnless(hasattr(resource, "getrusage"), "needs getrusage") def test_getrusage(self): @@ -117,21 +182,18 @@ def test_getrusage(self): # Issue 6083: Reference counting bug @unittest.skipIf(sys.platform == "vxworks", "setting RLIMIT_CPU is not supported on VxWorks") + @unittest.skipUnless(hasattr(resource, 'RLIMIT_CPU'), 'requires resource.RLIMIT_CPU') def test_setrusage_refcount(self): - try: - limits = resource.getrlimit(resource.RLIMIT_CPU) - except AttributeError: - pass - else: - class BadSequence: - def __len__(self): - return 2 - def __getitem__(self, key): - if key in (0, 1): - return len(tuple(range(1000000))) - raise IndexError + limits = resource.getrlimit(resource.RLIMIT_CPU) + class BadSequence: + def __len__(self): + return 2 + def __getitem__(self, key): + if key in (0, 1): + return len(tuple(range(1000000))) + raise IndexError - resource.setrlimit(resource.RLIMIT_CPU, BadSequence()) + resource.setrlimit(resource.RLIMIT_CPU, BadSequence()) def test_pagesize(self): pagesize = resource.getpagesize() @@ -168,7 +230,8 @@ class BadSeq: def __len__(self): return 2 def __getitem__(self, key): - return limits[key] - 1 # new reference + lim = limits[key] + return lim - 1 if lim > 0 else lim + sys.maxsize*2 # new reference limits = resource.getrlimit(resource.RLIMIT_AS) self.assertEqual(resource.prlimit(0, resource.RLIMIT_AS, BadSeq()), diff --git a/Lib/test/test_rlcompleter.py b/Lib/test/test_rlcompleter.py index d403a0fe96be3d..a8914953ce9eb4 100644 --- a/Lib/test/test_rlcompleter.py +++ b/Lib/test/test_rlcompleter.py @@ -88,7 +88,7 @@ def create_expected_for_none(): ['CompleteMe._ham']) matches = self.completer.attr_matches('CompleteMe.__') for x in matches: - self.assertTrue(x.startswith('CompleteMe.__'), x) + self.assertStartsWith(x, 'CompleteMe.__') self.assertIn('CompleteMe.__name__', matches) self.assertIn('CompleteMe.__new__(', matches) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 8d89e2a8224452..e33723cc70c877 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -16,6 +16,14 @@ class BaseRobotTest: bad = [] site_maps = None + def __init_subclass__(cls): + super().__init_subclass__() + # Remove tests that do nothing. + if not cls.good: + cls.test_good_urls = None + if not cls.bad: + cls.test_bad_urls = None + def setUp(self): lines = io.StringIO(self.robots_txt).readlines() self.parser = urllib.robotparser.RobotFileParser() @@ -231,9 +239,16 @@ class DisallowQueryStringTest(BaseRobotTest, unittest.TestCase): robots_txt = """\ User-agent: * Disallow: /some/path?name=value +Disallow: /another/path? +Disallow: /yet/one/path?name=value&more """ - good = ['/some/path'] - bad = ['/some/path?name=value'] + good = ['/some/path', '/some/path?', + '/some/path%3Fname=value', '/some/path?name%3Dvalue', + '/another/path', '/another/path%3F', + '/yet/one/path?name=value%26more'] + bad = ['/some/path?name=value' + '/another/path?', '/another/path?name=value', + '/yet/one/path?name=value&more'] class UseFirstUserAgentWildcardTest(BaseRobotTest, unittest.TestCase): @@ -249,15 +264,79 @@ class UseFirstUserAgentWildcardTest(BaseRobotTest, unittest.TestCase): bad = ['/some/path'] -class EmptyQueryStringTest(BaseRobotTest, unittest.TestCase): - # normalize the URL first (#17403) +class PercentEncodingTest(BaseRobotTest, unittest.TestCase): robots_txt = """\ User-agent: * -Allow: /some/path? -Disallow: /another/path? - """ - good = ['/some/path?'] - bad = ['/another/path?'] +Disallow: /a1/Z-._~ # unreserved characters +Disallow: /a2/%5A%2D%2E%5F%7E # percent-encoded unreserved characters +Disallow: /u1/%F0%9F%90%8D # percent-encoded ASCII Unicode character +Disallow: /u2/%f0%9f%90%8d +Disallow: /u3/\U0001f40d # raw non-ASCII Unicode character +Disallow: /v1/%F0 # percent-encoded non-ASCII octet +Disallow: /v2/%f0 +Disallow: /v3/\udcf0 # raw non-ASCII octet +Disallow: /p1%xy # raw percent +Disallow: /p2% +Disallow: /p3%25xy # percent-encoded percent +Disallow: /p4%2525xy # double percent-encoded percent +Disallow: /john%20smith # space +Disallow: /john doe +Disallow: /trailingspace%20 +Disallow: /question%3Fq=v # not query +Disallow: /hash%23f # not fragment +Disallow: /dollar%24 +Disallow: /asterisk%2A +Disallow: /sub/dir +Disallow: /slash%2F +Disallow: /query/question?q=%3F +Disallow: /query/raw/question?q=? +Disallow: /query/eq?q%3Dv +Disallow: /query/amp?q=v%26a +""" + good = [ + '/u1/%F0', '/u1/%f0', + '/u2/%F0', '/u2/%f0', + '/u3/%F0', '/u3/%f0', + '/p1%2525xy', '/p2%f0', '/p3%2525xy', '/p4%xy', '/p4%25xy', + '/question?q=v', + '/dollar', '/asterisk', + '/query/eq?q=v', + '/query/amp?q=v&a', + ] + bad = [ + '/a1/Z-._~', '/a1/%5A%2D%2E%5F%7E', + '/a2/Z-._~', '/a2/%5A%2D%2E%5F%7E', + '/u1/%F0%9F%90%8D', '/u1/%f0%9f%90%8d', '/u1/\U0001f40d', + '/u2/%F0%9F%90%8D', '/u2/%f0%9f%90%8d', '/u2/\U0001f40d', + '/u3/%F0%9F%90%8D', '/u3/%f0%9f%90%8d', '/u3/\U0001f40d', + '/v1/%F0', '/v1/%f0', '/v1/\udcf0', '/v1/\U0001f40d', + '/v2/%F0', '/v2/%f0', '/v2/\udcf0', '/v2/\U0001f40d', + '/v3/%F0', '/v3/%f0', '/v3/\udcf0', '/v3/\U0001f40d', + '/p1%xy', '/p1%25xy', + '/p2%', '/p2%25', '/p2%2525', '/p2%xy', + '/p3%xy', '/p3%25xy', + '/p4%2525xy', + '/john%20smith', '/john smith', + '/john%20doe', '/john doe', + '/trailingspace%20', '/trailingspace ', + '/question%3Fq=v', + '/hash#f', '/hash%23f', + '/dollar$', '/dollar%24', + '/asterisk*', '/asterisk%2A', + '/sub/dir', '/sub%2Fdir', + '/slash%2F', '/slash/', + '/query/question?q=?', '/query/question?q=%3F', + '/query/raw/question?q=?', '/query/raw/question?q=%3F', + '/query/eq?q%3Dv', + '/query/amp?q=v%26a', + ] + # other reserved characters + for c in ":/#[]@!$&'()*+,;=": + robots_txt += f'Disallow: /raw{c}\nDisallow: /pc%{ord(c):02X}\n' + bad.append(f'/raw{c}') + bad.append(f'/raw%{ord(c):02X}') + bad.append(f'/pc{c}') + bad.append(f'/pc%{ord(c):02X}') class DefaultEntryTest(BaseRequestRateTest, unittest.TestCase): @@ -299,26 +378,17 @@ def test_string_formatting(self): self.assertEqual(str(self.parser), self.expected_output) -class RobotHandler(BaseHTTPRequestHandler): - - def do_GET(self): - self.send_error(403, "Forbidden access") - - def log_message(self, format, *args): - pass - - @unittest.skipUnless( support.has_socket_support, "Socket server requires working socket." ) -class PasswordProtectedSiteTestCase(unittest.TestCase): +class BaseLocalNetworkTestCase: def setUp(self): # clear _opener global variable self.addCleanup(urllib.request.urlcleanup) - self.server = HTTPServer((socket_helper.HOST, 0), RobotHandler) + self.server = HTTPServer((socket_helper.HOST, 0), self.RobotHandler) self.t = threading.Thread( name='HTTPServer serving', @@ -335,6 +405,57 @@ def tearDown(self): self.t.join() self.server.server_close() + +SAMPLE_ROBOTS_TXT = b'''\ +User-agent: test_robotparser +Disallow: /utf8/\xf0\x9f\x90\x8d +Disallow: /non-utf8/\xf0 +Disallow: //[spam]/path +''' + + +class LocalNetworkTestCase(BaseLocalNetworkTestCase, unittest.TestCase): + class RobotHandler(BaseHTTPRequestHandler): + + def do_GET(self): + self.send_response(200) + self.end_headers() + self.wfile.write(SAMPLE_ROBOTS_TXT) + + def log_message(self, format, *args): + pass + + @threading_helper.reap_threads + def testRead(self): + # Test that reading a weird robots.txt doesn't fail. + addr = self.server.server_address + url = f'http://{socket_helper.HOST}:{addr[1]}' + robots_url = url + '/robots.txt' + parser = urllib.robotparser.RobotFileParser() + parser.set_url(robots_url) + parser.read() + # And it can even interpret the weird paths in some reasonable way. + agent = 'test_robotparser' + self.assertTrue(parser.can_fetch(agent, robots_url)) + self.assertTrue(parser.can_fetch(agent, url + '/utf8/')) + self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) + self.assertFalse(parser.can_fetch(agent, url + '/utf8/%F0%9F%90%8D')) + self.assertFalse(parser.can_fetch(agent, url + '/utf8/\U0001f40d')) + self.assertTrue(parser.can_fetch(agent, url + '/non-utf8/')) + self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/%F0')) + self.assertFalse(parser.can_fetch(agent, url + '/non-utf8/\U0001f40d')) + self.assertFalse(parser.can_fetch(agent, url + '/%2F[spam]/path')) + + +class PasswordProtectedSiteTestCase(BaseLocalNetworkTestCase, unittest.TestCase): + class RobotHandler(BaseHTTPRequestHandler): + + def do_GET(self): + self.send_error(403, "Forbidden access") + + def log_message(self, format, *args): + pass + @threading_helper.reap_threads def testPasswordProtectedSite(self): addr = self.server.server_address diff --git a/Lib/test/test_runpy.py b/Lib/test/test_runpy.py index ada78ec8e6b0c7..a2a07c04f58ef2 100644 --- a/Lib/test/test_runpy.py +++ b/Lib/test/test_runpy.py @@ -796,7 +796,7 @@ def assertSigInt(self, cmd, *args, **kwargs): # Use -E to ignore PYTHONSAFEPATH cmd = [sys.executable, '-E', *cmd] proc = subprocess.run(cmd, *args, **kwargs, text=True, stderr=subprocess.PIPE) - self.assertTrue(proc.stderr.endswith("\nKeyboardInterrupt\n"), proc.stderr) + self.assertEndsWith(proc.stderr, "\nKeyboardInterrupt\n") self.assertEqual(proc.returncode, self.EXPECTED_CODE) def test_pymain_run_file(self): diff --git a/Lib/test/test_scope.py b/Lib/test/test_scope.py index 24a366efc6ca05..520fbc1b66237b 100644 --- a/Lib/test/test_scope.py +++ b/Lib/test/test_scope.py @@ -778,7 +778,7 @@ class X: class X: locals()["x"] = 43 del x - self.assertFalse(hasattr(X, "x")) + self.assertNotHasAttr(X, "x") self.assertEqual(x, 42) @cpython_only diff --git a/Lib/test/test_script_helper.py b/Lib/test/test_script_helper.py index f7871fd3b77c02..eeea6c4842b488 100644 --- a/Lib/test/test_script_helper.py +++ b/Lib/test/test_script_helper.py @@ -74,8 +74,7 @@ class TestScriptHelperEnvironment(unittest.TestCase): """Code coverage for interpreter_requires_environment().""" def setUp(self): - self.assertTrue( - hasattr(script_helper, '__cached_interp_requires_environment')) + self.assertHasAttr(script_helper, '__cached_interp_requires_environment') # Reset the private cached state. script_helper.__dict__['__cached_interp_requires_environment'] = None diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index c01e323553d768..c0df9507bd7f5e 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -237,7 +237,7 @@ def test_pickling(self): if type(self.s) not in (set, frozenset): self.assertEqual(self.s.x, dup.x) self.assertEqual(self.s.z, dup.z) - self.assertFalse(hasattr(self.s, 'y')) + self.assertNotHasAttr(self.s, 'y') del self.s.x, self.s.z def test_iterator_pickling(self): @@ -876,8 +876,8 @@ def test_repr(self): def check_repr_against_values(self): text = repr(self.set) - self.assertTrue(text.startswith('{')) - self.assertTrue(text.endswith('}')) + self.assertStartsWith(text, '{') + self.assertEndsWith(text, '}') result = text[1:-1].split(', ') result.sort() diff --git a/Lib/test/test_shlex.py b/Lib/test/test_shlex.py index a13ddcb76b7bcb..2a355abdeeb30f 100644 --- a/Lib/test/test_shlex.py +++ b/Lib/test/test_shlex.py @@ -330,6 +330,7 @@ def testQuote(self): unsafe = '"`$\\!' + unicode_sample self.assertEqual(shlex.quote(''), "''") + self.assertEqual(shlex.quote(None), "''") self.assertEqual(shlex.quote(safeunquoted), safeunquoted) self.assertEqual(shlex.quote('test file name'), "'test file name'") for u in unsafe: @@ -338,6 +339,8 @@ def testQuote(self): for u in unsafe: self.assertEqual(shlex.quote("test%s'name'" % u), "'test%s'\"'\"'name'\"'\"''" % u) + self.assertRaises(TypeError, shlex.quote, 42) + self.assertRaises(TypeError, shlex.quote, b"abc") def testJoin(self): for split_command, command in [ diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 87991fbda4c7df..62c80aab4b3305 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -427,12 +427,12 @@ def check_args_to_onerror(self, func, arg, exc): else: self.assertIs(func, os.listdir) self.assertIn(arg, [TESTFN, self.child_dir_path]) - self.assertTrue(issubclass(exc[0], OSError)) + self.assertIsSubclass(exc[0], OSError) self.errorState += 1 else: self.assertEqual(func, os.rmdir) self.assertEqual(arg, TESTFN) - self.assertTrue(issubclass(exc[0], OSError)) + self.assertIsSubclass(exc[0], OSError) self.errorState = 3 @unittest.skipIf(sys.platform[:6] == 'cygwin', @@ -3479,7 +3479,7 @@ class PublicAPITests(unittest.TestCase): """Ensures that the correct values are exposed in the public API.""" def test_module_all_attribute(self): - self.assertTrue(hasattr(shutil, '__all__')) + self.assertHasAttr(shutil, '__all__') target_api = ['copyfileobj', 'copyfile', 'copymode', 'copystat', 'copy', 'copy2', 'copytree', 'move', 'rmtree', 'Error', 'SpecialFileError', 'make_archive', diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py index a7e9241f44d243..f8c6f37c607cfb 100644 --- a/Lib/test/test_site.py +++ b/Lib/test/test_site.py @@ -13,6 +13,7 @@ from test.support import socket_helper from test.support import captured_stderr from test.support.os_helper import TESTFN, EnvironmentVarGuard +from test.support.script_helper import spawn_python, kill_python import ast import builtins import glob @@ -25,6 +26,7 @@ import sys import sysconfig import tempfile +from textwrap import dedent import urllib.error import urllib.request from unittest import mock @@ -307,8 +309,7 @@ def test_getuserbase(self): with EnvironmentVarGuard() as environ: environ['PYTHONUSERBASE'] = 'xoxo' - self.assertTrue(site.getuserbase().startswith('xoxo'), - site.getuserbase()) + self.assertStartsWith(site.getuserbase(), 'xoxo') @unittest.skipUnless(HAS_USER_SITE, 'need user site') def test_getusersitepackages(self): @@ -318,7 +319,7 @@ def test_getusersitepackages(self): # the call sets USER_BASE *and* USER_SITE self.assertEqual(site.USER_SITE, user_site) - self.assertTrue(user_site.startswith(site.USER_BASE), user_site) + self.assertStartsWith(user_site, site.USER_BASE) self.assertEqual(site.USER_BASE, site.getuserbase()) def test_getsitepackages(self): @@ -359,11 +360,10 @@ def test_no_home_directory(self): environ.unset('PYTHONUSERBASE', 'APPDATA') user_base = site.getuserbase() - self.assertTrue(user_base.startswith('~' + os.sep), - user_base) + self.assertStartsWith(user_base, '~' + os.sep) user_site = site.getusersitepackages() - self.assertTrue(user_site.startswith(user_base), user_site) + self.assertStartsWith(user_site, user_base) with mock.patch('os.path.isdir', return_value=False) as mock_isdir, \ mock.patch.object(site, 'addsitedir') as mock_addsitedir, \ @@ -495,18 +495,18 @@ def test_add_build_dir(self): def test_setting_quit(self): # 'quit' and 'exit' should be injected into builtins - self.assertTrue(hasattr(builtins, "quit")) - self.assertTrue(hasattr(builtins, "exit")) + self.assertHasAttr(builtins, "quit") + self.assertHasAttr(builtins, "exit") def test_setting_copyright(self): # 'copyright', 'credits', and 'license' should be in builtins - self.assertTrue(hasattr(builtins, "copyright")) - self.assertTrue(hasattr(builtins, "credits")) - self.assertTrue(hasattr(builtins, "license")) + self.assertHasAttr(builtins, "copyright") + self.assertHasAttr(builtins, "credits") + self.assertHasAttr(builtins, "license") def test_setting_help(self): # 'help' should be set in builtins - self.assertTrue(hasattr(builtins, "help")) + self.assertHasAttr(builtins, "help") def test_sitecustomize_executed(self): # If sitecustomize is available, it should have been imported. @@ -805,5 +805,110 @@ def test_underpth_dll_file(self): self.assertTrue(rc, "sys.path is incorrect") +class CommandLineTests(unittest.TestCase): + def exists(self, path): + if path is not None and os.path.isdir(path): + return "exists" + else: + return "doesn't exist" + + def get_excepted_output(self, *args): + if len(args) == 0: + user_base = site.getuserbase() + user_site = site.getusersitepackages() + output = io.StringIO() + output.write("sys.path = [\n") + for dir in sys.path: + output.write(" %r,\n" % (dir,)) + output.write("]\n") + output.write(f"USER_BASE: {user_base} ({self.exists(user_base)})\n") + output.write(f"USER_SITE: {user_site} ({self.exists(user_site)})\n") + output.write(f"ENABLE_USER_SITE: {site.ENABLE_USER_SITE}\n") + return 0, dedent(output.getvalue()).strip() + + buffer = [] + if '--user-base' in args: + buffer.append(site.getuserbase()) + if '--user-site' in args: + buffer.append(site.getusersitepackages()) + + if buffer: + return_code = 3 + if site.ENABLE_USER_SITE: + return_code = 0 + elif site.ENABLE_USER_SITE is False: + return_code = 1 + elif site.ENABLE_USER_SITE is None: + return_code = 2 + output = os.pathsep.join(buffer) + return return_code, os.path.normpath(dedent(output).strip()) + else: + return 10, None + + def invoke_command_line(self, *args): + cmd_args = [] + if sys.flags.no_user_site: + cmd_args.append("-s") + cmd_args.extend(["-m", "site", *args]) + + with EnvironmentVarGuard() as env: + env["PYTHONUTF8"] = "1" + env["PYTHONIOENCODING"] = "utf-8" + proc = spawn_python(*cmd_args, text=True, env=env, + encoding='utf-8', errors='replace') + + output = kill_python(proc) + return_code = proc.returncode + return return_code, os.path.normpath(dedent(output).strip()) + + @support.requires_subprocess() + def test_no_args(self): + return_code, output = self.invoke_command_line() + excepted_return_code, _ = self.get_excepted_output() + self.assertEqual(return_code, excepted_return_code) + lines = output.splitlines() + self.assertEqual(lines[0], "sys.path = [") + self.assertEqual(lines[-4], "]") + excepted_base = f"USER_BASE: '{site.getuserbase()}'" +\ + f" ({self.exists(site.getuserbase())})" + self.assertEqual(lines[-3], excepted_base) + excepted_site = f"USER_SITE: '{site.getusersitepackages()}'" +\ + f" ({self.exists(site.getusersitepackages())})" + self.assertEqual(lines[-2], excepted_site) + self.assertEqual(lines[-1], f"ENABLE_USER_SITE: {site.ENABLE_USER_SITE}") + + @support.requires_subprocess() + def test_unknown_args(self): + return_code, output = self.invoke_command_line("--unknown-arg") + excepted_return_code, _ = self.get_excepted_output("--unknown-arg") + self.assertEqual(return_code, excepted_return_code) + self.assertIn('[--user-base] [--user-site]', output) + + @support.requires_subprocess() + def test_base_arg(self): + return_code, output = self.invoke_command_line("--user-base") + excepted = self.get_excepted_output("--user-base") + excepted_return_code, excepted_output = excepted + self.assertEqual(return_code, excepted_return_code) + self.assertEqual(output, excepted_output) + + @support.requires_subprocess() + def test_site_arg(self): + return_code, output = self.invoke_command_line("--user-site") + excepted = self.get_excepted_output("--user-site") + excepted_return_code, excepted_output = excepted + self.assertEqual(return_code, excepted_return_code) + self.assertEqual(output, excepted_output) + + @support.requires_subprocess() + def test_both_args(self): + return_code, output = self.invoke_command_line("--user-base", + "--user-site") + excepted = self.get_excepted_output("--user-base", "--user-site") + excepted_return_code, excepted_output = excepted + self.assertEqual(return_code, excepted_return_code) + self.assertEqual(output, excepted_output) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_smtplib.py b/Lib/test/test_smtplib.py index 4c9fc14bd43f54..fb3ea34d766e50 100644 --- a/Lib/test/test_smtplib.py +++ b/Lib/test/test_smtplib.py @@ -17,6 +17,7 @@ import threading import unittest +import unittest.mock as mock from test import support, mock_socket from test.support import hashlib_helper from test.support import socket_helper @@ -926,11 +927,14 @@ def _auth_cram_md5(self, arg=None): except ValueError as e: self.push('535 Splitting response {!r} into user and password ' 'failed: {}'.format(logpass, e)) - return False - valid_hashed_pass = hmac.HMAC( - sim_auth[1].encode('ascii'), - self._decode_base64(sim_cram_md5_challenge).encode('ascii'), - 'md5').hexdigest() + return + pwd = sim_auth[1].encode('ascii') + msg = self._decode_base64(sim_cram_md5_challenge).encode('ascii') + try: + valid_hashed_pass = hmac.HMAC(pwd, msg, 'md5').hexdigest() + except ValueError: + self.push('504 CRAM-MD5 is not supported') + return self._authenticated(user, hashed_pass == valid_hashed_pass) # end AUTH related stuff. @@ -1181,6 +1185,39 @@ def testAUTH_CRAM_MD5(self): self.assertEqual(resp, (235, b'Authentication Succeeded')) smtp.close() + @mock.patch("hmac.HMAC") + @mock.patch("smtplib._have_cram_md5_support", False) + def testAUTH_CRAM_MD5_blocked(self, hmac_constructor): + # CRAM-MD5 is the only "known" method by the server, + # but it is not supported by the client. In particular, + # no challenge will ever be sent. + self.serv.add_feature("AUTH CRAM-MD5") + smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', + timeout=support.LOOPBACK_TIMEOUT) + self.addCleanup(smtp.close) + msg = re.escape("No suitable authentication method found.") + with self.assertRaisesRegex(smtplib.SMTPException, msg): + smtp.login(sim_auth[0], sim_auth[1]) + hmac_constructor.assert_not_called() # call has been bypassed + + @mock.patch("smtplib._have_cram_md5_support", False) + def testAUTH_CRAM_MD5_blocked_and_fallback(self): + # Test that PLAIN is tried after CRAM-MD5 failed + self.serv.add_feature("AUTH CRAM-MD5 PLAIN") + smtp = smtplib.SMTP(HOST, self.port, local_hostname='localhost', + timeout=support.LOOPBACK_TIMEOUT) + self.addCleanup(smtp.close) + with ( + mock.patch.object(smtp, "auth_cram_md5") as smtp_auth_cram_md5, + mock.patch.object( + smtp, "auth_plain", wraps=smtp.auth_plain + ) as smtp_auth_plain + ): + resp = smtp.login(sim_auth[0], sim_auth[1]) + smtp_auth_plain.assert_called_once() + smtp_auth_cram_md5.assert_not_called() # no call to HMAC constructor + self.assertEqual(resp, (235, b'Authentication Succeeded')) + @hashlib_helper.requires_hashdigest('md5', openssl=True) def testAUTH_multiple(self): # Test that multiple authentication methods are tried. diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 03c54151a2218f..e66160c5236261 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -52,6 +52,7 @@ VSOCKPORT = 1234 AIX = platform.system() == "AIX" +SOLARIS = sys.platform.startswith("sunos") WSL = "microsoft-standard-WSL" in platform.release() try: @@ -1085,9 +1086,7 @@ def test3542SocketOptions(self): 'IPV6_USE_MIN_MTU', } for opt in opts: - self.assertTrue( - hasattr(socket, opt), f"Missing RFC3542 socket option '{opt}'" - ) + self.assertHasAttr(socket, opt) def testHostnameRes(self): # Testing hostname resolution mechanisms @@ -1175,7 +1174,10 @@ def testInterfaceNameIndex(self): 'socket.if_indextoname() not available.') @support.skip_android_selinux('if_indextoname') def testInvalidInterfaceIndexToName(self): - self.assertRaises(OSError, socket.if_indextoname, 0) + with self.assertRaises(OSError) as cm: + socket.if_indextoname(0) + self.assertIsNotNone(cm.exception.errno) + self.assertRaises(ValueError, socket.if_indextoname, -1) self.assertRaises(OverflowError, socket.if_indextoname, 2**1000) self.assertRaises(TypeError, socket.if_indextoname, '_DEADBEEF') @@ -1195,8 +1197,11 @@ def testInvalidInterfaceIndexToName(self): 'socket.if_nametoindex() not available.') @support.skip_android_selinux('if_nametoindex') def testInvalidInterfaceNameToIndex(self): + with self.assertRaises(OSError) as cm: + socket.if_nametoindex("_DEADBEEF") + self.assertIsNotNone(cm.exception.errno) + self.assertRaises(TypeError, socket.if_nametoindex, 0) - self.assertRaises(OSError, socket.if_nametoindex, '_DEADBEEF') @unittest.skipUnless(hasattr(sys, 'getrefcount'), 'test needs sys.getrefcount()') @@ -1593,11 +1598,11 @@ def test_getsockaddrarg(self): @unittest.skipUnless(os.name == "nt", "Windows specific") def test_sock_ioctl(self): - self.assertTrue(hasattr(socket.socket, 'ioctl')) - self.assertTrue(hasattr(socket, 'SIO_RCVALL')) - self.assertTrue(hasattr(socket, 'RCVALL_ON')) - self.assertTrue(hasattr(socket, 'RCVALL_OFF')) - self.assertTrue(hasattr(socket, 'SIO_KEEPALIVE_VALS')) + self.assertHasAttr(socket.socket, 'ioctl') + self.assertHasAttr(socket, 'SIO_RCVALL') + self.assertHasAttr(socket, 'RCVALL_ON') + self.assertHasAttr(socket, 'RCVALL_OFF') + self.assertHasAttr(socket, 'SIO_KEEPALIVE_VALS') s = socket.socket() self.addCleanup(s.close) self.assertRaises(ValueError, s.ioctl, -1, None) @@ -3890,6 +3895,10 @@ def testCMSG_SPACE(self): # Test CMSG_SPACE() with various valid and invalid values, # checking the assumptions used by sendmsg(). toobig = self.socklen_t_limit - socket.CMSG_SPACE(1) + 1 + if SOLARIS and platform.processor() == "sparc": + # On Solaris SPARC, number of bytes returned by socket.CMSG_SPACE + # increases at different lengths; see gh-91214. + toobig -= 3 values = list(range(257)) + list(range(toobig - 257, toobig)) last = socket.CMSG_SPACE(0) @@ -4036,6 +4045,7 @@ def _testFDPassCMSG_LEN(self): self.createAndSendFDs(1) @unittest.skipIf(is_apple, "skipping, see issue #12958") + @unittest.skipIf(SOLARIS, "skipping, see gh-91214") @unittest.skipIf(AIX, "skipping, see issue #22397") @requireAttrs(socket, "CMSG_SPACE") def testFDPassSeparate(self): @@ -4047,6 +4057,7 @@ def testFDPassSeparate(self): @testFDPassSeparate.client_skip @unittest.skipIf(is_apple, "skipping, see issue #12958") + @unittest.skipIf(SOLARIS, "skipping, see gh-91214") @unittest.skipIf(AIX, "skipping, see issue #22397") def _testFDPassSeparate(self): fd0, fd1 = self.newFDs(2) @@ -4060,6 +4071,7 @@ def _testFDPassSeparate(self): len(MSG)) @unittest.skipIf(is_apple, "skipping, see issue #12958") + @unittest.skipIf(SOLARIS, "skipping, see gh-91214") @unittest.skipIf(AIX, "skipping, see issue #22397") @requireAttrs(socket, "CMSG_SPACE") def testFDPassSeparateMinSpace(self): @@ -4074,6 +4086,7 @@ def testFDPassSeparateMinSpace(self): @testFDPassSeparateMinSpace.client_skip @unittest.skipIf(is_apple, "skipping, see issue #12958") + @unittest.skipIf(SOLARIS, "skipping, see gh-91214") @unittest.skipIf(AIX, "skipping, see issue #22397") def _testFDPassSeparateMinSpace(self): fd0, fd1 = self.newFDs(2) @@ -6082,10 +6095,10 @@ def testTimeoutZero(self): class TestExceptions(unittest.TestCase): def testExceptionTree(self): - self.assertTrue(issubclass(OSError, Exception)) - self.assertTrue(issubclass(socket.herror, OSError)) - self.assertTrue(issubclass(socket.gaierror, OSError)) - self.assertTrue(issubclass(socket.timeout, OSError)) + self.assertIsSubclass(OSError, Exception) + self.assertIsSubclass(socket.herror, OSError) + self.assertIsSubclass(socket.gaierror, OSError) + self.assertIsSubclass(socket.timeout, OSError) self.assertIs(socket.error, OSError) self.assertIs(socket.timeout, TimeoutError) @@ -7047,8 +7060,14 @@ def test_aes_cbc(self): self.assertEqual(len(dec), msglen * multiplier) self.assertEqual(dec, msg * multiplier) - @support.requires_linux_version(4, 9) # see issue29324 + @support.requires_linux_version(4, 9) # see gh-73510 def test_aead_aes_gcm(self): + kernel_version = support._get_kernel_version("Linux") + if kernel_version is not None: + if kernel_version >= (6, 16) and kernel_version < (6, 18): + # See https://github.com/python/cpython/issues/139310. + self.skipTest("upstream Linux kernel issue") + key = bytes.fromhex('c939cc13397c1d37de6ae0e1cb7c423c') iv = bytes.fromhex('b3d8cc017cbb89b39e0f67e2') plain = bytes.fromhex('c3b3c41f113a31b73d9a5cd432103069') diff --git a/Lib/test/test_socketserver.py b/Lib/test/test_socketserver.py index 0f62f9eb200e42..2ca356606b260c 100644 --- a/Lib/test/test_socketserver.py +++ b/Lib/test/test_socketserver.py @@ -218,12 +218,16 @@ def test_ForkingUDPServer(self): self.dgram_examine) @requires_unix_sockets + @unittest.skipIf(test.support.is_apple_mobile and test.support.on_github_actions, + "gh-140702: Test fails regularly on iOS simulator on GitHub Actions") def test_UnixDatagramServer(self): self.run_server(socketserver.UnixDatagramServer, socketserver.DatagramRequestHandler, self.dgram_examine) @requires_unix_sockets + @unittest.skipIf(test.support.is_apple_mobile and test.support.on_github_actions, + "gh-140702: Test fails regularly on iOS simulator on GitHub Actions") def test_ThreadingUnixDatagramServer(self): self.run_server(socketserver.ThreadingUnixDatagramServer, socketserver.DatagramRequestHandler, diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py index 61b00778f8361c..36c8d87182dcf9 100644 --- a/Lib/test/test_source_encoding.py +++ b/Lib/test/test_source_encoding.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- import unittest -from test.support import script_helper, captured_stdout, requires_subprocess, requires_resource +from test import support +from test.support import script_helper from test.support.os_helper import TESTFN, unlink, rmtree from test.support.import_helper import unload import importlib @@ -64,7 +65,7 @@ def test_issue7820(self): # two bytes in common with the UTF-8 BOM self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20') - @requires_subprocess() + @support.requires_subprocess() def test_20731(self): sub = subprocess.Popen([sys.executable, os.path.join(os.path.dirname(__file__), @@ -145,8 +146,7 @@ def test_error_from_string(self): compile(input, "<string>", "exec") expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \ "ordinal not in range(128)" - self.assertTrue(c.exception.args[0].startswith(expected), - msg=c.exception.args[0]) + self.assertStartsWith(c.exception.args[0], expected) def test_file_parse_error_multiline(self): # gh96611: @@ -173,6 +173,8 @@ def test_tokenizer_fstring_warning_in_first_line(self): os.unlink(TESTFN) +BUFSIZ = 2**13 + class AbstractSourceEncodingTest: def test_default_coding(self): @@ -185,14 +187,20 @@ def test_first_coding_line(self): self.check_script_output(src, br"'\xc3\u20ac'") def test_second_coding_line(self): - src = (b'#\n' + src = (b'#!/usr/bin/python\n' + b'#coding:iso8859-15\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xc3\u20ac'") + + def test_second_coding_line_empty_first_line(self): + src = (b'\n' b'#coding:iso8859-15\n' b'print(ascii("\xc3\xa4"))\n') self.check_script_output(src, br"'\xc3\u20ac'") def test_third_coding_line(self): # Only first two lines are tested for a magic comment. - src = (b'#\n' + src = (b'#!/usr/bin/python\n' b'#\n' b'#coding:iso8859-15\n' b'print(ascii("\xc3\xa4"))\n') @@ -210,48 +218,197 @@ def test_double_coding_same_line(self): b'print(ascii("\xc3\xa4"))\n') self.check_script_output(src, br"'\xc3\u20ac'") + def test_double_coding_utf8(self): + src = (b'#coding:utf-8\n' + b'#coding:latin1\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xe4'") + + def test_long_first_coding_line(self): + src = (b'#' + b' '*BUFSIZ + b'coding:iso8859-15\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xc3\u20ac'") + + def test_long_second_coding_line(self): + src = (b'#!/usr/bin/python\n' + b'#' + b' '*BUFSIZ + b'coding:iso8859-15\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xc3\u20ac'") + + def test_long_coding_line(self): + src = (b'#coding:iso-8859-15' + b' '*BUFSIZ + b'\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xc3\u20ac'") + + def test_long_coding_name(self): + src = (b'#coding:iso-8859-1-' + b'x'*BUFSIZ + b'\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xc3\xa4'") + + def test_long_first_utf8_line(self): + src = b'#' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n' + self.check_script_output(src, b'') + src = b'# ' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n' + self.check_script_output(src, b'') + + def test_long_second_utf8_line(self): + src = b'\n#' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n' + self.check_script_output(src, b'') + src = b'\n# ' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n' + self.check_script_output(src, b'') + def test_first_non_utf8_coding_line(self): src = (b'#coding:iso-8859-15 \xa4\n' b'print(ascii("\xc3\xa4"))\n') self.check_script_output(src, br"'\xc3\u20ac'") def test_second_non_utf8_coding_line(self): - src = (b'\n' + src = (b'#!/usr/bin/python\n' b'#coding:iso-8859-15 \xa4\n' b'print(ascii("\xc3\xa4"))\n') self.check_script_output(src, br"'\xc3\u20ac'") + def test_first_utf8_coding_line_error(self): + src = (b'#coding:ascii \xc3\xa4\n' + b'raise RuntimeError\n') + self.check_script_error(src, br"(\(unicode error\) )?'ascii' codec can't decode byte") + + def test_second_utf8_coding_line_error(self): + src = (b'#!/usr/bin/python\n' + b'#coding:ascii \xc3\xa4\n' + b'raise RuntimeError\n') + self.check_script_error(src, br"(\(unicode error\) )?'ascii' codec can't decode byte") + def test_utf8_bom(self): src = (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n') self.check_script_output(src, br"'\xe4'") + def test_utf8_bom_utf8_comments(self): + src = (b'\xef\xbb\xbf#\xc3\xa4\n' + b'#\xc3\xa4\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xe4'") + def test_utf8_bom_and_utf8_coding_line(self): src = (b'\xef\xbb\xbf#coding:utf-8\n' b'print(ascii("\xc3\xa4"))\n') self.check_script_output(src, br"'\xe4'") + def test_utf8_bom_and_non_utf8_first_coding_line(self): + src = (b'\xef\xbb\xbf#coding:iso-8859-15\n' + b'raise RuntimeError\n') + self.check_script_error(src, + br"encoding problem: iso-8859-15 with BOM", + lineno=1) + + def test_utf8_bom_and_non_utf8_second_coding_line(self): + src = (b'\xef\xbb\xbf#first\n' + b'#coding:iso-8859-15\n' + b'raise RuntimeError\n') + self.check_script_error(src, + br"encoding problem: iso-8859-15 with BOM", + lineno=2) + + def test_non_utf8_shebang(self): + src = (b'#!/home/\xa4/bin/python\n' + b'#coding:iso-8859-15\n' + b'print(ascii("\xc3\xa4"))\n') + self.check_script_output(src, br"'\xc3\u20ac'") + + def test_utf8_shebang_error(self): + src = (b'#!/home/\xc3\xa4/bin/python\n' + b'#coding:ascii\n' + b'raise RuntimeError\n') + self.check_script_error(src, br"(\(unicode error\) )?'ascii' codec can't decode byte") + + def test_non_utf8_shebang_error(self): + src = (b'#!/home/\xa4/bin/python\n' + b'raise RuntimeError\n') + self.check_script_error(src, br"Non-UTF-8 code starting with .* on line 1", + lineno=1) + + def test_non_utf8_second_line_error(self): + src = (b'#first\n' + b'#second\xa4\n' + b'raise RuntimeError\n') + self.check_script_error(src, + br"Non-UTF-8 code starting with .* on line 2", + lineno=2) + + def test_non_utf8_third_line_error(self): + src = (b'#first\n' + b'#second\n' + b'#third\xa4\n' + b'raise RuntimeError\n') + self.check_script_error(src, + br"Non-UTF-8 code starting with .* on line 3", + lineno=3) + + def test_utf8_bom_non_utf8_third_line_error(self): + src = (b'\xef\xbb\xbf#first\n' + b'#second\n' + b'#third\xa4\n' + b'raise RuntimeError\n') + self.check_script_error(src, + br"Non-UTF-8 code starting with .* on line 3|" + br"'utf-8' codec can't decode byte", + lineno=3) + + def test_utf_8_non_utf8_third_line_error(self): + src = (b'#coding: utf-8\n' + b'#second\n' + b'#third\xa4\n' + b'raise RuntimeError\n') + self.check_script_error(src, + br"Non-UTF-8 code starting with .* on line 3|" + br"'utf-8' codec can't decode byte", + lineno=3) + + def test_utf8_non_utf8_third_line_error(self): + src = (b'#coding: utf8\n' + b'#second\n' + b'#third\xa4\n' + b'raise RuntimeError\n') + self.check_script_error(src, + br"'utf-8' codec can't decode byte|" + br"encoding problem: utf8") + def test_crlf(self): src = (b'print(ascii("""\r\n"""))\n') - out = self.check_script_output(src, br"'\n'") + self.check_script_output(src, br"'\n'") def test_crcrlf(self): src = (b'print(ascii("""\r\r\n"""))\n') - out = self.check_script_output(src, br"'\n\n'") + self.check_script_output(src, br"'\n\n'") def test_crcrcrlf(self): src = (b'print(ascii("""\r\r\r\n"""))\n') - out = self.check_script_output(src, br"'\n\n\n'") + self.check_script_output(src, br"'\n\n\n'") def test_crcrcrlf2(self): src = (b'#coding:iso-8859-1\n' b'print(ascii("""\r\r\r\n"""))\n') - out = self.check_script_output(src, br"'\n\n\n'") + self.check_script_output(src, br"'\n\n\n'") + + def test_nul_in_first_coding_line(self): + src = (b'#coding:iso8859-15\x00\n' + b'\n' + b'\n' + b'raise RuntimeError\n') + self.check_script_error(src, br"source code (string )?cannot contain null bytes") + + def test_nul_in_second_coding_line(self): + src = (b'#!/usr/bin/python\n' + b'#coding:iso8859-15\x00\n' + b'\n' + b'raise RuntimeError\n') + self.check_script_error(src, br"source code (string )?cannot contain null bytes") class UTF8ValidatorTest(unittest.TestCase): @unittest.skipIf(not sys.platform.startswith("linux"), "Too slow to run on non-Linux platforms") - @requires_resource('cpu') + @support.requires_resource('cpu') def test_invalid_utf8(self): # This is a port of test_utf8_decode_invalid_sequences in # test_unicode.py to exercise the separate utf8 validator in @@ -317,15 +474,29 @@ def check(content): check(b'\xF4'+cb+b'\xBF\xBF') +@support.force_not_colorized_test_class class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase): def check_script_output(self, src, expected): - with captured_stdout() as stdout: + with support.captured_stdout() as stdout: exec(src) out = stdout.getvalue().encode('latin1') self.assertEqual(out.rstrip(), expected) + def check_script_error(self, src, expected, lineno=...): + with self.assertRaises(SyntaxError) as cm: + exec(src) + exc = cm.exception + self.assertRegex(str(exc), expected.decode()) + if lineno is not ...: + self.assertEqual(exc.lineno, lineno) + line = src.splitlines()[lineno-1].decode(errors='replace') + if lineno == 1: + line = line.removeprefix('\ufeff') + self.assertEqual(line, exc.text) + +@support.force_not_colorized_test_class class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase): def check_script_output(self, src, expected): @@ -336,6 +507,24 @@ def check_script_output(self, src, expected): res = script_helper.assert_python_ok(fn) self.assertEqual(res.out.rstrip(), expected) + def check_script_error(self, src, expected, lineno=...): + with tempfile.TemporaryDirectory() as tmpd: + fn = os.path.join(tmpd, 'test.py') + with open(fn, 'wb') as fp: + fp.write(src) + res = script_helper.assert_python_failure(fn) + err = res.err.rstrip() + self.assertRegex(err.splitlines()[-1], b'SyntaxError: ' + expected) + if lineno is not ...: + self.assertIn(f', line {lineno}\n'.encode(), + err.replace(os.linesep.encode(), b'\n')) + line = src.splitlines()[lineno-1].decode(errors='replace') + if lineno == 1: + line = line.removeprefix('\ufeff') + line = line.encode(sys.__stderr__.encoding, sys.__stderr__.errors) + self.assertIn(line, err) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sqlite3/__init__.py b/Lib/test/test_sqlite3/__init__.py index d777fca82da4b0..78a1e2078a5da0 100644 --- a/Lib/test/test_sqlite3/__init__.py +++ b/Lib/test/test_sqlite3/__init__.py @@ -8,8 +8,7 @@ # Implement the unittest "load tests" protocol. def load_tests(*args): + if verbose: + print(f"test_sqlite3: testing with SQLite version {sqlite3.sqlite_version}") pkg_dir = os.path.dirname(__file__) return load_package_tests(pkg_dir, *args) - -if verbose: - print(f"test_sqlite3: testing with SQLite version {sqlite3.sqlite_version}") diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index ad0dcb3cccb5da..a03d7cbe16ba84 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -116,6 +116,38 @@ def test_interact_version(self): self.assertEqual(out.count(self.PS2), 0) self.assertIn(sqlite3.sqlite_version, out) + def test_interact_empty_source(self): + out, err = self.run_cli(commands=("", " ")) + self.assertIn(self.MEMORY_DB_MSG, err) + self.assertEndsWith(out, self.PS1) + self.assertEqual(out.count(self.PS1), 3) + self.assertEqual(out.count(self.PS2), 0) + + def test_interact_dot_commands_unknown(self): + out, err = self.run_cli(commands=(".unknown_command", )) + self.assertIn(self.MEMORY_DB_MSG, err) + self.assertEndsWith(out, self.PS1) + self.assertEqual(out.count(self.PS1), 2) + self.assertEqual(out.count(self.PS2), 0) + self.assertIn("Error", err) + # test "unknown_command" is pointed out in the error message + self.assertIn("unknown_command", err) + + def test_interact_dot_commands_empty(self): + out, err = self.run_cli(commands=(".")) + self.assertIn(self.MEMORY_DB_MSG, err) + self.assertEndsWith(out, self.PS1) + self.assertEqual(out.count(self.PS1), 2) + self.assertEqual(out.count(self.PS2), 0) + + def test_interact_dot_commands_with_whitespaces(self): + out, err = self.run_cli(commands=(".version ", ". version")) + self.assertIn(self.MEMORY_DB_MSG, err) + self.assertEqual(out.count(sqlite3.sqlite_version + "\n"), 2) + self.assertEndsWith(out, self.PS1) + self.assertEqual(out.count(self.PS1), 3) + self.assertEqual(out.count(self.PS2), 0) + def test_interact_valid_sql(self): out, err = self.run_cli(commands=("SELECT 1;",)) self.assertIn(self.MEMORY_DB_MSG, err) diff --git a/Lib/test/test_sqlite3/test_dbapi.py b/Lib/test/test_sqlite3/test_dbapi.py index c3aa3bf2d7bd9f..1e595bdbd645a2 100644 --- a/Lib/test/test_sqlite3/test_dbapi.py +++ b/Lib/test/test_sqlite3/test_dbapi.py @@ -21,6 +21,7 @@ # 3. This notice may not be removed or altered from any source distribution. import contextlib +import functools import os import sqlite3 as sqlite import subprocess @@ -31,8 +32,7 @@ import warnings from test.support import ( - SHORT_TIMEOUT, check_disallow_instantiation, requires_subprocess, - is_apple, is_emscripten, is_wasi + SHORT_TIMEOUT, check_disallow_instantiation, requires_subprocess ) from test.support import gc_collect from test.support import threading_helper, import_helper @@ -639,6 +639,14 @@ def test_deserialize_corrupt_database(self): class OpenTests(unittest.TestCase): _sql = "create table test(id integer)" + def test_open_with_bytes_path(self): + path = os.fsencode(TESTFN) + self.addCleanup(unlink, path) + self.assertFalse(os.path.exists(path)) + with contextlib.closing(sqlite.connect(path)) as cx: + self.assertTrue(os.path.exists(path)) + cx.execute(self._sql) + def test_open_with_path_like_object(self): """ Checks that we can successfully connect to a database using an object that is PathLike, i.e. has __fspath__(). """ @@ -649,14 +657,21 @@ def test_open_with_path_like_object(self): self.assertTrue(os.path.exists(path)) cx.execute(self._sql) + def get_undecodable_path(self): + path = TESTFN_UNDECODABLE + if not path: + self.skipTest("only works if there are undecodable paths") + try: + open(path, 'wb').close() + except OSError: + self.skipTest(f"can't create file with undecodable path {path!r}") + unlink(path) + return path + @unittest.skipIf(sys.platform == "win32", "skipped on Windows") - @unittest.skipIf(is_apple, "skipped on Apple platforms") - @unittest.skipIf(is_emscripten or is_wasi, "not supported on Emscripten/WASI") - @unittest.skipUnless(TESTFN_UNDECODABLE, "only works if there are undecodable paths") def test_open_with_undecodable_path(self): - path = TESTFN_UNDECODABLE + path = self.get_undecodable_path() self.addCleanup(unlink, path) - self.assertFalse(os.path.exists(path)) with contextlib.closing(sqlite.connect(path)) as cx: self.assertTrue(os.path.exists(path)) cx.execute(self._sql) @@ -696,14 +711,10 @@ def test_open_uri_readonly(self): cx.execute(self._sql) @unittest.skipIf(sys.platform == "win32", "skipped on Windows") - @unittest.skipIf(is_apple, "skipped on Apple platforms") - @unittest.skipIf(is_emscripten or is_wasi, "not supported on Emscripten/WASI") - @unittest.skipUnless(TESTFN_UNDECODABLE, "only works if there are undecodable paths") def test_open_undecodable_uri(self): - path = TESTFN_UNDECODABLE + path = self.get_undecodable_path() self.addCleanup(unlink, path) uri = "file:" + urllib.parse.quote(path) - self.assertFalse(os.path.exists(path)) with contextlib.closing(sqlite.connect(uri, uri=True)) as cx: self.assertTrue(os.path.exists(path)) cx.execute(self._sql) @@ -1058,7 +1069,7 @@ def test_array_size(self): # now set to 2 self.cu.arraysize = 2 - # now make the query return 3 rows + # now make the query return 2 rows from a table of 3 rows self.cu.execute("delete from test") self.cu.execute("insert into test(name) values ('A')") self.cu.execute("insert into test(name) values ('B')") @@ -1068,13 +1079,50 @@ def test_array_size(self): self.assertEqual(len(res), 2) + def test_invalid_array_size(self): + UINT32_MAX = (1 << 32) - 1 + setter = functools.partial(setattr, self.cu, 'arraysize') + + self.assertRaises(TypeError, setter, 1.0) + self.assertRaises(ValueError, setter, -3) + self.assertRaises(OverflowError, setter, UINT32_MAX + 1) + def test_fetchmany(self): + # no active SQL statement + res = self.cu.fetchmany() + self.assertEqual(res, []) + res = self.cu.fetchmany(1000) + self.assertEqual(res, []) + + # test default parameter + self.cu.execute("select name from test") + res = self.cu.fetchmany() + self.assertEqual(len(res), 1) + + # test when the number of requested rows exceeds the actual count + self.cu.execute("select name from test") + res = self.cu.fetchmany(100) + self.assertEqual(len(res), 1) + res = self.cu.fetchmany(100) + self.assertEqual(res, []) + + # test when size = 0 self.cu.execute("select name from test") + res = self.cu.fetchmany(0) + self.assertEqual(res, []) res = self.cu.fetchmany(100) self.assertEqual(len(res), 1) res = self.cu.fetchmany(100) self.assertEqual(res, []) + def test_invalid_fetchmany(self): + UINT32_MAX = (1 << 32) - 1 + fetchmany = self.cu.fetchmany + + self.assertRaises(TypeError, fetchmany, 1.0) + self.assertRaises(ValueError, fetchmany, -3) + self.assertRaises(OverflowError, fetchmany, UINT32_MAX + 1) + def test_fetchmany_kw_arg(self): """Checks if fetchmany works with keyword arguments""" self.cu.execute("select name from test") diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 395b2ef88ab622..67a63907293e8e 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -31,6 +31,7 @@ import platform import sysconfig import functools +from contextlib import nullcontext try: import ctypes except ImportError: @@ -539,9 +540,9 @@ def test_openssl_version(self): openssl_ver = f"OpenSSL {major:d}.{minor:d}.{patch:d}" else: openssl_ver = f"OpenSSL {major:d}.{minor:d}.{fix:d}" - self.assertTrue( - s.startswith((openssl_ver, libressl_ver, "AWS-LC")), - (s, t, hex(n)) + self.assertStartsWith( + s, (openssl_ver, libressl_ver, "AWS-LC"), + (t, hex(n)) ) @support.cpython_only @@ -1238,6 +1239,25 @@ def getpass(self): # Make sure the password function isn't called if it isn't needed ctx.load_cert_chain(CERTFILE, password=getpass_exception) + @threading_helper.requires_working_threading() + def test_load_cert_chain_thread_safety(self): + # gh-134698: _ssl detaches the thread state (and as such, + # releases the GIL and critical sections) around expensive + # OpenSSL calls. Unfortunately, OpenSSL structures aren't + # thread-safe, so executing these calls concurrently led + # to crashes. + ctx = ssl.create_default_context() + + def race(): + ctx.load_cert_chain(CERTFILE) + + threads = [threading.Thread(target=race) for _ in range(8)] + with threading_helper.catch_threading_exception() as cm: + with threading_helper.start_threads(threads): + pass + + self.assertIsNone(cm.exc_value) + def test_load_verify_locations(self): ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) ctx.load_verify_locations(CERTFILE) @@ -1668,7 +1688,7 @@ def test_lib_reason(self): regex = "(NO_START_LINE|UNSUPPORTED_PUBLIC_KEY_TYPE)" self.assertRegex(cm.exception.reason, regex) s = str(cm.exception) - self.assertTrue("NO_START_LINE" in s, s) + self.assertIn("NO_START_LINE", s) def test_subclass(self): # Check that the appropriate SSLError subclass is raised @@ -1683,7 +1703,7 @@ def test_subclass(self): with self.assertRaises(ssl.SSLWantReadError) as cm: c.do_handshake() s = str(cm.exception) - self.assertTrue(s.startswith("The operation did not complete (read)"), s) + self.assertStartsWith(s, "The operation did not complete (read)") # For compatibility self.assertEqual(cm.exception.errno, ssl.SSL_ERROR_WANT_READ) @@ -2843,6 +2863,7 @@ def test_ssl_in_multiple_threads(self): # See GH-124984: OpenSSL is not thread safe. threads = [] + warnings_filters = sys.flags.context_aware_warnings global USE_SAME_TEST_CONTEXT USE_SAME_TEST_CONTEXT = True try: @@ -2851,7 +2872,10 @@ def test_ssl_in_multiple_threads(self): self.test_alpn_protocols, self.test_getpeercert, self.test_crl_check, - self.test_check_hostname_idn, + functools.partial( + self.test_check_hostname_idn, + warnings_filters=warnings_filters, + ), self.test_wrong_cert_tls12, self.test_wrong_cert_tls13, ): @@ -3097,7 +3121,7 @@ def test_dual_rsa_ecc(self): cipher = s.cipher()[0].split('-') self.assertTrue(cipher[:2], ('ECDHE', 'ECDSA')) - def test_check_hostname_idn(self): + def test_check_hostname_idn(self, warnings_filters=True): if support.verbose: sys.stdout.write("\n") @@ -3152,16 +3176,30 @@ def test_check_hostname_idn(self): server_hostname="python.example.org") as s: with self.assertRaises(ssl.CertificateError): s.connect((HOST, server.port)) - with ThreadedEchoServer(context=server_context, chatty=True) as server: - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(UnicodeError): - context.wrap_socket(socket.socket(), - server_hostname='.pythontest.net') - with ThreadedEchoServer(context=server_context, chatty=True) as server: - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(UnicodeDecodeError): - context.wrap_socket(socket.socket(), - server_hostname=b'k\xf6nig.idn.pythontest.net') + with ( + ThreadedEchoServer(context=server_context, chatty=True) as server, + ( + warnings_helper.check_no_resource_warning(self) + if warnings_filters + else nullcontext() + ), + self.assertRaises(UnicodeError), + ): + context.wrap_socket(socket.socket(), server_hostname='.pythontest.net') + + with ( + ThreadedEchoServer(context=server_context, chatty=True) as server, + ( + warnings_helper.check_no_resource_warning(self) + if warnings_filters + else nullcontext() + ), + self.assertRaises(UnicodeDecodeError), + ): + context.wrap_socket( + socket.socket(), + server_hostname=b'k\xf6nig.idn.pythontest.net', + ) def test_wrong_cert_tls12(self): """Connecting when the server rejects the client's certificate @@ -4519,6 +4557,42 @@ def server_callback(identity): with client_context.wrap_socket(socket.socket()) as s: s.connect((HOST, server.port)) + def test_thread_recv_while_main_thread_sends(self): + # GH-137583: Locking was added to calls to send() and recv() on SSL + # socket objects. This seemed fine at the surface level because those + # calls weren't re-entrant, but recv() calls would implicitly mimick + # holding a lock by blocking until it received data. This means that + # if a thread started to infinitely block until data was received, calls + # to send() would deadlock, because it would wait forever on the lock + # that the recv() call held. + data = b"1" * 1024 + event = threading.Event() + def background(sock): + event.set() + received = sock.recv(len(data)) + self.assertEqual(received, data) + + client_context, server_context, hostname = testing_context() + server = ThreadedEchoServer(context=server_context) + with server: + with client_context.wrap_socket(socket.socket(), + server_hostname=hostname) as sock: + sock.connect((HOST, server.port)) + sock.settimeout(1) + sock.setblocking(1) + # Ensure that the server is ready to accept requests + sock.sendall(b"123") + self.assertEqual(sock.recv(3), b"123") + with threading_helper.catch_threading_exception() as cm: + thread = threading.Thread(target=background, + args=(sock,), daemon=True) + thread.start() + event.wait() + sock.sendall(data) + thread.join() + if cm.exc_value is not None: + raise cm.exc_value + @unittest.skipUnless(has_tls_version('TLSv1_3') and ssl.HAS_PHA, "Test needs TLS 1.3 PHA") @@ -5285,7 +5359,7 @@ def test_tlsalerttype(self): class Checked_TLSAlertType(enum.IntEnum): """Alert types for TLSContentType.ALERT messages - See RFC 8466, section B.2 + See RFC 8446, section B.2 """ CLOSE_NOTIFY = 0 UNEXPECTED_MESSAGE = 10 diff --git a/Lib/test/test_stat.py b/Lib/test/test_stat.py index 49013a4bcd8af6..5fd25d5012c425 100644 --- a/Lib/test/test_stat.py +++ b/Lib/test/test_stat.py @@ -157,7 +157,7 @@ def test_mode(self): os.chmod(TESTFN, 0o700) st_mode, modestr = self.get_mode() - self.assertEqual(modestr[:3], '-rw') + self.assertStartsWith(modestr, '-rw') self.assertS_IS("REG", st_mode) self.assertEqual(self.statmod.S_IFMT(st_mode), self.statmod.S_IFREG) @@ -256,7 +256,7 @@ def test_flags_consistent(self): "FILE_ATTRIBUTE_* constants are Win32 specific") def test_file_attribute_constants(self): for key, value in sorted(self.file_attributes.items()): - self.assertTrue(hasattr(self.statmod, key), key) + self.assertHasAttr(self.statmod, key) modvalue = getattr(self.statmod, key) self.assertEqual(value, modvalue, key) @@ -314,7 +314,7 @@ def test_macosx_attribute_values(self): self.assertEqual(self.statmod.S_ISGID, 0o002000) self.assertEqual(self.statmod.S_ISVTX, 0o001000) - self.assertFalse(hasattr(self.statmod, "S_ISTXT")) + self.assertNotHasAttr(self.statmod, "S_ISTXT") self.assertEqual(self.statmod.S_IREAD, self.statmod.S_IRUSR) self.assertEqual(self.statmod.S_IWRITE, self.statmod.S_IWUSR) self.assertEqual(self.statmod.S_IEXEC, self.statmod.S_IXUSR) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index c69baa4bf4d1b1..677a87b51b9192 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -645,7 +645,7 @@ def do_test(self, args): def test_numerictestcase_is_testcase(self): # Ensure that NumericTestCase actually is a TestCase. - self.assertTrue(issubclass(NumericTestCase, unittest.TestCase)) + self.assertIsSubclass(NumericTestCase, unittest.TestCase) def test_error_msg_numeric(self): # Test the error message generated for numeric comparisons. @@ -683,32 +683,23 @@ class GlobalsTest(unittest.TestCase): def test_meta(self): # Test for the existence of metadata. for meta in self.expected_metadata: - self.assertTrue(hasattr(self.module, meta), - "%s not present" % meta) + self.assertHasAttr(self.module, meta) def test_check_all(self): # Check everything in __all__ exists and is public. module = self.module for name in module.__all__: # No private names in __all__: - self.assertFalse(name.startswith("_"), + self.assertNotStartsWith(name, "_", 'private name "%s" in __all__' % name) # And anything in __all__ must exist: - self.assertTrue(hasattr(module, name), - 'missing name "%s" in __all__' % name) + self.assertHasAttr(module, name) class StatisticsErrorTest(unittest.TestCase): def test_has_exception(self): - errmsg = ( - "Expected StatisticsError to be a ValueError, but got a" - " subclass of %r instead." - ) - self.assertTrue(hasattr(statistics, 'StatisticsError')) - self.assertTrue( - issubclass(statistics.StatisticsError, ValueError), - errmsg % statistics.StatisticsError.__base__ - ) + self.assertHasAttr(statistics, 'StatisticsError') + self.assertIsSubclass(statistics.StatisticsError, ValueError) # === Tests for private utility functions === @@ -2014,7 +2005,6 @@ def test_iter_list_same(self): expected = self.func(data) self.assertEqual(self.func(iter(data)), expected) - class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin): # Tests for population variance. def setUp(self): @@ -2122,6 +2112,14 @@ def test_center_not_at_mean(self): self.assertEqual(self.func(data), 2.5) self.assertEqual(self.func(data, mu=0.5), 6.5) + def test_gh_140938(self): + # Inputs with inf/nan should raise a ValueError + with self.assertRaises(ValueError): + self.func([1.0, math.inf]) + with self.assertRaises(ValueError): + self.func([1.0, math.nan]) + + class TestSqrtHelpers(unittest.TestCase): def test_integer_sqrt_of_frac_rto(self): @@ -2355,6 +2353,7 @@ def test_mixed_int_and_float(self): class TestKDE(unittest.TestCase): + @support.requires_resource('cpu') def test_kde(self): kde = statistics.kde StatisticsError = statistics.StatisticsError @@ -3327,7 +3326,8 @@ def tearDown(self): def load_tests(loader, tests, ignore): """Used for doctest/unittest integration.""" tests.addTests(doctest.DocTestSuite()) - tests.addTests(doctest.DocTestSuite(statistics)) + if sys.float_repr_style == 'short': + tests.addTests(doctest.DocTestSuite(statistics)) return tests diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index d6a7bd0da59910..2584fbf72d3fa6 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1231,10 +1231,10 @@ def __repr__(self): self.assertEqual('{0:\x00^6}'.format(3), '\x00\x003\x00\x00\x00') self.assertEqual('{0:<6}'.format(3), '3 ') - self.assertEqual('{0:\x00<6}'.format(3.14), '3.14\x00\x00') - self.assertEqual('{0:\x01<6}'.format(3.14), '3.14\x01\x01') - self.assertEqual('{0:\x00^6}'.format(3.14), '\x003.14\x00') - self.assertEqual('{0:^6}'.format(3.14), ' 3.14 ') + self.assertEqual('{0:\x00<6}'.format(3.25), '3.25\x00\x00') + self.assertEqual('{0:\x01<6}'.format(3.25), '3.25\x01\x01') + self.assertEqual('{0:\x00^6}'.format(3.25), '\x003.25\x00') + self.assertEqual('{0:^6}'.format(3.25), ' 3.25 ') self.assertEqual('{0:\x00<12}'.format(3+2.0j), '(3+2j)\x00\x00\x00\x00\x00\x00') self.assertEqual('{0:\x01<12}'.format(3+2.0j), '(3+2j)\x01\x01\x01\x01\x01\x01') diff --git a/Lib/test/test_strftime.py b/Lib/test/test_strftime.py index 752e31359cf206..375f6aaedd8934 100644 --- a/Lib/test/test_strftime.py +++ b/Lib/test/test_strftime.py @@ -39,7 +39,21 @@ def _update_variables(self, now): if now[3] < 12: self.ampm='(AM|am)' else: self.ampm='(PM|pm)' - self.jan1 = time.localtime(time.mktime((now[0], 1, 1, 0, 0, 0, 0, 1, 0))) + jan1 = time.struct_time( + ( + now.tm_year, # Year + 1, # Month (January) + 1, # Day (1st) + 0, # Hour (0) + 0, # Minute (0) + 0, # Second (0) + -1, # tm_wday (will be determined) + 1, # tm_yday (day 1 of the year) + -1, # tm_isdst (let the system determine) + ) + ) + # use mktime to get the correct tm_wday and tm_isdst values + self.jan1 = time.localtime(time.mktime(jan1)) try: if now[8]: self.tz = time.tzname[1] diff --git a/Lib/test/test_string/_support.py b/Lib/test/test_string/_support.py index eaa3354a559246..cfead782b7d4c1 100644 --- a/Lib/test/test_string/_support.py +++ b/Lib/test/test_string/_support.py @@ -3,33 +3,45 @@ class TStringBaseCase: + def assertInterpolationEqual(self, i, exp): + """Test Interpolation equality. + + The *i* argument must be an Interpolation instance. + + The *exp* argument must be a tuple of the form + (value, expression, conversion, format_spec) where the final three + items may be omitted and are assumed to be '', None and '' respectively. + """ + if len(exp) == 4: + actual = (i.value, i.expression, i.conversion, i.format_spec) + self.assertEqual(actual, exp) + elif len(exp) == 3: + self.assertEqual((i.value, i.expression, i.conversion), exp) + self.assertEqual(i.format_spec, "") + elif len(exp) == 2: + self.assertEqual((i.value, i.expression), exp) + self.assertEqual(i.conversion, None) + self.assertEqual(i.format_spec, "") + elif len(exp) == 1: + self.assertEqual((i.value,), exp) + self.assertEqual(i.expression, "") + self.assertEqual(i.conversion, None) + self.assertEqual(i.format_spec, "") + def assertTStringEqual(self, t, strings, interpolations): """Test template string literal equality. The *strings* argument must be a tuple of strings equal to *t.strings*. The *interpolations* argument must be a sequence of tuples which are - compared against *t.interpolations*. Each tuple consists of - (value, expression, conversion, format_spec), though the final two - items may be omitted, and are assumed to be None and '' respectively. + compared against *t.interpolations*. Each tuple must match the form + described in the `assertInterpolationEqual` method. """ self.assertEqual(t.strings, strings) self.assertEqual(len(t.interpolations), len(interpolations)) for i, exp in zip(t.interpolations, interpolations, strict=True): - if len(exp) == 4: - actual = (i.value, i.expression, i.conversion, i.format_spec) - self.assertEqual(actual, exp) - continue - - if len(exp) == 3: - self.assertEqual((i.value, i.expression, i.conversion), exp) - self.assertEqual(i.format_spec, '') - continue - - self.assertEqual((i.value, i.expression), exp) - self.assertEqual(i.format_spec, '') - self.assertIsNone(i.conversion) + self.assertInterpolationEqual(i, exp) def convert(value, conversion): diff --git a/Lib/test/test_string/test_templatelib.py b/Lib/test/test_string/test_templatelib.py index 5b9490c2be6de0..1c86717155fd5a 100644 --- a/Lib/test/test_string/test_templatelib.py +++ b/Lib/test/test_string/test_templatelib.py @@ -1,7 +1,7 @@ import pickle import unittest from collections.abc import Iterator, Iterable -from string.templatelib import Template, Interpolation +from string.templatelib import Template, Interpolation, convert from test.test_string._support import TStringBaseCase, fstring @@ -45,6 +45,19 @@ def test_basic_creation(self): self.assertEqual(len(t.interpolations), 0) self.assertEqual(fstring(t), 'Hello,\nworld') + def test_interpolation_creation(self): + i = Interpolation('Maria', 'name', 'a', 'fmt') + self.assertInterpolationEqual(i, ('Maria', 'name', 'a', 'fmt')) + + i = Interpolation('Maria', 'name', 'a') + self.assertInterpolationEqual(i, ('Maria', 'name', 'a')) + + i = Interpolation('Maria', 'name') + self.assertInterpolationEqual(i, ('Maria', 'name')) + + i = Interpolation('Maria') + self.assertInterpolationEqual(i, ('Maria',)) + def test_creation_interleaving(self): # Should add strings on either side t = Template(Interpolation('Maria', 'name', None, '')) @@ -148,6 +161,33 @@ def test_iter(self): self.assertEqual(res[1].format_spec, '') self.assertEqual(res[2], ' yz') + def test_exhausted(self): + # See https://github.com/python/cpython/issues/134119. + template_iter = iter(t"{1}") + self.assertIsInstance(next(template_iter), Interpolation) + self.assertRaises(StopIteration, next, template_iter) + self.assertRaises(StopIteration, next, template_iter) + + +class TestFunctions(unittest.TestCase): + def test_convert(self): + from fractions import Fraction + + for obj in ('Café', None, 3.14, Fraction(1, 2)): + with self.subTest(f'{obj=}'): + self.assertEqual(convert(obj, None), obj) + self.assertEqual(convert(obj, 's'), str(obj)) + self.assertEqual(convert(obj, 'r'), repr(obj)) + self.assertEqual(convert(obj, 'a'), ascii(obj)) + + # Invalid conversion specifier + with self.assertRaises(ValueError): + convert(obj, 'z') + with self.assertRaises(ValueError): + convert(obj, 1) + with self.assertRaises(ValueError): + convert(obj, object()) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 268230f6da78f8..e277e7f4ef7b1a 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -221,14 +221,16 @@ def test_ValueError(self): self.assertRaises(ValueError, _strptime._strptime_time, data_string="%d", format="%A") for bad_format in ("%", "% ", "%\n"): - with self.assertRaisesRegex(ValueError, "stray % in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, "stray % in format ")): _strptime._strptime_time("2005", bad_format) - for bad_format in ("%e", "%Oe", "%O", "%O ", "%Ee", "%E", "%E ", - "%.", "%+", "%_", "%~", "%\\", + for bad_format in ("%i", "%Oi", "%O", "%O ", "%Ee", "%E", "%E ", + "%.", "%+", "%~", "%\\", "%O.", "%O+", "%O_", "%O~", "%O\\"): directive = bad_format[1:].rstrip() - with self.assertRaisesRegex(ValueError, - f"'{re.escape(directive)}' is a bad directive in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, + f"'{re.escape(directive)}' is a bad directive in format ")): _strptime._strptime_time("2005", bad_format) msg_week_no_year_or_weekday = r"ISO week directive '%V' must be used with " \ @@ -335,6 +337,15 @@ def test_month_locale(self): self.roundtrip('%B', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%b', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) + @run_with_locales('LC_TIME', 'az_AZ', 'ber_DZ', 'ber_MA', 'crh_UA') + def test_month_locale2(self): + # Test for month directives + # Month name contains 'İ' ('\u0130') + self.roundtrip('%B', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + self.roundtrip('%b', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + self.roundtrip('%B', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + self.roundtrip('%b', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + def test_day(self): # Test for day directives self.roundtrip('%d %Y', 2) @@ -480,13 +491,11 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # - # BUG: Generates regexp that does not match the current date and time - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', - 'my_MM', 'or_IN', 'shn_MM', 'az_IR') + 'my_MM', 'or_IN', 'shn_MM', 'az_IR', + 'byn_ER', 'wal_ET', 'lzh_TW') def test_date_time_locale(self): # Test %c directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -525,11 +534,9 @@ def test_date_time_locale2(self): # NB: Does not roundtrip because use non-Gregorian calendar: # lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # BUG: Generates regexp that does not match the current date - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', - 'az_IR', 'my_MM', 'or_IN', 'shn_MM') + 'az_IR', 'my_MM', 'or_IN', 'shn_MM', 'lzh_TW') def test_date_locale(self): # Test %x directive now = time.time() @@ -546,11 +553,11 @@ def test_date_locale(self): # NB: Dates before 1969 do not roundtrip on many locales, including C. @unittest.skipIf(support.linked_to_musl(), "musl libc issue, bpo-46390") @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') + 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM', 'lzh_TW') def test_date_locale2(self): # Test %x directive loc = locale.getlocale(locale.LC_TIME)[0] - if sys.platform.startswith('sunos'): + if sys.platform.startswith(('sunos', 'aix')): if loc in ('en_US', 'de_DE', 'ar_AE'): self.skipTest(f'locale {loc!r} may not work on this platform') self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) @@ -562,11 +569,11 @@ def test_date_locale2(self): # norwegian, nynorsk. # * Hours are in 12-hour notation without AM/PM indication: hy_AM, # ms_MY, sm_WS. - # BUG: Generates regexp that does not match the current time for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET', 'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO', - 'ti_ET', 'tig_ER', 'wal_ET') + 'ti_ET', 'tig_ER', 'wal_ET', 'lzh_TW', + 'ar_SA', 'bg_BG') def test_time_locale(self): # Test %X directive loc = locale.getlocale(locale.LC_TIME)[0] diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 7df01f28f09118..cceecdd526c006 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -5,6 +5,7 @@ import math import operator import unittest +import platform import struct import sys import weakref @@ -799,6 +800,23 @@ def test_c_complex_round_trip(self): round_trip = struct.unpack(f, struct.pack(f, z))[0] self.assertComplexesAreIdentical(z, round_trip) + @unittest.skipIf( + support.is_android or support.is_apple_mobile, + "Subinterpreters are not supported on Android and iOS" + ) + def test_endian_table_init_subinterpreters(self): + # Verify that the _struct extension module can be initialized + # concurrently in subinterpreters (gh-140260). + try: + from concurrent.futures import InterpreterPoolExecutor + except ImportError: + raise unittest.SkipTest("InterpreterPoolExecutor not available") + + code = "import struct" + with InterpreterPoolExecutor(max_workers=5) as executor: + results = executor.map(exec, [code] * 5) + self.assertListEqual(list(results), [None] * 5) + class UnpackIteratorTest(unittest.TestCase): """ @@ -917,10 +935,17 @@ def test_half_float(self): # Check that packing produces a bit pattern representing a quiet NaN: # all exponent bits and the msb of the fraction should all be 1. + if platform.machine().startswith('parisc'): + # HP PA RISC uses 0 for quiet, see: + # https://en.wikipedia.org/wiki/NaN#Encoding + expected = 0x7c + else: + expected = 0x7e + packed = struct.pack('<e', math.nan) - self.assertEqual(packed[1] & 0x7e, 0x7e) + self.assertEqual(packed[1] & 0x7e, expected) packed = struct.pack('<e', -math.nan) - self.assertEqual(packed[1] & 0x7e, 0x7e) + self.assertEqual(packed[1] & 0x7e, expected) # Checks for round-to-even behavior format_bits_float__rounding_list = [ diff --git a/Lib/test/test_structseq.py b/Lib/test/test_structseq.py index d0bc0bd7b61520..9622151143cd78 100644 --- a/Lib/test/test_structseq.py +++ b/Lib/test/test_structseq.py @@ -42,7 +42,7 @@ def test_repr(self): # os.stat() gives a complicated struct sequence. st = os.stat(__file__) rep = repr(st) - self.assertTrue(rep.startswith("os.stat_result")) + self.assertStartsWith(rep, "os.stat_result") self.assertIn("st_mode=", rep) self.assertIn("st_ino=", rep) self.assertIn("st_dev=", rep) @@ -307,7 +307,7 @@ def test_copy_replace_with_invisible_fields(self): self.assertEqual(t5.tm_mon, 2) # named invisible fields - self.assertTrue(hasattr(t, 'tm_zone'), f"{t} has no attribute 'tm_zone'") + self.assertHasAttr(t, 'tm_zone') with self.assertRaisesRegex(AttributeError, 'readonly attribute'): t.tm_zone = 'some other zone' self.assertEqual(t2.tm_zone, t.tm_zone) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index ca35804fb36076..806a1e3fa303eb 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -957,6 +957,48 @@ def test_communicate(self): self.assertEqual(stdout, b"banana") self.assertEqual(stderr, b"pineapple") + def test_communicate_memoryview_input(self): + # Test memoryview input with byte elements + test_data = b"Hello, memoryview!" + mv = memoryview(test_data) + p = subprocess.Popen([sys.executable, "-c", + 'import sys; sys.stdout.write(sys.stdin.read())'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + self.addCleanup(p.stdout.close) + self.addCleanup(p.stdin.close) + (stdout, stderr) = p.communicate(mv) + self.assertEqual(stdout, test_data) + self.assertIsNone(stderr) + + def test_communicate_memoryview_input_nonbyte(self): + # Test memoryview input with non-byte elements (e.g., int32) + # This tests the fix for gh-134453 where non-byte memoryviews + # had incorrect length tracking on POSIX + import array + # Create an array of 32-bit integers that's large enough to trigger + # the chunked writing behavior (> PIPE_BUF) + pipe_buf = getattr(select, 'PIPE_BUF', 512) + # Each 'i' element is 4 bytes, so we need more than pipe_buf/4 elements + # Add some extra to ensure we exceed the buffer size + num_elements = pipe_buf + 1 + test_array = array.array('i', [0x64306f66 for _ in range(num_elements)]) + expected_bytes = test_array.tobytes() + mv = memoryview(test_array) + + p = subprocess.Popen([sys.executable, "-c", + 'import sys; ' + 'data = sys.stdin.buffer.read(); ' + 'sys.stdout.buffer.write(data)'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + self.addCleanup(p.stdout.close) + self.addCleanup(p.stdin.close) + (stdout, stderr) = p.communicate(mv) + self.assertEqual(stdout, expected_bytes, + msg=f"{len(stdout)=} =? {len(expected_bytes)=}") + self.assertIsNone(stderr) + def test_communicate_timeout(self): p = subprocess.Popen([sys.executable, "-c", 'import sys,os,time;' @@ -992,6 +1034,62 @@ def test_communicate_timeout_large_output(self): (stdout, _) = p.communicate() self.assertEqual(len(stdout), 4 * 64 * 1024) + def test_communicate_timeout_large_input(self): + # Test that timeout is enforced when writing large input to a + # slow-to-read subprocess, and that partial input is preserved + # for continuation after timeout (gh-141473). + # + # This is a regression test for Windows matching POSIX behavior. + # On POSIX, select() is used to multiplex I/O with timeout checking. + # On Windows, stdin writing must also honor the timeout rather than + # blocking indefinitely when the pipe buffer fills. + + # Input larger than typical pipe buffer (4-64KB on Windows) + input_data = b"x" * (128 * 1024) + + p = subprocess.Popen( + [sys.executable, "-c", + "import sys, time; " + "time.sleep(30); " # Don't read stdin for a long time + "sys.stdout.buffer.write(sys.stdin.buffer.read())"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + try: + timeout = 0.2 + start = time.monotonic() + try: + p.communicate(input_data, timeout=timeout) + # If we get here without TimeoutExpired, the timeout was ignored + elapsed = time.monotonic() - start + self.fail( + f"TimeoutExpired not raised. communicate() completed in " + f"{elapsed:.2f}s, but subprocess sleeps for 30s. " + "Stdin writing blocked without enforcing timeout.") + except subprocess.TimeoutExpired: + elapsed = time.monotonic() - start + + # Timeout should occur close to the specified timeout value, + # not after waiting for the subprocess to finish sleeping. + # Allow generous margin for slow CI, but must be well under + # the subprocess sleep time. + self.assertLess(elapsed, 5.0, + f"TimeoutExpired raised after {elapsed:.2f}s; expected ~{timeout}s. " + "Stdin writing blocked without checking timeout.") + + # After timeout, continue communication. The remaining input + # should be sent and we should receive all data back. + stdout, stderr = p.communicate() + + # Verify all input was eventually received by the subprocess + self.assertEqual(len(stdout), len(input_data), + f"Expected {len(input_data)} bytes output but got {len(stdout)}") + self.assertEqual(stdout, input_data) + finally: + p.kill() + p.wait() + # Test for the fd leak reported in http://bugs.python.org/issue2791. def test_communicate_pipe_fd_leak(self): for stdin_pipe in (False, True): @@ -1062,6 +1160,19 @@ def test_writes_before_communicate(self): self.assertEqual(stdout, b"bananasplit") self.assertEqual(stderr, b"") + def test_communicate_stdin_closed_before_call(self): + # gh-70560, gh-74389: stdin.close() before communicate() + # should not raise ValueError from stdin.flush() + with subprocess.Popen([sys.executable, "-c", + 'import sys; sys.exit(0)'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) as p: + p.stdin.close() # Close stdin before communicate + # This should not raise ValueError + (stdout, stderr) = p.communicate() + self.assertEqual(p.returncode, 0) + def test_universal_newlines_and_text(self): args = [ sys.executable, "-c", @@ -1178,7 +1289,7 @@ def test_universal_newlines_communicate_stdin_stdout_stderr(self): self.assertEqual("line1\nline2\nline3\nline4\nline5\n", stdout) # Python debug build push something like "[42442 refs]\n" # to stderr at exit of subprocess. - self.assertTrue(stderr.startswith("eline2\neline6\neline7\n")) + self.assertStartsWith(stderr, "eline2\neline6\neline7\n") def test_universal_newlines_communicate_encodings(self): # Check that universal newlines mode works for various encodings, @@ -1510,7 +1621,7 @@ def test_issue8780(self): "[sys.executable, '-c', 'print(\"Hello World!\")'])", 'assert retcode == 0')) output = subprocess.check_output([sys.executable, '-c', code]) - self.assertTrue(output.startswith(b'Hello World!'), ascii(output)) + self.assertStartsWith(output, b'Hello World!') def test_handles_closed_on_exception(self): # If CreateProcess exits with an error, ensure the @@ -1643,6 +1754,40 @@ def test_wait_negative_timeout(self): self.assertEqual(proc.wait(), 0) + def test_post_timeout_communicate_sends_input(self): + """GH-141473 regression test; the stdin pipe must close""" + with subprocess.Popen( + [sys.executable, "-uc", """\ +import sys +while c := sys.stdin.read(512): + sys.stdout.write(c) +print() +"""], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) as proc: + try: + data = f"spam{'#'*4096}beans" + proc.communicate( + input=data, + timeout=0, + ) + except subprocess.TimeoutExpired as exc: + pass + # Prior to the bugfix, this would hang as the stdin + # pipe to the child had not been closed. + try: + stdout, stderr = proc.communicate(timeout=15) + except subprocess.TimeoutExpired as exc: + self.fail("communicate() hung waiting on child process that should have seen its stdin pipe close and exit") + self.assertEqual( + proc.returncode, 0, + msg=f"STDERR:\n{stderr}\nSTDOUT:\n{stdout}") + self.assertStartsWith(stdout, "spam") + self.assertIn("beans", stdout) + class RunFuncTestCase(BaseTestCase): def run_python(self, code, **kwargs): @@ -1835,8 +1980,8 @@ def test_encoding_warning(self): capture_output=True) lines = cp.stderr.splitlines() self.assertEqual(len(lines), 2, lines) - self.assertTrue(lines[0].startswith(b"<string>:2: EncodingWarning: ")) - self.assertTrue(lines[1].startswith(b"<string>:3: EncodingWarning: ")) + self.assertStartsWith(lines[0], b"<string>:2: EncodingWarning: ") + self.assertStartsWith(lines[1], b"<string>:3: EncodingWarning: ") def _get_test_grp_name(): diff --git a/Lib/test/test_super.py b/Lib/test/test_super.py index 5cef612a340be9..193c8b7d7f3e13 100644 --- a/Lib/test/test_super.py +++ b/Lib/test/test_super.py @@ -547,11 +547,11 @@ def test_special_methods(self): self.assertEqual(s.__reduce__, e.__reduce__) self.assertEqual(s.__reduce_ex__, e.__reduce_ex__) self.assertEqual(s.__getstate__, e.__getstate__) - self.assertFalse(hasattr(s, '__getnewargs__')) - self.assertFalse(hasattr(s, '__getnewargs_ex__')) - self.assertFalse(hasattr(s, '__setstate__')) - self.assertFalse(hasattr(s, '__copy__')) - self.assertFalse(hasattr(s, '__deepcopy__')) + self.assertNotHasAttr(s, '__getnewargs__') + self.assertNotHasAttr(s, '__getnewargs_ex__') + self.assertNotHasAttr(s, '__setstate__') + self.assertNotHasAttr(s, '__copy__') + self.assertNotHasAttr(s, '__deepcopy__') def test_pickling(self): e = E() diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index 8446da03e3645b..dba1dbf2c5a8e6 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -407,10 +407,10 @@ class Obj: with support.swap_attr(obj, "y", 5) as y: self.assertEqual(obj.y, 5) self.assertIsNone(y) - self.assertFalse(hasattr(obj, 'y')) + self.assertNotHasAttr(obj, 'y') with support.swap_attr(obj, "y", 5): del obj.y - self.assertFalse(hasattr(obj, 'y')) + self.assertNotHasAttr(obj, 'y') def test_swap_item(self): D = {"x":1} @@ -784,14 +784,14 @@ def test_get_signal_name(self): def test_linked_to_musl(self): linked = support.linked_to_musl() self.assertIsNotNone(linked) - if support.is_wasi or support.is_emscripten: + if support.is_wasm32: self.assertTrue(linked) # The value is cached, so make sure it returns the same value again. self.assertIs(linked, support.linked_to_musl()) - # The unlike libc, the musl version is a triple. + # The musl version is either triple or just a major version number. if linked: self.assertIsInstance(linked, tuple) - self.assertEqual(3, len(linked)) + self.assertIn(len(linked), (1, 3)) for v in linked: self.assertIsInstance(v, int) diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index 24d89b09d946ad..9537de3756fd32 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -527,6 +527,13 @@ def test_symtable_entry_repr(self): expected = f"<symtable entry top({self.top.get_id()}), line {self.top.get_lineno()}>" self.assertEqual(repr(self.top._table), expected) + def test__symtable_refleak(self): + # Regression test for reference leak in PyUnicode_FSDecoder. + # See https://github.com/python/cpython/issues/139748. + mortal_str = 'this is a mortal string' + # check error path when 'compile_type' AC conversion failed + self.assertRaises(TypeError, symtable.symtable, '', mortal_str, 1) + class ComprehensionTests(unittest.TestCase): def get_identifiers_recursive(self, st, res): diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 0ee17849e28121..de8c94fe8d2d18 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -382,6 +382,13 @@ Traceback (most recent call last): SyntaxError: invalid syntax +# But prefixes of soft keywords should +# still raise specialized errors + +>>> (mat x) +Traceback (most recent call last): +SyntaxError: invalid syntax. Perhaps you forgot a comma? + From compiler_complex_args(): >>> def f(None=1): @@ -1431,6 +1438,23 @@ Traceback (most recent call last): SyntaxError: cannot use except* statement with literal +Regression tests for gh-133999: + + >>> try: pass + ... except TypeError as name: raise from None + Traceback (most recent call last): + SyntaxError: invalid syntax + + >>> try: pass + ... except* TypeError as name: raise from None + Traceback (most recent call last): + SyntaxError: invalid syntax + + >>> match 1: + ... case 1 | 2 as abc: raise from None + Traceback (most recent call last): + SyntaxError: invalid syntax + Ensure that early = are not matched by the parser as invalid comparisons >>> f(2, 4, x=34); 1 $ 2 Traceback (most recent call last): @@ -2087,6 +2111,25 @@ Traceback (most recent call last): SyntaxError: cannot use subscript as import target +# Check that we don't raise a "cannot use name as import target" error +# if there is an error in an unrelated statement after ';' + +>>> import a as b; None = 1 +Traceback (most recent call last): +SyntaxError: cannot assign to None + +>>> import a, b as c; d = 1; None = 1 +Traceback (most recent call last): +SyntaxError: cannot assign to None + +>>> from a import b as c; None = 1 +Traceback (most recent call last): +SyntaxError: cannot assign to None + +>>> from a import b, c as d; e = 1; None = 1 +Traceback (most recent call last): +SyntaxError: cannot assign to None + # Check that we dont raise the "trailing comma" error if there is more # input to the left of the valid part that we parsed. diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 59ef5c993099f0..e2117b9588dba2 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -24,7 +24,7 @@ from test.support import force_not_colorized from test.support import SHORT_TIMEOUT try: - from test.support import interpreters + from concurrent import interpreters except ImportError: interpreters = None import textwrap @@ -57,7 +57,7 @@ def test_original_displayhook(self): dh(None) self.assertEqual(out.getvalue(), "") - self.assertTrue(not hasattr(builtins, "_")) + self.assertNotHasAttr(builtins, "_") # sys.displayhook() requires arguments self.assertRaises(TypeError, dh) @@ -172,7 +172,7 @@ def test_original_excepthook(self): with support.captured_stderr() as err: sys.__excepthook__(*sys.exc_info()) - self.assertTrue(err.getvalue().endswith("ValueError: 42\n")) + self.assertEndsWith(err.getvalue(), "ValueError: 42\n") self.assertRaises(TypeError, sys.__excepthook__) @@ -192,7 +192,7 @@ def test_excepthook_bytes_filename(self): err = err.getvalue() self.assertIn(""" File "b'bytes_filename'", line 123\n""", err) self.assertIn(""" text\n""", err) - self.assertTrue(err.endswith("SyntaxError: msg\n")) + self.assertEndsWith(err, "SyntaxError: msg\n") def test_excepthook(self): with test.support.captured_output("stderr") as stderr: @@ -269,8 +269,7 @@ def check_exit_message(code, expected, **env_vars): rc, out, err = assert_python_failure('-c', code, **env_vars) self.assertEqual(rc, 1) self.assertEqual(out, b'') - self.assertTrue(err.startswith(expected), - "%s doesn't start with %s" % (ascii(err), ascii(expected))) + self.assertStartsWith(err, expected) # test that stderr buffer is flushed before the exit message is written # into stderr @@ -437,7 +436,7 @@ def test_call_tracing(self): @unittest.skipUnless(hasattr(sys, "setdlopenflags"), 'test needs sys.setdlopenflags()') def test_dlopenflags(self): - self.assertTrue(hasattr(sys, "getdlopenflags")) + self.assertHasAttr(sys, "getdlopenflags") self.assertRaises(TypeError, sys.getdlopenflags, 42) oldflags = sys.getdlopenflags() self.assertRaises(TypeError, sys.setdlopenflags) @@ -623,8 +622,7 @@ def g456(): # And the next record must be for g456(). filename, lineno, funcname, sourceline = stack[i+1] self.assertEqual(funcname, "g456") - self.assertTrue((sourceline.startswith("if leave_g.wait(") or - sourceline.startswith("g_raised.set()"))) + self.assertStartsWith(sourceline, ("if leave_g.wait(", "g_raised.set()")) finally: # Reap the spawned thread. leave_g.set() @@ -860,7 +858,7 @@ def test_sys_flags(self): "hash_randomization", "isolated", "dev_mode", "utf8_mode", "warn_default_encoding", "safe_path", "int_max_str_digits") for attr in attrs: - self.assertTrue(hasattr(sys.flags, attr), attr) + self.assertHasAttr(sys.flags, attr) attr_type = bool if attr in ("dev_mode", "safe_path") else int self.assertEqual(type(getattr(sys.flags, attr)), attr_type, attr) self.assertTrue(repr(sys.flags)) @@ -871,12 +869,7 @@ def test_sys_flags(self): def assert_raise_on_new_sys_type(self, sys_attr): # Users are intentionally prevented from creating new instances of # sys.flags, sys.version_info, and sys.getwindowsversion. - arg = sys_attr - attr_type = type(sys_attr) - with self.assertRaises(TypeError): - attr_type(arg) - with self.assertRaises(TypeError): - attr_type.__new__(attr_type, arg) + support.check_disallow_instantiation(self, type(sys_attr), sys_attr) def test_sys_flags_no_instantiation(self): self.assert_raise_on_new_sys_type(sys.flags) @@ -1072,10 +1065,11 @@ def test_implementation(self): levels = {'alpha': 0xA, 'beta': 0xB, 'candidate': 0xC, 'final': 0xF} - self.assertTrue(hasattr(sys.implementation, 'name')) - self.assertTrue(hasattr(sys.implementation, 'version')) - self.assertTrue(hasattr(sys.implementation, 'hexversion')) - self.assertTrue(hasattr(sys.implementation, 'cache_tag')) + self.assertHasAttr(sys.implementation, 'name') + self.assertHasAttr(sys.implementation, 'version') + self.assertHasAttr(sys.implementation, 'hexversion') + self.assertHasAttr(sys.implementation, 'cache_tag') + self.assertHasAttr(sys.implementation, 'supports_isolated_interpreters') version = sys.implementation.version self.assertEqual(version[:2], (version.major, version.minor)) @@ -1089,6 +1083,15 @@ def test_implementation(self): self.assertEqual(sys.implementation.name, sys.implementation.name.lower()) + # https://peps.python.org/pep-0734 + sii = sys.implementation.supports_isolated_interpreters + self.assertIsInstance(sii, bool) + if test.support.check_impl_detail(cpython=True): + if test.support.is_emscripten or test.support.is_wasi: + self.assertFalse(sii) + else: + self.assertTrue(sii) + @test.support.cpython_only def test_debugmallocstats(self): # Test sys._debugmallocstats() @@ -1137,23 +1140,12 @@ def test_getallocatedblocks(self): b = sys.getallocatedblocks() self.assertLessEqual(b, a) try: - # While we could imagine a Python session where the number of - # multiple buffer objects would exceed the sharing of references, - # it is unlikely to happen in a normal test run. - # - # In free-threaded builds each code object owns an array of - # pointers to copies of the bytecode. When the number of - # code objects is a large fraction of the total number of - # references, this can cause the total number of allocated - # blocks to exceed the total number of references. - # - # For some reason, iOS seems to trigger the "unlikely to happen" - # case reliably under CI conditions. It's not clear why; but as - # this test is checking the behavior of getallocatedblock() - # under garbage collection, we can skip this pre-condition check - # for now. See GH-130384. - if not support.Py_GIL_DISABLED and not support.is_apple_mobile: - self.assertLess(a, sys.gettotalrefcount()) + # The reported blocks will include immortalized strings, but the + # total ref count will not. This will sanity check that among all + # other objects (those eligible for garbage collection) there + # are more references being tracked than allocated blocks. + interned_immortal = sys.getunicodeinternedsize(_only_immortal=True) + self.assertLess(a - interned_immortal, sys.gettotalrefcount()) except AttributeError: # gettotalrefcount() not available pass @@ -1419,7 +1411,7 @@ def __del__(self): else: self.assertIn("ValueError", report) self.assertIn("del is broken", report) - self.assertTrue(report.endswith("\n")) + self.assertEndsWith(report, "\n") def test_original_unraisablehook_exception_qualname(self): # See bpo-41031, bpo-45083. @@ -1955,33 +1947,19 @@ def write(self, s): self.assertEqual(out, b"") self.assertEqual(err, b"") - -def _supports_remote_attaching(): - PROCESS_VM_READV_SUPPORTED = False - - try: - from _remote_debugging import PROCESS_VM_READV_SUPPORTED - except ImportError: - pass - - return PROCESS_VM_READV_SUPPORTED - -@unittest.skipIf(not sys.is_remote_debug_enabled(), "Remote debugging is not enabled") -@unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux" and sys.platform != "win32", - "Test only runs on Linux, Windows and MacOS") -@unittest.skipIf(sys.platform == "linux" and not _supports_remote_attaching(), - "Test only runs on Linux with process_vm_readv support") +@test.support.support_remote_exec_only @test.support.cpython_only class TestRemoteExec(unittest.TestCase): def tearDown(self): test.support.reap_children() - def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologue=''): + def _run_remote_exec_test(self, script_code, python_args=None, env=None, + prologue='', + script_path=os_helper.TESTFN + '_remote.py'): # Create the script that will be remotely executed - script = os_helper.TESTFN + '_remote.py' - self.addCleanup(os_helper.unlink, script) + self.addCleanup(os_helper.unlink, script_path) - with open(script, 'w') as f: + with open(script_path, 'w') as f: f.write(script_code) # Create and run the target process @@ -2050,7 +2028,7 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu self.assertEqual(response, b"ready") # Try remote exec on the target process - sys.remote_exec(proc.pid, script) + sys.remote_exec(proc.pid, script_path) # Signal script to continue client_socket.sendall(b"continue") @@ -2073,14 +2051,32 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu def test_remote_exec(self): """Test basic remote exec functionality""" - script = ''' -print("Remote script executed successfully!") -''' + script = 'print("Remote script executed successfully!")' returncode, stdout, stderr = self._run_remote_exec_test(script) # self.assertEqual(returncode, 0) self.assertIn(b"Remote script executed successfully!", stdout) self.assertEqual(stderr, b"") + def test_remote_exec_bytes(self): + script = 'print("Remote script executed successfully!")' + script_path = os.fsencode(os_helper.TESTFN) + b'_bytes_remote.py' + returncode, stdout, stderr = self._run_remote_exec_test(script, + script_path=script_path) + self.assertIn(b"Remote script executed successfully!", stdout) + self.assertEqual(stderr, b"") + + @unittest.skipUnless(os_helper.TESTFN_UNDECODABLE, 'requires undecodable path') + @unittest.skipIf(sys.platform == 'darwin', + 'undecodable paths are not supported on macOS') + def test_remote_exec_undecodable(self): + script = 'print("Remote script executed successfully!")' + script_path = os_helper.TESTFN_UNDECODABLE + b'_undecodable_remote.py' + for script_path in [script_path, os.fsdecode(script_path)]: + returncode, stdout, stderr = self._run_remote_exec_test(script, + script_path=script_path) + self.assertIn(b"Remote script executed successfully!", stdout) + self.assertEqual(stderr, b"") + def test_remote_exec_with_self_process(self): """Test remote exec with the target process being the same as the test process""" @@ -2110,7 +2106,7 @@ def audit_hook(event, arg): returncode, stdout, stderr = self._run_remote_exec_test(script, prologue=prologue) self.assertEqual(returncode, 0) self.assertIn(b"Remote script executed successfully!", stdout) - self.assertIn(b"Audit event: remote_debugger_script, arg: ", stdout) + self.assertIn(b"Audit event: cpython.remote_debugger_script, arg: ", stdout) self.assertEqual(stderr, b"") def test_remote_exec_with_exception(self): @@ -2157,6 +2153,13 @@ def test_remote_exec_invalid_pid(self): with self.assertRaises(OSError): sys.remote_exec(99999, "print('should not run')") + def test_remote_exec_invalid_script(self): + """Test remote exec with invalid script type""" + with self.assertRaises(TypeError): + sys.remote_exec(0, None) + with self.assertRaises(TypeError): + sys.remote_exec(0, 123) + def test_remote_exec_syntax_error(self): """Test remote exec with syntax error in script""" script = ''' diff --git a/Lib/test/test_sys_setprofile.py b/Lib/test/test_sys_setprofile.py index 345c022bd2374c..a22b728b569419 100644 --- a/Lib/test/test_sys_setprofile.py +++ b/Lib/test/test_sys_setprofile.py @@ -272,6 +272,8 @@ def g(p): self.check_events(g, [(1, 'call', g_ident, None), (2, 'call', f_ident, None), (2, 'return', f_ident, 0), + (2, 'call', f_ident, None), + (2, 'return', f_ident, None), (1, 'return', g_ident, None), ], check_args=True) diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 53e55383bf9c72..09eff11179ea58 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -186,7 +186,7 @@ def test_posix_venv_scheme(self): # The include directory on POSIX isn't exactly the same as before, # but it is "within" sysconfig_includedir = sysconfig.get_path('include', scheme='posix_venv', vars=vars) - self.assertTrue(sysconfig_includedir.startswith(incpath + os.sep)) + self.assertStartsWith(sysconfig_includedir, incpath + os.sep) def test_nt_venv_scheme(self): # The following directories were hardcoded in the venv module @@ -353,6 +353,13 @@ def test_get_platform(self): self.assertEqual(get_platform(), 'macosx-10.4-%s' % arch) + for macver in range(11, 16): + _osx_support._remove_original_values(get_config_vars()) + get_config_vars()['CFLAGS'] = ('-fno-strict-overflow -Wsign-compare -Wunreachable-code' + '-arch arm64 -fno-common -dynamic -DNDEBUG -g -O3 -Wall') + get_config_vars()['MACOSX_DEPLOYMENT_TARGET'] = f"{macver}.0" + self.assertEqual(get_platform(), 'macosx-%d.0-arm64' % macver) + # linux debian sarge os.name = 'posix' sys.version = ('2.3.5 (#1, Jul 4 2007, 17:28:59) ' @@ -531,13 +538,10 @@ def test_srcdir(self): Python_h = os.path.join(srcdir, 'Include', 'Python.h') self.assertTrue(os.path.exists(Python_h), Python_h) # <srcdir>/PC/pyconfig.h.in always exists even if unused - pyconfig_h = os.path.join(srcdir, 'PC', 'pyconfig.h.in') - self.assertTrue(os.path.exists(pyconfig_h), pyconfig_h) pyconfig_h_in = os.path.join(srcdir, 'pyconfig.h.in') self.assertTrue(os.path.exists(pyconfig_h_in), pyconfig_h_in) if os.name == 'nt': - # <executable dir>/pyconfig.h exists on Windows in a build tree - pyconfig_h = os.path.join(sys.executable, '..', 'pyconfig.h') + pyconfig_h = os.path.join(srcdir, 'PC', 'pyconfig.h') self.assertTrue(os.path.exists(pyconfig_h), pyconfig_h) elif os.name == 'posix': makefile_dir = os.path.dirname(sysconfig.get_makefile_filename()) @@ -572,8 +576,7 @@ def test_linux_ext_suffix(self): expected_suffixes = 'i386-linux-gnu.so', 'x86_64-linux-gnux32.so', 'i386-linux-musl.so' else: # 8 byte pointer size expected_suffixes = 'x86_64-linux-gnu.so', 'x86_64-linux-musl.so' - self.assertTrue(suffix.endswith(expected_suffixes), - f'unexpected suffix {suffix!r}') + self.assertEndsWith(suffix, expected_suffixes) @unittest.skipUnless(sys.platform == 'android', 'Android-specific test') def test_android_ext_suffix(self): @@ -585,13 +588,12 @@ def test_android_ext_suffix(self): "aarch64": "aarch64-linux-android", "armv7l": "arm-linux-androideabi", }[machine] - self.assertTrue(suffix.endswith(f"-{expected_triplet}.so"), - f"{machine=}, {suffix=}") + self.assertEndsWith(suffix, f"-{expected_triplet}.so") @unittest.skipUnless(sys.platform == 'darwin', 'OS X-specific test') def test_osx_ext_suffix(self): suffix = sysconfig.get_config_var('EXT_SUFFIX') - self.assertTrue(suffix.endswith('-darwin.so'), suffix) + self.assertEndsWith(suffix, '-darwin.so') def test_always_set_py_debug(self): self.assertIn('Py_DEBUG', sysconfig.get_config_vars()) @@ -714,11 +716,11 @@ def test_sysconfigdata_json(self): ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase'} # Keys dependent on Python being run from the prefix targetted when building (different on relocatable installs) if sysconfig._installation_is_relocated(): - ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase', 'installed_base', 'installed_platbase'} + ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase', 'installed_base', 'installed_platbase', 'srcdir'} for key in ignore_keys: - json_config_vars.pop(key) - system_config_vars.pop(key) + json_config_vars.pop(key, None) + system_config_vars.pop(key, None) self.assertEqual(system_config_vars, json_config_vars) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2d9649237a9382..860413b88eb6b5 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -55,6 +55,7 @@ def sha256sum(data): zstname = os.path.join(TEMPDIR, "testtar.tar.zst") tmpname = os.path.join(TEMPDIR, "tmp.tar") dotlessname = os.path.join(TEMPDIR, "testtar") +SPACE = b" " sha256_regtype = ( "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce" @@ -840,6 +841,57 @@ def test_next_on_empty_tarfile(self): with tarfile.open(fileobj=fd, mode="r") as tf: self.assertEqual(tf.next(), None) + def _setup_symlink_to_target(self, temp_dirpath): + target_filepath = os.path.join(temp_dirpath, "target") + ustar_dirpath = os.path.join(temp_dirpath, "ustar") + hardlink_filepath = os.path.join(ustar_dirpath, "lnktype") + with open(target_filepath, "wb") as f: + f.write(b"target") + os.makedirs(ustar_dirpath) + os.symlink(target_filepath, hardlink_filepath) + return target_filepath, hardlink_filepath + + def _assert_on_file_content(self, filepath, digest): + with open(filepath, "rb") as f: + data = f.read() + self.assertEqual(sha256sum(data), digest) + + @unittest.skipUnless( + hasattr(os, "link"), "Missing hardlink implementation" + ) + @os_helper.skip_unless_symlink + def test_extract_hardlink_on_symlink(self): + """ + This test verifies that extracting a hardlink will not follow an + existing symlink after a FileExistsError on os.link. + """ + with os_helper.temp_dir() as DIR: + target_filepath, hardlink_filepath = self._setup_symlink_to_target(DIR) + with tarfile.open(tarname, encoding="iso8859-1") as tar: + tar.extract("ustar/regtype", DIR, filter="data") + tar.extract("ustar/lnktype", DIR, filter="data") + self._assert_on_file_content(target_filepath, sha256sum(b"target")) + self._assert_on_file_content(hardlink_filepath, sha256_regtype) + + @unittest.skipUnless( + hasattr(os, "link"), "Missing hardlink implementation" + ) + @os_helper.skip_unless_symlink + def test_extractall_hardlink_on_symlink(self): + """ + This test verifies that extracting a hardlink will not follow an + existing symlink after a FileExistsError on os.link. + """ + with os_helper.temp_dir() as DIR: + target_filepath, hardlink_filepath = self._setup_symlink_to_target(DIR) + with tarfile.open(tarname, encoding="iso8859-1") as tar: + tar.extractall( + DIR, members=["ustar/regtype", "ustar/lnktype"], filter="data", + ) + self._assert_on_file_content(target_filepath, sha256sum(b"target")) + self._assert_on_file_content(hardlink_filepath, sha256_regtype) + + class MiscReadTest(MiscReadTestBase, unittest.TestCase): test_fail_comp = None @@ -1650,7 +1702,7 @@ def test_cwd(self): try: for t in tar: if t.name != ".": - self.assertTrue(t.name.startswith("./"), t.name) + self.assertStartsWith(t.name, "./") finally: tar.close() @@ -1736,6 +1788,16 @@ def test_file_mode(self): finally: os.umask(original_umask) + def test_pathlike_name(self): + expected_name = os.path.abspath(tmpname) + tarpath = os_helper.FakePath(tmpname) + + for func in (tarfile.open, tarfile.TarFile.open): + with self.subTest(): + with func(tarpath, self.mode) as tar: + self.assertEqual(tar.name, expected_name) + os_helper.unlink(tmpname) + class GzipStreamWriteTest(GzipTest, StreamWriteTest): def test_source_directory_not_leaked(self): @@ -2715,6 +2777,31 @@ def test_useful_error_message_when_modules_missing(self): str(excinfo.exception), ) + @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') + @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") + @unittest.mock.patch('os.chmod') + def test_deferred_directory_attributes_update(self, mock_chmod): + # Regression test for gh-127987: setting attributes on arbitrary files + tempdir = os.path.join(TEMPDIR, 'test127987') + def mock_chmod_side_effect(path, mode, **kwargs): + target_path = os.path.realpath(path) + if os.path.commonpath([target_path, tempdir]) != tempdir: + raise Exception("should not try to chmod anything outside the destination", target_path) + mock_chmod.side_effect = mock_chmod_side_effect + + outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir') + with ArchiveMaker() as arc: + arc.add('x', symlink_to='.') + arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') + arc.add('x', symlink_to=outside_tree_dir) + + os.makedirs(outside_tree_dir) + try: + arc.open().extractall(path=tempdir, filter='tar') + finally: + os_helper.rmtree(outside_tree_dir) + os_helper.rmtree(tempdir) + class CommandLineTest(unittest.TestCase): @@ -3275,6 +3362,10 @@ def check_files_present(self, directory): got_paths = set( p.relative_to(directory) for p in pathlib.Path(directory).glob('**/*')) + if self.extraction_filter in (None, 'data'): + # The 'data' filter is expected to reject special files + for path in 'ustar/fifotype', 'ustar/blktype', 'ustar/chrtype': + got_paths.discard(pathlib.Path(path)) self.assertEqual(self.control_paths, got_paths) @contextmanager @@ -3490,11 +3581,12 @@ class ArchiveMaker: with t.open() as tar: ... # `tar` is now a TarFile with 'filename' in it! """ - def __init__(self): + def __init__(self, **kwargs): self.bio = io.BytesIO() + self.tar_kwargs = dict(kwargs) def __enter__(self): - self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio) + self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio, **self.tar_kwargs) return self def __exit__(self, *exc): @@ -3503,12 +3595,28 @@ def __exit__(self, *exc): self.bio = None def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, - mode=None, size=None, **kwargs): - """Add a member to the test archive. Call within `with`.""" + mode=None, size=None, content=None, **kwargs): + """Add a member to the test archive. Call within `with`. + + Provides many shortcuts: + - default `type` is based on symlink_to, hardlink_to, and trailing `/` + in name (which is stripped) + - size & content defaults are based on each other + - content can be str or bytes + - mode should be textual ('-rwxrwxrwx') + + (add more! this is unstable internal test-only API) + """ name = str(name) tarinfo = tarfile.TarInfo(name).replace(**kwargs) + if content is not None: + if isinstance(content, str): + content = content.encode() + size = len(content) if size is not None: tarinfo.size = size + if content is None: + content = bytes(tarinfo.size) if mode: tarinfo.mode = _filemode_to_int(mode) if symlink_to is not None: @@ -3522,7 +3630,7 @@ def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, if type is not None: tarinfo.type = type if tarinfo.isreg(): - fileobj = io.BytesIO(bytes(tarinfo.size)) + fileobj = io.BytesIO(content) else: fileobj = None self.tar_w.addfile(tarinfo, fileobj) @@ -3556,7 +3664,7 @@ class TestExtractionFilters(unittest.TestCase): destdir = outerdir / 'dest' @contextmanager - def check_context(self, tar, filter): + def check_context(self, tar, filter, *, check_flag=True): """Extracts `tar` to `self.destdir` and allows checking the result If an error occurs, it must be checked using `expect_exception` @@ -3565,27 +3673,40 @@ def check_context(self, tar, filter): except the destination directory itself and parent directories of other files. When checking directories, do so before their contents. + + A file called 'flag' is made in outerdir (i.e. outside destdir) + before extraction; it should not be altered nor should its contents + be read/copied. """ with os_helper.temp_dir(self.outerdir): + flag_path = self.outerdir / 'flag' + flag_path.write_text('capture me') try: tar.extractall(self.destdir, filter=filter) except Exception as exc: self.raised_exception = exc + self.reraise_exception = True self.expected_paths = set() else: self.raised_exception = None + self.reraise_exception = False self.expected_paths = set(self.outerdir.glob('**/*')) self.expected_paths.discard(self.destdir) + self.expected_paths.discard(flag_path) try: - yield + yield self finally: tar.close() - if self.raised_exception: + if self.reraise_exception: raise self.raised_exception self.assertEqual(self.expected_paths, set()) + if check_flag: + self.assertEqual(flag_path.read_text(), 'capture me') + else: + assert filter == 'fully_trusted' def expect_file(self, name, type=None, symlink_to=None, mode=None, - size=None): + size=None, content=None): """Check a single file. See check_context.""" if self.raised_exception: raise self.raised_exception @@ -3604,26 +3725,45 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None, # The symlink might be the same (textually) as what we expect, # but some systems change the link to an equivalent path, so # we fall back to samefile(). - if expected != got: - self.assertTrue(got.samefile(expected)) + try: + if expected != got: + self.assertTrue(got.samefile(expected)) + except Exception as e: + # attach a note, so it's shown even if `samefile` fails + e.add_note(f'{expected=}, {got=}') + raise elif type == tarfile.REGTYPE or type is None: self.assertTrue(path.is_file()) elif type == tarfile.DIRTYPE: self.assertTrue(path.is_dir()) elif type == tarfile.FIFOTYPE: self.assertTrue(path.is_fifo()) + elif type == tarfile.SYMTYPE: + self.assertTrue(path.is_symlink()) else: raise NotImplementedError(type) if size is not None: self.assertEqual(path.stat().st_size, size) + if content is not None: + self.assertEqual(path.read_text(), content) for parent in path.parents: self.expected_paths.discard(parent) + def expect_any_tree(self, name): + """Check a directory; forget about its contents.""" + tree_path = (self.destdir / name).resolve() + self.expect_file(tree_path, type=tarfile.DIRTYPE) + self.expected_paths = { + p for p in self.expected_paths + if tree_path not in p.parents + } + def expect_exception(self, exc_type, message_re='.'): with self.assertRaisesRegex(exc_type, message_re): if self.raised_exception is not None: raise self.raised_exception - self.raised_exception = None + self.reraise_exception = False + return self.raised_exception def test_benign_file(self): with ArchiveMaker() as arc: @@ -3708,6 +3848,80 @@ def test_parent_symlink(self): with self.check_context(arc.open(), 'data'): self.expect_file('parent/evil') + @symlink_test + @os_helper.skip_unless_symlink + def test_realpath_limit_attack(self): + # (CVE-2025-4517) + + with ArchiveMaker() as arc: + # populate the symlinks and dirs that expand in os.path.realpath() + # The component length is chosen so that in common cases, the unexpanded + # path fits in PATH_MAX, but it overflows when the final symlink + # is expanded + steps = "abcdefghijklmnop" + if sys.platform == 'win32': + component = 'd' * 25 + elif 'PC_PATH_MAX' in os.pathconf_names: + max_path_len = os.pathconf(self.outerdir.parent, "PC_PATH_MAX") + path_sep_len = 1 + dest_len = len(str(self.destdir)) + path_sep_len + component_len = (max_path_len - dest_len) // (len(steps) + path_sep_len) + component = 'd' * component_len + else: + raise NotImplementedError("Need to guess component length for {sys.platform}") + path = "" + step_path = "" + for i in steps: + arc.add(os.path.join(path, component), type=tarfile.DIRTYPE, + mode='drwxrwxrwx') + arc.add(os.path.join(path, i), symlink_to=component) + path = os.path.join(path, component) + step_path = os.path.join(step_path, i) + # create the final symlink that exceeds PATH_MAX and simply points + # to the top dir. + # this link will never be expanded by + # os.path.realpath(strict=False), nor anything after it. + linkpath = os.path.join(*steps, "l"*254) + parent_segments = [".."] * len(steps) + arc.add(linkpath, symlink_to=os.path.join(*parent_segments)) + # make a symlink outside to keep the tar command happy + arc.add("escape", symlink_to=os.path.join(linkpath, "..")) + # use the symlinks above, that are not checked, to create a hardlink + # to a file outside of the destination path + arc.add("flaglink", hardlink_to=os.path.join("escape", "flag")) + # now that we have the hardlink we can overwrite the file + arc.add("flaglink", content='overwrite') + # we can also create new files as well! + arc.add("escape/newfile", content='new') + + with (self.subTest('fully_trusted'), + self.check_context(arc.open(), filter='fully_trusted', + check_flag=False)): + if sys.platform == 'win32': + self.expect_exception((FileNotFoundError, FileExistsError)) + elif self.raised_exception: + # Cannot symlink/hardlink: tarfile falls back to getmember() + self.expect_exception(KeyError) + # Otherwise, this block should never enter. + else: + self.expect_any_tree(component) + self.expect_file('flaglink', content='overwrite') + self.expect_file('../newfile', content='new') + self.expect_file('escape', type=tarfile.SYMTYPE) + self.expect_file('a', symlink_to=component) + + for filter in 'tar', 'data': + with self.subTest(filter), self.check_context(arc.open(), filter=filter): + exc = self.expect_exception((OSError, KeyError)) + if isinstance(exc, OSError): + if sys.platform == 'win32': + # 3: ERROR_PATH_NOT_FOUND + # 5: ERROR_ACCESS_DENIED + # 206: ERROR_FILENAME_EXCED_RANGE + self.assertIn(exc.winerror, (3, 5, 206)) + else: + self.assertEqual(exc.errno, errno.ENAMETOOLONG) + @symlink_test def test_parent_symlink2(self): # Test interplaying symlinks @@ -3930,8 +4144,8 @@ def test_chains(self): arc.add('symlink2', symlink_to=os.path.join( 'linkdir', 'hardlink2')) arc.add('targetdir/target', size=3) - arc.add('linkdir/hardlink', hardlink_to='targetdir/target') - arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink') + arc.add('linkdir/hardlink', hardlink_to=os.path.join('targetdir', 'target')) + arc.add('linkdir/hardlink2', hardlink_to=os.path.join('linkdir', 'symlink')) for filter in 'tar', 'data', 'fully_trusted': with self.check_context(arc.open(), filter): @@ -3947,6 +4161,129 @@ def test_chains(self): self.expect_file('linkdir/symlink', size=3) self.expect_file('symlink2', size=3) + @symlink_test + def test_sneaky_hardlink_fallback(self): + # (CVE-2025-4330) + # Test that when hardlink extraction falls back to extracting members + # from the archive, the extracted member is (re-)filtered. + with ArchiveMaker() as arc: + # Create a directory structure so the c/escape symlink stays + # inside the path + arc.add("a/t/dummy") + # Create b/ directory + arc.add("b/") + # Point "c" to the bottom of the tree in "a" + arc.add("c", symlink_to=os.path.join("a", "t")) + # link to non-existant location under "a" + arc.add("c/escape", symlink_to=os.path.join("..", "..", + "link_here")) + # Move "c" to point to "b" ("c/escape" no longer exists) + arc.add("c", symlink_to="b") + # Attempt to create a hard link to "c/escape". Since it doesn't + # exist it will attempt to extract "cescape" but at "boom". + arc.add("boom", hardlink_to=os.path.join("c", "escape")) + + with self.check_context(arc.open(), 'data'): + if not os_helper.can_symlink(): + # When 'c/escape' is extracted, 'c' is a regular + # directory, and 'c/escape' *would* point outside + # the destination if symlinks were allowed. + self.expect_exception( + tarfile.LinkOutsideDestinationError) + elif sys.platform == "win32": + # On Windows, 'c/escape' points outside the destination + self.expect_exception(tarfile.LinkOutsideDestinationError) + else: + e = self.expect_exception( + tarfile.LinkFallbackError, + "link 'boom' would be extracted as a copy of " + + "'c/escape', which was rejected") + self.assertIsInstance(e.__cause__, + tarfile.LinkOutsideDestinationError) + for filter in 'tar', 'fully_trusted': + with self.subTest(filter), self.check_context(arc.open(), filter): + if not os_helper.can_symlink(): + self.expect_file("a/t/dummy") + self.expect_file("b/") + self.expect_file("c/") + else: + self.expect_file("a/t/dummy") + self.expect_file("b/") + self.expect_file("a/t/escape", symlink_to='../../link_here') + self.expect_file("boom", symlink_to='../../link_here') + self.expect_file("c", symlink_to='b') + + @symlink_test + def test_exfiltration_via_symlink(self): + # (CVE-2025-4138) + # Test changing symlinks that result in a symlink pointing outside + # the extraction directory, unless prevented by 'data' filter's + # normalization. + with ArchiveMaker() as arc: + arc.add("escape", symlink_to=os.path.join('link', 'link', '..', '..', 'link-here')) + arc.add("link", symlink_to='./') + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter): + if os_helper.can_symlink(): + self.expect_file("link", symlink_to='./') + if filter == 'data': + self.expect_file("escape", symlink_to='link-here') + else: + self.expect_file("escape", + symlink_to='link/link/../../link-here') + else: + # Nothing is extracted. + pass + + @symlink_test + def test_chmod_outside_dir(self): + # (CVE-2024-12718) + # Test that members used for delayed updates of directory metadata + # are (re-)filtered. + with ArchiveMaker() as arc: + # "pwn" is a veeeery innocent symlink: + arc.add("a/pwn", symlink_to='.') + # But now "pwn" is also a directory, so it's scheduled to have its + # metadata updated later: + arc.add("a/pwn/", mode='drwxrwxrwx') + # Oops, "pwn" is not so innocent any more: + arc.add("a/pwn", symlink_to='x/../') + # Newly created symlink points to the dest dir, + # so it's OK for the "data" filter. + arc.add('a/x', symlink_to=('../')) + # But now "pwn" points outside the dest dir + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter) as cc: + if not os_helper.can_symlink(): + self.expect_file("a/pwn/") + elif filter == 'data': + self.expect_file("a/x", symlink_to='../') + self.expect_file("a/pwn", symlink_to='.') + else: + self.expect_file("a/x", symlink_to='../') + self.expect_file("a/pwn", symlink_to='x/../') + if sys.platform != "win32": + st_mode = cc.outerdir.stat().st_mode + self.assertNotEqual(st_mode & 0o777, 0o777) + + def test_link_fallback_normalizes(self): + # Make sure hardlink fallbacks work for non-normalized paths for all + # filters + with ArchiveMaker() as arc: + arc.add("dir/") + arc.add("dir/../afile") + arc.add("link1", hardlink_to='dir/../afile') + arc.add("link2", hardlink_to='dir/../dir/../afile') + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter) as cc: + self.expect_file("dir/") + self.expect_file("afile") + self.expect_file("link1") + self.expect_file("link2") + def test_modes(self): # Test how file modes are extracted # (Note that the modes are ignored on platforms without working chmod) @@ -4071,24 +4408,64 @@ def test_tar_filter(self): # The 'tar' filter returns TarInfo objects with the same name/type. # (It can also fail for particularly "evil" input, but we don't have # that in the test archive.) - with tarfile.TarFile.open(tarname) as tar: + with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar: for tarinfo in tar.getmembers(): - filtered = tarfile.tar_filter(tarinfo, '') + try: + filtered = tarfile.tar_filter(tarinfo, '') + except UnicodeEncodeError: + continue self.assertIs(filtered.name, tarinfo.name) self.assertIs(filtered.type, tarinfo.type) def test_data_filter(self): # The 'data' filter either raises, or returns TarInfo with the same # name/type. - with tarfile.TarFile.open(tarname) as tar: + with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar: for tarinfo in tar.getmembers(): try: filtered = tarfile.data_filter(tarinfo, '') - except tarfile.FilterError: + except (tarfile.FilterError, UnicodeEncodeError): continue self.assertIs(filtered.name, tarinfo.name) self.assertIs(filtered.type, tarinfo.type) + @unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths') + def test_filter_unencodable(self): + # Sanity check using a valid path. + tarinfo = tarfile.TarInfo(os_helper.TESTFN) + filtered = tarfile.tar_filter(tarinfo, '') + self.assertIs(filtered.name, tarinfo.name) + filtered = tarfile.data_filter(tarinfo, '') + self.assertIs(filtered.name, tarinfo.name) + + tarinfo = tarfile.TarInfo('test\x00') + self.assertRaises(ValueError, tarfile.tar_filter, tarinfo, '') + self.assertRaises(ValueError, tarfile.data_filter, tarinfo, '') + tarinfo = tarfile.TarInfo('\ud800') + self.assertRaises(UnicodeEncodeError, tarfile.tar_filter, tarinfo, '') + self.assertRaises(UnicodeEncodeError, tarfile.data_filter, tarinfo, '') + + @unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths') + def test_extract_unencodable(self): + # Create a member with name \xed\xa0\x80 which is UTF-8 encoded + # lone surrogate \ud800. + with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc: + arc.add('\udced\udca0\udc80') + with os_helper.temp_cwd() as tmp: + tar = arc.open(encoding='utf-8', errors='surrogatepass', + errorlevel=1) + self.assertEqual(tar.getnames(), ['\ud800']) + with self.assertRaises(UnicodeEncodeError): + tar.extractall() + self.assertEqual(os.listdir(), []) + + tar = arc.open(encoding='utf-8', errors='surrogatepass', + errorlevel=0, debug=1) + with support.captured_stderr() as stderr: + tar.extractall() + self.assertEqual(os.listdir(), []) + self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue()) + def test_change_default_filter_on_instance(self): tar = tarfile.TarFile(tarname, 'r') def strict_filter(tarinfo, path): @@ -4201,13 +4578,13 @@ def valueerror_filter(tarinfo, path): # If errorlevel is 0, errors affected by errorlevel are ignored with self.check_context(arc.open(errorlevel=0), extracterror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=0), filtererror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=0), oserror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=0), tarerror_filter): self.expect_exception(tarfile.TarError) @@ -4218,7 +4595,7 @@ def valueerror_filter(tarinfo, path): # If 1, all fatal errors are raised with self.check_context(arc.open(errorlevel=1), extracterror_filter): - self.expect_file('file') + pass with self.check_context(arc.open(errorlevel=1), filtererror_filter): self.expect_exception(tarfile.FilterError) @@ -4287,6 +4664,161 @@ def extractall(self, ar): ar.extractall(self.testdir, filter='fully_trusted') +class OffsetValidationTests(unittest.TestCase): + tarname = tmpname + invalid_posix_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 bytes + + b"0011407" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 6 bytes, version: 2 bytes + + tarfile.POSIX_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # devminor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # prefix: 155 bytes + + tarfile.NUL * tarfile.LENGTH_PREFIX + # padding: 12 bytes + + tarfile.NUL * 12 + ) + invalid_gnu_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, null terminator: 8 bytes + + b"0000755" + tarfile.NUL + # uid, null terminator: 8 bytes + + b"0000001" + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"0000001" + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 bytes + + b"0011327" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 8 bytes + + tarfile.GNU_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, null terminator: 8 bytes + + tarfile.NUL * 8 + # devminor, null terminator: 8 bytes + + tarfile.NUL * 8 + # padding: 167 bytes + + tarfile.NUL * 167 + ) + invalid_v7_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 bytes + + b"0010070" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # padding: 255 bytes + + tarfile.NUL * 255 + ) + valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT) + data_block = b"\xff" * tarfile.BLOCKSIZE + + def _write_buffer(self, buffer): + with open(self.tarname, "wb") as f: + f.write(buffer) + + def _get_members(self, ignore_zeros=None): + with open(self.tarname, "rb") as f: + with tarfile.open( + mode="r", fileobj=f, ignore_zeros=ignore_zeros + ) as tar: + return tar.getmembers() + + def _assert_raises_read_error_exception(self): + with self.assertRaisesRegex( + tarfile.ReadError, "file could not be opened successfully" + ): + self._get_members() + + def test_invalid_offset_header_validations(self): + for tar_format, invalid_header in ( + ("posix", self.invalid_posix_header), + ("gnu", self.invalid_gnu_header), + ("v7", self.invalid_v7_header), + ): + with self.subTest(format=tar_format): + self._write_buffer(invalid_header) + self._assert_raises_read_error_exception() + + def test_early_stop_at_invalid_offset_header(self): + buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header + self._write_buffer(buffer) + members = self._get_members() + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, 0) + + def test_ignore_invalid_archive(self): + # 3 invalid headers with their respective data + buffer = (self.invalid_gnu_header + self.data_block) * 3 + self._write_buffer(buffer) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 0) + + def test_ignore_invalid_offset_headers(self): + for first_block, second_block, expected_offset in ( + ( + (self.valid_gnu_header), + (self.invalid_gnu_header + self.data_block), + 0, + ), + ( + (self.invalid_gnu_header + self.data_block), + (self.valid_gnu_header), + 1024, + ), + ): + self._write_buffer(first_block + second_block) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, expected_offset) + + def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py index d46d3c0f040601..52b13b98cbcce5 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py @@ -516,11 +516,11 @@ def test_collision_with_existing_file(self): _mock_candidate_names('aaa', 'aaa', 'bbb'): (fd1, name1) = self.make_temp() os.close(fd1) - self.assertTrue(name1.endswith('aaa')) + self.assertEndsWith(name1, 'aaa') (fd2, name2) = self.make_temp() os.close(fd2) - self.assertTrue(name2.endswith('bbb')) + self.assertEndsWith(name2, 'bbb') def test_collision_with_existing_directory(self): # _mkstemp_inner tries another name when a directory with @@ -528,11 +528,11 @@ def test_collision_with_existing_directory(self): with _inside_empty_temp_dir(), \ _mock_candidate_names('aaa', 'aaa', 'bbb'): dir = tempfile.mkdtemp() - self.assertTrue(dir.endswith('aaa')) + self.assertEndsWith(dir, 'aaa') (fd, name) = self.make_temp() os.close(fd) - self.assertTrue(name.endswith('bbb')) + self.assertEndsWith(name, 'bbb') class TestGetTempPrefix(BaseTestCase): @@ -828,9 +828,9 @@ def test_collision_with_existing_file(self): _mock_candidate_names('aaa', 'aaa', 'bbb'): file = tempfile.NamedTemporaryFile(delete=False) file.close() - self.assertTrue(file.name.endswith('aaa')) + self.assertEndsWith(file.name, 'aaa') dir = tempfile.mkdtemp() - self.assertTrue(dir.endswith('bbb')) + self.assertEndsWith(dir, 'bbb') def test_collision_with_existing_directory(self): # mkdtemp tries another name when a directory with @@ -838,9 +838,9 @@ def test_collision_with_existing_directory(self): with _inside_empty_temp_dir(), \ _mock_candidate_names('aaa', 'aaa', 'bbb'): dir1 = tempfile.mkdtemp() - self.assertTrue(dir1.endswith('aaa')) + self.assertEndsWith(dir1, 'aaa') dir2 = tempfile.mkdtemp() - self.assertTrue(dir2.endswith('bbb')) + self.assertEndsWith(dir2, 'bbb') def test_for_tempdir_is_bytes_issue40701_api_warts(self): orig_tempdir = tempfile.tempdir diff --git a/Lib/test/test_termios.py b/Lib/test/test_termios.py index e5d11cf84d2a66..ce8392a6ccdbd6 100644 --- a/Lib/test/test_termios.py +++ b/Lib/test/test_termios.py @@ -290,8 +290,8 @@ def test_ioctl_constants(self): self.assertGreaterEqual(value, 0) def test_exception(self): - self.assertTrue(issubclass(termios.error, Exception)) - self.assertFalse(issubclass(termios.error, OSError)) + self.assertIsSubclass(termios.error, Exception) + self.assertNotIsSubclass(termios.error, OSError) if __name__ == '__main__': diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py index cbd383ea4e2656..aca1f427656bb5 100644 --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -605,7 +605,7 @@ def test_break_long(self): # bug 1146. Prevent a long word to be wrongly wrapped when the # preceding word is exactly one character shorter than the width self.check_wrap(self.text, 12, - ['Did you say ', + ['Did you say', '"supercalifr', 'agilisticexp', 'ialidocious?', @@ -633,7 +633,7 @@ def test_nobreak_long(self): def test_max_lines_long(self): self.check_wrap(self.text, 12, - ['Did you say ', + ['Did you say', '"supercalifr', 'agilisticexp', '[...]'], diff --git a/Lib/test/test_threadedtempfile.py b/Lib/test/test_threadedtempfile.py index 420fc6ec8be3d8..acb427b0c78ae9 100644 --- a/Lib/test/test_threadedtempfile.py +++ b/Lib/test/test_threadedtempfile.py @@ -15,6 +15,7 @@ import tempfile +from test import support from test.support import threading_helper import unittest import io @@ -49,7 +50,8 @@ def run(self): class ThreadedTempFileTest(unittest.TestCase): - def test_main(self): + @support.bigmemtest(size=NUM_THREADS, memuse=60*2**20, dry_run=False) + def test_main(self, size): threads = [TempFileGreedy() for i in range(NUM_THREADS)] with threading_helper.start_threads(threads, startEvent.set): pass diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 4ab38c2598b50a..bb51ddd38e2cad 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -28,7 +28,7 @@ from test import support try: - from test.support import interpreters + from concurrent import interpreters except ImportError: interpreters = None @@ -530,7 +530,8 @@ def test_enumerate_after_join(self): finally: sys.setswitchinterval(old_interval) - def test_join_from_multiple_threads(self): + @support.bigmemtest(size=20, memuse=72*2**20, dry_run=False) + def test_join_from_multiple_threads(self, size): # Thread.join() should be thread-safe errors = [] @@ -1351,6 +1352,62 @@ def do_flush(*args, **kwargs): ''') assert_python_ok("-c", script) + @skip_unless_reliable_fork + @unittest.skipUnless(hasattr(threading, 'get_native_id'), "test needs threading.get_native_id()") + def test_native_id_after_fork(self): + script = """if True: + import threading + import os + from test import support + + parent_thread_native_id = threading.current_thread().native_id + print(parent_thread_native_id, flush=True) + assert parent_thread_native_id == threading.get_native_id() + childpid = os.fork() + if childpid == 0: + print(threading.current_thread().native_id, flush=True) + assert threading.current_thread().native_id == threading.get_native_id() + else: + try: + assert parent_thread_native_id == threading.current_thread().native_id + assert parent_thread_native_id == threading.get_native_id() + finally: + support.wait_process(childpid, exitcode=0) + """ + rc, out, err = assert_python_ok('-c', script) + self.assertEqual(rc, 0) + self.assertEqual(err, b"") + native_ids = out.strip().splitlines() + self.assertEqual(len(native_ids), 2) + self.assertNotEqual(native_ids[0], native_ids[1]) + + def test_stop_the_world_during_finalization(self): + # gh-137433: Test functions that trigger a stop-the-world in the free + # threading build concurrent with interpreter finalization. + script = """if True: + import gc + import sys + import threading + NUM_THREADS = 5 + b = threading.Barrier(NUM_THREADS + 1) + def run_in_bg(): + b.wait() + while True: + sys.setprofile(None) + gc.collect() + + for _ in range(NUM_THREADS): + t = threading.Thread(target=run_in_bg, daemon=True) + t.start() + + b.wait() + print("Exiting...") + """ + rc, out, err = assert_python_ok('-c', script) + self.assertEqual(rc, 0) + self.assertEqual(err, b"") + self.assertEqual(out.strip(), b"Exiting...") + class ThreadJoinOnShutdown(BaseTestCase): def _run_and_join(self, script): @@ -1431,7 +1488,8 @@ def worker(): self._run_and_join(script) @unittest.skipIf(sys.platform in platforms_to_skip, "due to known OS bug") - def test_4_daemon_threads(self): + @support.bigmemtest(size=40, memuse=70*2**20, dry_run=False) + def test_4_daemon_threads(self, size): # Check that a daemon thread cannot crash the interpreter on shutdown # by manipulating internal structures that are being disposed of in # the main thread. @@ -1669,6 +1727,7 @@ def task(): self.assertEqual(os.read(r_interp, 1), DONE) @cpython_only + @support.skip_if_sanitizer(thread=True, memory=True) def test_daemon_threads_fatal_error(self): import_module("_testcapi") subinterp_code = f"""if 1: @@ -1687,10 +1746,7 @@ def f(): _testcapi.run_in_subinterp(%r) """ % (subinterp_code,) - with test.support.SuppressCrashReport(): - rc, out, err = assert_python_failure("-c", script) - self.assertIn("Fatal Python error: Py_EndInterpreter: " - "not the last thread", err.decode()) + assert_python_ok("-c", script) def _check_allowed(self, before_start='', *, allowed=True, @@ -1962,6 +2018,23 @@ def modify_file(): t.start() t.join() + def test_dummy_thread_on_interpreter_shutdown(self): + # GH-130522: When `threading` held a reference to itself and then a + # _DummyThread() object was created, destruction of the dummy thread + # would emit an unraisable exception at shutdown, due to a lock being + # destroyed. + code = """if True: + import sys + import threading + + threading.x = sys.modules[__name__] + x = threading._DummyThread() + """ + rc, out, err = assert_python_ok("-c", code) + self.assertEqual(rc, 0) + self.assertEqual(out, b"") + self.assertEqual(err, b"") + class ThreadRunFail(threading.Thread): def run(self): @@ -2182,6 +2255,9 @@ def test__all__(self): @unittest.skipUnless(hasattr(_thread, 'set_name'), "missing _thread.set_name") @unittest.skipUnless(hasattr(_thread, '_get_name'), "missing _thread._get_name") def test_set_name(self): + # Ensure main thread name is restored after test + self.addCleanup(_thread.set_name, _thread._get_name()) + # set_name() limit in bytes truncate = getattr(_thread, "_NAME_MAXLEN", None) limit = truncate or 100 @@ -2221,7 +2297,8 @@ def test_set_name(self): tests.append(os_helper.TESTFN_UNENCODABLE) if sys.platform.startswith("sunos"): - encoding = "utf-8" + # Use ASCII encoding on Solaris/Illumos/OpenIndiana + encoding = "ascii" else: encoding = sys.getfilesystemencoding() @@ -2237,7 +2314,7 @@ def work(): if truncate is not None: encoded = encoded[:truncate] if sys.platform.startswith("sunos"): - expected = encoded.decode("utf-8", "surrogateescape") + expected = encoded.decode("ascii", "surrogateescape") else: expected = os.fsdecode(encoded) else: @@ -2256,7 +2333,11 @@ def work(): if '\0' in expected: expected = expected.split('\0', 1)[0] - with self.subTest(name=name, expected=expected): + with self.subTest(name=name, expected=expected, thread="main"): + _thread.set_name(name) + self.assertEqual(_thread._get_name(), expected) + + with self.subTest(name=name, expected=expected, thread="worker"): work_name = None thread = threading.Thread(target=work, name=name) thread.start() diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index d06f65270efe79..5312faa50774ec 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -761,17 +761,17 @@ def test_localtime_timezone(self): # Get the localtime and examine it for the offset and zone. lt = time.localtime() - self.assertTrue(hasattr(lt, "tm_gmtoff")) - self.assertTrue(hasattr(lt, "tm_zone")) + self.assertHasAttr(lt, "tm_gmtoff") + self.assertHasAttr(lt, "tm_zone") # See if the offset and zone are similar to the module # attributes. if lt.tm_gmtoff is None: - self.assertTrue(not hasattr(time, "timezone")) + self.assertNotHasAttr(time, "timezone") else: self.assertEqual(lt.tm_gmtoff, -[time.timezone, time.altzone][lt.tm_isdst]) if lt.tm_zone is None: - self.assertTrue(not hasattr(time, "tzname")) + self.assertNotHasAttr(time, "tzname") else: self.assertEqual(lt.tm_zone, time.tzname[lt.tm_isdst]) @@ -1184,11 +1184,11 @@ def test_clock_functions(self): if mac_ver >= (10, 12): for name in clock_names: - self.assertTrue(hasattr(time, name), f"time.{name} is not available") + self.assertHasAttr(time, name) else: for name in clock_names: - self.assertFalse(hasattr(time, name), f"time.{name} is available") + self.assertNotHasAttr(time, name) if __name__ == "__main__": diff --git a/Lib/test/test_timeit.py b/Lib/test/test_timeit.py index f5ae0a84eb3506..2aeebea9f93d43 100644 --- a/Lib/test/test_timeit.py +++ b/Lib/test/test_timeit.py @@ -222,8 +222,8 @@ def test_repeat_function_zero_iters(self): def assert_exc_string(self, exc_string, expected_exc_name): exc_lines = exc_string.splitlines() self.assertGreater(len(exc_lines), 2) - self.assertTrue(exc_lines[0].startswith('Traceback')) - self.assertTrue(exc_lines[-1].startswith(expected_exc_name)) + self.assertStartsWith(exc_lines[0], 'Traceback') + self.assertStartsWith(exc_lines[-1], expected_exc_name) def test_print_exc(self): s = io.StringIO() diff --git a/Lib/test/test_tkinter/support.py b/Lib/test/test_tkinter/support.py index ebb9e00ff91bf0..46b01e6f131290 100644 --- a/Lib/test/test_tkinter/support.py +++ b/Lib/test/test_tkinter/support.py @@ -58,7 +58,7 @@ def _test_widget(self, constructor): destroy_default_root() tkinter.NoDefaultRoot() self.assertRaises(RuntimeError, constructor) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') def destroy_default_root(): diff --git a/Lib/test/test_tkinter/test_misc.py b/Lib/test/test_tkinter/test_misc.py index 96ea3f0117ca03..0c76e07066f8a8 100644 --- a/Lib/test/test_tkinter/test_misc.py +++ b/Lib/test/test_tkinter/test_misc.py @@ -497,7 +497,7 @@ def test_info_patchlevel(self): self.assertEqual(vi.serial, 0) else: self.assertEqual(vi.micro, 0) - self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}')) + self.assertStartsWith(str(vi), f'{vi.major}.{vi.minor}') def test_embedded_null(self): widget = tkinter.Entry(self.root) @@ -609,7 +609,7 @@ def test_focus(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, '??') self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, '??') self.assertEqual(e.char, '??') @@ -642,7 +642,7 @@ def test_configure(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, '??') self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, '??') self.assertEqual(e.char, '??') @@ -676,7 +676,7 @@ def test_event_generate_key_press(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertIsInstance(e.state, int) self.assertNotEqual(e.state, 0) @@ -747,7 +747,7 @@ def test_event_generate_button_press(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, 1) self.assertEqual(e.state, 0) self.assertEqual(e.char, '??') @@ -781,7 +781,7 @@ def test_event_generate_motion(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, 0x100) self.assertEqual(e.char, '??') @@ -814,7 +814,7 @@ def test_event_generate_mouse_wheel(self): self.assertIs(e.widget, f) self.assertIsInstance(e.serial, int) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.time, 0) self.assertEqual(e.num, '??') self.assertEqual(e.state, 0) @@ -849,7 +849,7 @@ def test_generate_event_virtual_event(self): self.assertIsInstance(e.serial, int) self.assertEqual(e.time, 0) self.assertIs(e.send_event, False) - self.assertFalse(hasattr(e, 'focus')) + self.assertNotHasAttr(e, 'focus') self.assertEqual(e.num, '??') self.assertEqual(e.state, 0) self.assertEqual(e.char, '??') @@ -1308,17 +1308,17 @@ def test_no_default_root(self): self.assertIs(tkinter._default_root, root) tkinter.NoDefaultRoot() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') # repeated call is no-op tkinter.NoDefaultRoot() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') root.destroy() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') root = tkinter.Tk() self.assertIs(tkinter._support_default_root, False) - self.assertFalse(hasattr(tkinter, '_default_root')) + self.assertNotHasAttr(tkinter, '_default_root') root.destroy() def test_getboolean(self): diff --git a/Lib/test/test_tkinter/test_text.py b/Lib/test/test_tkinter/test_text.py index b26956930d3402..ed618c739019cc 100644 --- a/Lib/test/test_tkinter/test_text.py +++ b/Lib/test/test_tkinter/test_text.py @@ -34,12 +34,45 @@ def test_search(self): # Invalid text index. self.assertRaises(tkinter.TclError, text.search, '', 0) + self.assertRaises(tkinter.TclError, text.search, '', '') + self.assertRaises(tkinter.TclError, text.search, '', 'invalid') + self.assertRaises(tkinter.TclError, text.search, '', '1.0', 'invalid') - # Check if we are getting the indices as strings -- you are likely - # to get Tcl_Obj under Tk 8.5 if Tkinter doesn't convert it. - text.insert('1.0', 'hi-test') - self.assertEqual(text.search('-test', '1.0', 'end'), '1.2') - self.assertEqual(text.search('test', '1.0', 'end'), '1.3') + text.insert('1.0', + 'This is a test. This is only a test.\n' + 'Another line.\n' + 'Yet another line.\n' + '64-bit') + + self.assertEqual(text.search('test', '1.0'), '1.10') + self.assertEqual(text.search('test', '1.0', 'end'), '1.10') + self.assertEqual(text.search('test', '1.0', '1.10'), '') + self.assertEqual(text.search('test', '1.11'), '1.31') + self.assertEqual(text.search('test', '1.32', 'end'), '') + self.assertEqual(text.search('test', '1.32'), '1.10') + + self.assertEqual(text.search('', '1.0'), '1.0') # empty pattern + self.assertEqual(text.search('nonexistent', '1.0'), '') + self.assertEqual(text.search('-bit', '1.0'), '4.2') # starts with a hyphen + + self.assertEqual(text.search('line', '3.0'), '3.12') + self.assertEqual(text.search('line', '3.0', forwards=True), '3.12') + self.assertEqual(text.search('line', '3.0', backwards=True), '2.8') + self.assertEqual(text.search('line', '3.0', forwards=True, backwards=True), '2.8') + + self.assertEqual(text.search('t.', '1.0'), '1.13') + self.assertEqual(text.search('t.', '1.0', exact=True), '1.13') + self.assertEqual(text.search('t.', '1.0', regexp=True), '1.10') + self.assertEqual(text.search('t.', '1.0', exact=True, regexp=True), '1.10') + + self.assertEqual(text.search('TEST', '1.0'), '') + self.assertEqual(text.search('TEST', '1.0', nocase=True), '1.10') + + var = tkinter.Variable(self.root) + self.assertEqual(text.search('test', '1.0', count=var), '1.10') + self.assertEqual(var.get(), 4 if self.wantobjects else '4') + + # TODO: Add test for elide=True def test_count(self): text = self.text diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 2d41a5e5ac0697..ca67e381958757 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,6 +1,8 @@ import contextlib +import itertools import os import re +import string import tempfile import token import tokenize @@ -1214,6 +1216,23 @@ def test_multiline_non_ascii_fstring_with_expr(self): FSTRING_END "\'\'\'" (3, 1) (3, 4) """) + # gh-139516, the '\n' is explicit to ensure no trailing whitespace which would invalidate the test + self.check_tokenize('''f"{f(a=lambda: 'à'\n)}"''', """\ + FSTRING_START \'f"\' (1, 0) (1, 2) + OP '{' (1, 2) (1, 3) + NAME 'f' (1, 3) (1, 4) + OP '(' (1, 4) (1, 5) + NAME 'a' (1, 5) (1, 6) + OP '=' (1, 6) (1, 7) + NAME 'lambda' (1, 7) (1, 13) + OP ':' (1, 13) (1, 14) + STRING "\'à\'" (1, 15) (1, 18) + NL '\\n' (1, 18) (1, 19) + OP ')' (2, 0) (2, 1) + OP '}' (2, 1) (2, 2) + FSTRING_END \'"\' (2, 2) (2, 3) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. @@ -1344,7 +1363,8 @@ def readline(): def test_no_bom_no_encoding_cookie(self): lines = ( - b'# something\n', + b'#!/home/\xc3\xa4/bin/python\n', + b'# something \xe2\x82\xac\n', b'print(something)\n', b'do_something(else)\n' ) @@ -1352,16 +1372,54 @@ def test_no_bom_no_encoding_cookie(self): self.assertEqual(encoding, 'utf-8') self.assertEqual(consumed_lines, list(lines[:2])) + def test_no_bom_no_encoding_cookie_first_line_error(self): + lines = ( + b'#!/home/\xa4/bin/python\n\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_no_bom_no_encoding_cookie_second_line_error(self): + lines = ( + b'#!/usr/bin/python\n', + b'# something \xe2\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + def test_bom_no_cookie(self): lines = ( - b'\xef\xbb\xbf# something\n', + b'\xef\xbb\xbf#!/home/\xc3\xa4/bin/python\n', b'print(something)\n', b'do_something(else)\n' ) encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) self.assertEqual(encoding, 'utf-8-sig') self.assertEqual(consumed_lines, - [b'# something\n', b'print(something)\n']) + [b'#!/home/\xc3\xa4/bin/python\n', b'print(something)\n']) + + def test_bom_no_cookie_first_line_error(self): + lines = ( + b'\xef\xbb\xbf#!/home/\xa4/bin/python\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_bom_no_cookie_second_line_error(self): + lines = ( + b'\xef\xbb\xbf#!/usr/bin/python\n', + b'# something \xe2\n', + b'print(something)\n', + b'do_something(else)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) def test_cookie_first_line_no_bom(self): lines = ( @@ -1437,16 +1495,60 @@ def test_cookie_second_line_noncommented_first_line(self): expected = [b"print('\xc2\xa3')\n"] self.assertEqual(consumed_lines, expected) - def test_cookie_second_line_commented_first_line(self): + def test_first_non_utf8_coding_line(self): lines = ( - b"#print('\xc2\xa3')\n", - b'# vim: set fileencoding=iso8859-15 :\n', - b"print('\xe2\x82\xac')\n" + b'#coding:iso-8859-15 \xa4\n', + b'print(something)\n' ) encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) - self.assertEqual(encoding, 'iso8859-15') - expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n'] - self.assertEqual(consumed_lines, expected) + self.assertEqual(encoding, 'iso-8859-15') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_first_utf8_coding_line_error(self): + lines = ( + b'#coding:ascii \xc3\xa4\n', + b'print(something)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_second_non_utf8_coding_line(self): + lines = ( + b'#!/usr/bin/python\n', + b'#coding:iso-8859-15 \xa4\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso-8859-15') + self.assertEqual(consumed_lines, list(lines[:2])) + + def test_second_utf8_coding_line_error(self): + lines = ( + b'#!/usr/bin/python\n', + b'#coding:ascii \xc3\xa4\n', + b'print(something)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_non_utf8_shebang(self): + lines = ( + b'#!/home/\xa4/bin/python\n', + b'#coding:iso-8859-15\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso-8859-15') + self.assertEqual(consumed_lines, list(lines[:2])) + + def test_utf8_shebang_error(self): + lines = ( + b'#!/home/\xc3\xa4/bin/python\n', + b'#coding:ascii\n', + b'print(something)\n' + ) + with self.assertRaises(SyntaxError): + tokenize.detect_encoding(self.get_readline(lines)) def test_cookie_second_line_empty_first_line(self): lines = ( @@ -1459,6 +1561,70 @@ def test_cookie_second_line_empty_first_line(self): expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n'] self.assertEqual(consumed_lines, expected) + def test_cookie_third_line(self): + lines = ( + b'#!/home/\xc3\xa4/bin/python\n', + b'# something\n', + b'# vim: set fileencoding=ascii :\n', + b'print(something)\n', + b'do_something(else)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'utf-8') + self.assertEqual(consumed_lines, list(lines[:2])) + + def test_double_coding_line(self): + # If the first line matches the second line is ignored. + lines = ( + b'#coding:iso8859-15\n', + b'#coding:latin1\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso8859-15') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_double_coding_same_line(self): + lines = ( + b'#coding:iso8859-15 coding:latin1\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'iso8859-15') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_double_coding_utf8(self): + lines = ( + b'#coding:utf-8\n', + b'#coding:latin1\n', + b'print(something)\n' + ) + encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines)) + self.assertEqual(encoding, 'utf-8') + self.assertEqual(consumed_lines, list(lines[:1])) + + def test_nul_in_first_coding_line(self): + lines = ( + b'#coding:iso8859-15\x00\n', + b'\n', + b'\n', + b'print(something)\n' + ) + with self.assertRaisesRegex(SyntaxError, + "source code cannot contain null bytes"): + tokenize.detect_encoding(self.get_readline(lines)) + + def test_nul_in_second_coding_line(self): + lines = ( + b'#!/usr/bin/python\n', + b'#coding:iso8859-15\x00\n', + b'\n', + b'print(something)\n' + ) + with self.assertRaisesRegex(SyntaxError, + "source code cannot contain null bytes"): + tokenize.detect_encoding(self.get_readline(lines)) + def test_latin1_normalization(self): # See get_normal_name() in Parser/tokenizer/helpers.c. encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix", @@ -1483,7 +1649,6 @@ def test_syntaxerror_latin1(self): readline = self.get_readline(lines) self.assertRaises(SyntaxError, tokenize.detect_encoding, readline) - def test_utf8_normalization(self): # See get_normal_name() in Parser/tokenizer/helpers.c. encodings = ("utf-8", "utf-8-mac", "utf-8-unix") @@ -1975,6 +2140,10 @@ def test_roundtrip(self): for case in cases: self.check_roundtrip(case) + self.check_roundtrip(r"t'{ {}}'") + self.check_roundtrip(r"t'{f'{ {}}'}{ {}}'") + self.check_roundtrip(r"f'{t'{ {}}'}{ {}}'") + def test_continuation(self): # Balancing continuation @@ -3014,6 +3183,7 @@ def get_tokens(string): f'__{ x:d }__'""", + " a\n\x00", ]: with self.subTest(case=case): self.assertRaises(tokenize.TokenError, get_tokens, case) @@ -3234,5 +3404,77 @@ def test_exact_flag(self): self.check_output(source, expect, flag) +class StringPrefixTest(unittest.TestCase): + @staticmethod + def determine_valid_prefixes(): + # Try all lengths until we find a length that has zero valid + # prefixes. This will miss the case where for example there + # are no valid 3 character prefixes, but there are valid 4 + # character prefixes. That seems unlikely. + + single_char_valid_prefixes = set() + + # Find all of the single character string prefixes. Just get + # the lowercase version, we'll deal with combinations of upper + # and lower case later. I'm using this logic just in case + # some uppercase-only prefix is added. + for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase): + try: + eval(f'{letter}""') + single_char_valid_prefixes.add(letter.lower()) + except SyntaxError: + pass + + # This logic assumes that all combinations of valid prefixes only use + # the characters that are valid single character prefixes. That seems + # like a valid assumption, but if it ever changes this will need + # adjusting. + valid_prefixes = set() + for length in itertools.count(): + num_at_this_length = 0 + for prefix in ( + "".join(l) + for l in itertools.combinations(single_char_valid_prefixes, length) + ): + for t in itertools.permutations(prefix): + for u in itertools.product(*[(c, c.upper()) for c in t]): + p = "".join(u) + if p == "not": + # 'not' can never be a string prefix, + # because it's a valid expression: not "" + continue + try: + eval(f'{p}""') + + # No syntax error, so p is a valid string + # prefix. + + valid_prefixes.add(p) + num_at_this_length += 1 + except SyntaxError: + pass + if num_at_this_length == 0: + return valid_prefixes + + + def test_prefixes(self): + # Get the list of defined string prefixes. I don't see an + # obvious documented way of doing this, but probably the best + # thing is to split apart tokenize.StringPrefix. + + # Make sure StringPrefix begins and ends in parens. We're + # assuming it's of the form "(a|b|ab)", if a, b, and cd are + # valid string prefixes. + self.assertEqual(tokenize.StringPrefix[0], '(') + self.assertEqual(tokenize.StringPrefix[-1], ')') + + # Then split apart everything else by '|'. + defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|')) + + # Now compute the actual allowed string prefixes and compare + # to what is defined in the tokenize module. + self.assertEqual(defined_prefixes, self.determine_valid_prefixes()) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index b9be87f357ffdd..b21daeb1b67556 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -19,7 +19,7 @@ requires_debug_ranges, has_no_debug_ranges, requires_subprocess) from test.support.os_helper import TESTFN, unlink -from test.support.script_helper import assert_python_ok, assert_python_failure +from test.support.script_helper import assert_python_ok, assert_python_failure, make_script from test.support.import_helper import forget from test.support import force_not_colorized, force_not_colorized_test_class @@ -1740,6 +1740,49 @@ def f(): ] self.assertEqual(result_lines, expected) +class TestKeywordTypoSuggestions(unittest.TestCase): + TYPO_CASES = [ + ("with block ad something:\n pass", "and"), + ("fur a in b:\n pass", "for"), + ("for a in b:\n pass\nelso:\n pass", "else"), + ("whille True:\n pass", "while"), + ("iff x > 5:\n pass", "if"), + ("if x:\n pass\nelseif y:\n pass", "elif"), + ("tyo:\n pass\nexcept y:\n pass", "try"), + ("classe MyClass:\n pass", "class"), + ("impor math", "import"), + ("form x import y", "from"), + ("defn calculate_sum(a, b):\n return a + b", "def"), + ("def foo():\n returm result", "return"), + ("lamda x: x ** 2", "lambda"), + ("def foo():\n yeld i", "yield"), + ("def foo():\n globel counter", "global"), + ("frum math import sqrt", "from"), + ("asynch def fetch_data():\n pass", "async"), + ("async def foo():\n awaid fetch_data()", "await"), + ('raisee ValueError("Error")', "raise"), + ("[x for x\nin range(3)\nof x]", "if"), + ("[123 fur x\nin range(3)\nif x]", "for"), + ("for x im n:\n pass", "in"), + ] + + def test_keyword_suggestions_from_file(self): + with tempfile.TemporaryDirectory() as script_dir: + for i, (code, expected_kw) in enumerate(self.TYPO_CASES): + with self.subTest(typo=expected_kw): + source = textwrap.dedent(code).strip() + script_name = make_script(script_dir, f"script_{i}", source) + rc, stdout, stderr = assert_python_failure(script_name) + stderr_text = stderr.decode('utf-8') + self.assertIn(f"Did you mean '{expected_kw}'", stderr_text) + + def test_keyword_suggestions_from_command_string(self): + for code, expected_kw in self.TYPO_CASES: + with self.subTest(typo=expected_kw): + source = textwrap.dedent(code).strip() + rc, stdout, stderr = assert_python_failure('-c', source) + stderr_text = stderr.decode('utf-8') + self.assertIn(f"Did you mean '{expected_kw}'", stderr_text) @requires_debug_ranges() @force_not_colorized_test_class @@ -4188,6 +4231,15 @@ def __dir__(self): self.assertNotIn("blech", actual) self.assertNotIn("oh no!", actual) + def test_attribute_error_with_non_string_candidates(self): + class T: + bluch = 1 + + instance = T() + instance.__dict__[0] = 1 + actual = self.get_suggestion(instance, 'blich') + self.assertIn("bluch", actual) + def test_attribute_error_with_bad_name(self): def raise_attribute_error_with_bad_name(): raise AttributeError(name=12, obj=23) @@ -4223,8 +4275,8 @@ def make_module(self, code): return mod_name - def get_import_from_suggestion(self, mod_dict, name): - modname = self.make_module(mod_dict) + def get_import_from_suggestion(self, code, name): + modname = self.make_module(code) def callable(): try: @@ -4301,6 +4353,13 @@ def test_import_from_suggestions_underscored(self): self.assertIn("'_bluch'", self.get_import_from_suggestion(code, '_luch')) self.assertNotIn("'_bluch'", self.get_import_from_suggestion(code, 'bluch')) + def test_import_from_suggestions_non_string(self): + modWithNonStringAttr = textwrap.dedent("""\ + globals()[0] = 1 + bluch = 1 + """) + self.assertIn("'bluch'", self.get_import_from_suggestion(modWithNonStringAttr, 'blech')) + def test_import_from_suggestions_do_not_trigger_for_long_attributes(self): code = "blech = None" @@ -4397,6 +4456,15 @@ def func(): actual = self.get_suggestion(func) self.assertIn("'ZeroDivisionError'?", actual) + def test_name_error_suggestions_with_non_string_candidates(self): + def func(): + abc = 1 + custom_globals = globals().copy() + custom_globals[0] = 1 + print(eval("abv", custom_globals, locals())) + actual = self.get_suggestion(func) + self.assertIn("abc", actual) + def test_name_error_suggestions_do_not_trigger_for_long_names(self): def func(): somethingverywronghehehehehehe = None diff --git a/Lib/test/test_tstring.py b/Lib/test/test_tstring.py index e72a1ea54176d5..74653c77c55de1 100644 --- a/Lib/test/test_tstring.py +++ b/Lib/test/test_tstring.py @@ -150,7 +150,6 @@ def test_raw_tstrings(self): t = tr"{path}\Documents" self.assertTStringEqual(t, ("", r"\Documents"), [(path, "path")]) - def test_template_concatenation(self): # Test template + template t1 = t"Hello, " @@ -161,9 +160,10 @@ def test_template_concatenation(self): # Test template + string t1 = t"Hello" - combined = t1 + ", world" - self.assertTStringEqual(combined, ("Hello, world",), ()) - self.assertEqual(fstring(combined), "Hello, world") + expected_msg = 'can only concatenate string.templatelib.Template ' \ + '\\(not "str"\\) to string.templatelib.Template' + with self.assertRaisesRegex(TypeError, expected_msg): + t1 + ", world" # Test template + template with interpolation name = "Python" @@ -174,9 +174,10 @@ def test_template_concatenation(self): self.assertEqual(fstring(combined), "Hello, Python") # Test string + template - t = "Hello, " + t"{name}" - self.assertTStringEqual(t, ("Hello, ", ""), [(name, "name")]) - self.assertEqual(fstring(t), "Hello, Python") + expected_msg = 'can only concatenate str ' \ + '\\(not "string.templatelib.Template"\\) to str' + with self.assertRaisesRegex(TypeError, expected_msg): + "Hello, " + t"{name}" def test_nested_templates(self): # Test a template inside another template expression @@ -219,6 +220,7 @@ def test_syntax_errors(self): ("t'{lambda:1}'", "t-string: lambda expressions are not allowed " "without parentheses"), ("t'{x:{;}}'", "t-string: expecting a valid expression after '{'"), + ("t'{1:d\n}'", "t-string: newlines are not allowed in format specifiers") ): with self.subTest(case), self.assertRaisesRegex(SyntaxError, err): eval(case) @@ -240,52 +242,28 @@ def test_literal_concatenation(self): self.assertTStringEqual(t, ("Hello, ", ""), [(name, "name")]) self.assertEqual(fstring(t), "Hello, Python") - # Test concatenation with string literal - name = "Python" - t = t"Hello, {name}" "and welcome!" - self.assertTStringEqual( - t, ("Hello, ", "and welcome!"), [(name, "name")] - ) - self.assertEqual(fstring(t), "Hello, Pythonand welcome!") - - # Test concatenation with Unicode literal - name = "Python" - t = t"Hello, {name}" u"and welcome!" - self.assertTStringEqual( - t, ("Hello, ", "and welcome!"), [(name, "name")] - ) - self.assertEqual(fstring(t), "Hello, Pythonand welcome!") - - # Test concatenation with f-string literal - tab = '\t' - t = t"Tab: {tab}. " f"f-tab: {tab}." - self.assertTStringEqual(t, ("Tab: ", ". f-tab: \t."), [(tab, "tab")]) - self.assertEqual(fstring(t), "Tab: \t. f-tab: \t.") - - # Test concatenation with raw string literal - tab = '\t' - t = t"Tab: {tab}. " r"Raw tab: \t." - self.assertTStringEqual( - t, ("Tab: ", r". Raw tab: \t."), [(tab, "tab")] - ) - self.assertEqual(fstring(t), "Tab: \t. Raw tab: \\t.") - - # Test concatenation with raw f-string literal - tab = '\t' - t = t"Tab: {tab}. " rf"f-tab: {tab}. Raw tab: \t." - self.assertTStringEqual( - t, ("Tab: ", ". f-tab: \t. Raw tab: \\t."), [(tab, "tab")] - ) - self.assertEqual(fstring(t), "Tab: \t. f-tab: \t. Raw tab: \\t.") - + # Test disallowed mix of t-string and string/f-string (incl. bytes) what = 't' - expected_msg = 'cannot mix bytes and nonbytes literals' + expected_msg = 'cannot mix t-string literals with string or bytes literals' for case in ( + "t'{what}-string literal' 'str literal'", + "t'{what}-string literal' u'unicode literal'", + "t'{what}-string literal' f'f-string literal'", + "t'{what}-string literal' r'raw string literal'", + "t'{what}-string literal' rf'raw f-string literal'", "t'{what}-string literal' b'bytes literal'", "t'{what}-string literal' br'raw bytes literal'", + "'str literal' t'{what}-string literal'", + "u'unicode literal' t'{what}-string literal'", + "f'f-string literal' t'{what}-string literal'", + "r'raw string literal' t'{what}-string literal'", + "rf'raw f-string literal' t'{what}-string literal'", + "b'bytes literal' t'{what}-string literal'", + "br'raw bytes literal' t'{what}-string literal'", ): - with self.assertRaisesRegex(SyntaxError, expected_msg): - eval(case) + with self.subTest(case): + with self.assertRaisesRegex(SyntaxError, expected_msg): + eval(case) def test_triple_quoted(self): # Test triple-quoted t-strings diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index 9ce80c5e8ea009..e533392b8cae94 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -290,12 +290,18 @@ def test_repr(self): self.assertEqual(repr(a0), "()") self.assertEqual(repr(a2), "(0, 1, 2)") + # Checks that t is not tracked without any GC collections. + def _not_tracked_instantly(self, t): + self.assertFalse(gc.is_tracked(t), t) + + # Checks that t is not tracked after GC collection. def _not_tracked(self, t): # Nested tuples can take several collections to untrack gc.collect() gc.collect() self.assertFalse(gc.is_tracked(t), t) + # Checks that t continues to be tracked even after GC collection. def _tracked(self, t): self.assertTrue(gc.is_tracked(t), t) gc.collect() @@ -307,13 +313,19 @@ def test_track_literals(self): # Test GC-optimization of tuple literals x, y, z = 1.5, "a", [] - self._not_tracked(()) - self._not_tracked((1,)) - self._not_tracked((1, 2)) - self._not_tracked((1, 2, "a")) - self._not_tracked((1, 2, (None, True, False, ()), int)) - self._not_tracked((object(),)) + # We check that those objects aren't tracked at all. + # It's essential for the GC performance, see gh-139951. + self._not_tracked_instantly(()) + self._not_tracked_instantly((1,)) + self._not_tracked_instantly((1, 2)) + self._not_tracked_instantly((1, 2, "a")) + self._not_tracked_instantly((1, 2) * 5) + self._not_tracked_instantly((12, 10**10, 'a_' * 100)) + self._not_tracked_instantly((object(),)) + self._not_tracked(((1, x), y, (2, 3))) + self._not_tracked((1, 2, (None, True, False, ()), int)) + self._not_tracked((object(), ())) # Tuples with mutable elements are always tracked, even if those # elements are not tracked right now. @@ -343,6 +355,12 @@ def check_track_dynamic(self, tp, always_track): self._tracked(tp(tuple([obj]) for obj in [x, y, z])) self._tracked(tuple(tp([obj]) for obj in [x, y, z])) + t = tp([1, x, y, z]) + self.assertEqual(type(t), tp) + self._tracked(t) + self.assertEqual(type(t[:]), tuple) + self._tracked(t[:]) + @support.cpython_only def test_track_dynamic(self): # Test GC-optimization of dynamically constructed tuples. diff --git a/Lib/test/test_turtle.py b/Lib/test/test_turtle.py index d02cac284a909a..12d2eed874148c 100644 --- a/Lib/test/test_turtle.py +++ b/Lib/test/test_turtle.py @@ -60,12 +60,25 @@ def patch_screen(): We must patch the _Screen class itself instead of the _Screen instance because instantiating it requires a display. """ + # Create a mock screen that delegates color validation to the real TurtleScreen methods + mock_screen = unittest.mock.MagicMock() + mock_screen.__class__ = turtle._Screen + mock_screen.mode.return_value = "standard" + mock_screen._colormode = 1.0 + + def mock_iscolorstring(color): + valid_colors = {'red', 'green', 'blue', 'black', 'white', 'yellow', + 'orange', 'purple', 'pink', 'brown', 'gray', 'grey', + 'cyan', 'magenta'} + + return color in valid_colors or (isinstance(color, str) and color.startswith('#')) + + mock_screen._iscolorstring = mock_iscolorstring + mock_screen._colorstr = turtle._Screen._colorstr.__get__(mock_screen) + return unittest.mock.patch( "turtle._Screen.__new__", - **{ - "return_value.__class__": turtle._Screen, - "return_value.mode.return_value": "standard", - }, + return_value=mock_screen ) @@ -635,6 +648,28 @@ def test_poly_context_when_creating_poly(self): self.assertTrue(self.turtle._creatingPoly) self.assertFalse(self.turtle._creatingPoly) + def test_dot_signature(self): + self.turtle.dot() + self.turtle.dot(10) + self.turtle.dot(size=10) + self.turtle.dot((0, 0, 0)) + self.turtle.dot(size=(0, 0, 0)) + self.turtle.dot("blue") + self.turtle.dot("") + self.turtle.dot(size="blue") + self.turtle.dot(20, "blue") + self.turtle.dot(20, "blue") + self.turtle.dot(20, (0, 0, 0)) + self.turtle.dot(20, 0, 0, 0) + with self.assertRaises(TypeError): + self.turtle.dot(color="blue") + self.assertRaises(turtle.TurtleGraphicsError, self.turtle.dot, "_not_a_color_") + self.assertRaises(turtle.TurtleGraphicsError, self.turtle.dot, 0, (0, 0, 0, 0)) + self.assertRaises(turtle.TurtleGraphicsError, self.turtle.dot, 0, 0, 0, 0, 0) + self.assertRaises(turtle.TurtleGraphicsError, self.turtle.dot, 0, (-1, 0, 0)) + self.assertRaises(turtle.TurtleGraphicsError, self.turtle.dot, 0, -1, 0, 0) + self.assertRaises(turtle.TurtleGraphicsError, self.turtle.dot, 0, (0, 257, 0)) + self.assertRaises(turtle.TurtleGraphicsError, self.turtle.dot, 0, 0, 257, 0) class TestModuleLevel(unittest.TestCase): def test_all_signatures(self): diff --git a/Lib/test/test_type_annotations.py b/Lib/test/test_type_annotations.py index 2c886bb6d362fa..4c58fade1b4f26 100644 --- a/Lib/test/test_type_annotations.py +++ b/Lib/test/test_type_annotations.py @@ -498,6 +498,28 @@ def f(x: int) -> int: pass self.assertEqual(f.__annotate__(annotationlib.Format.VALUE), annos) self.assertEqual(f.__annotations__, annos) + def test_set_annotations(self): + function_code = textwrap.dedent(""" + def f(x: int): + pass + """) + class_code = textwrap.dedent(""" + class f: + x: int + """) + for future in (False, True): + for label, code in (("function", function_code), ("class", class_code)): + with self.subTest(future=future, label=label): + if future: + code = "from __future__ import annotations\n" + code + ns = run_code(code) + f = ns["f"] + anno = "int" if future else int + self.assertEqual(f.__annotations__, {"x": anno}) + + f.__annotations__ = {"x": str} + self.assertEqual(f.__annotations__, {"x": str}) + def test_name_clash_with_format(self): # this test would fail if __annotate__'s parameter was called "format" # during symbol table construction @@ -813,3 +835,40 @@ def test_complex_comprehension_inlining_exec(self): genexp = annos["unique_name_2"][0] lamb = list(genexp)[0] self.assertEqual(lamb(), 42) + + # gh-138349 + def test_module_level_annotation_plus_listcomp(self): + cases = [ + """ + def report_error(): + pass + try: + [0 for name_2 in unique_name_0 if (lambda: name_2)] + except: + pass + annotated_name: 0 + """, + """ + class Generic: + pass + try: + [0 for name_2 in unique_name_0 if (0 for unique_name_1 in unique_name_2 for unique_name_3 in name_2)] + except: + pass + annotated_name: 0 + """, + """ + class Generic: + pass + annotated_name: 0 + try: + [0 for name_2 in [[0]] for unique_name_1 in unique_name_2 if (lambda: name_2)] + except: + pass + """, + ] + for code in cases: + with self.subTest(code=code): + mod = build_module(code) + annos = mod.__annotations__ + self.assertEqual(annos, {"annotated_name": 0}) diff --git a/Lib/test/test_type_comments.py b/Lib/test/test_type_comments.py index ee8939f62d082c..c40c45594f4d80 100644 --- a/Lib/test/test_type_comments.py +++ b/Lib/test/test_type_comments.py @@ -344,7 +344,7 @@ def test_longargs(self): todo = set(t.name[1:]) self.assertEqual(len(t.args.args) + len(t.args.posonlyargs), len(todo) - bool(t.args.vararg) - bool(t.args.kwarg)) - self.assertTrue(t.name.startswith('f'), t.name) + self.assertStartsWith(t.name, 'f') for index, c in enumerate(t.name[1:]): todo.remove(c) if c == 'v': diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 3552b6b4ef846c..fc26e71ffcb67b 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -2,7 +2,7 @@ from test.support import ( run_with_locale, cpython_only, no_rerun, - MISSING_C_DOCSTRINGS, EqualToForwardRef, + MISSING_C_DOCSTRINGS, EqualToForwardRef, check_disallow_instantiation, ) from test.support.script_helper import assert_python_ok from test.support.import_helper import import_fresh_module @@ -517,8 +517,8 @@ def test(f, format_spec, result): # and a number after the decimal. This is tricky, because # a totally empty format specifier means something else. # So, just use a sign flag - test(1e200, '+g', '+1e+200') - test(1e200, '+', '+1e+200') + test(1.25e200, '+g', '+1.25e+200') + test(1.25e200, '+', '+1.25e+200') test(1.1e200, '+g', '+1.1e+200') test(1.1e200, '+', '+1.1e+200') @@ -827,15 +827,15 @@ def test_instancecheck_and_subclasscheck(self): self.assertIsInstance(True, x) self.assertIsInstance('a', x) self.assertNotIsInstance(None, x) - self.assertTrue(issubclass(int, x)) - self.assertTrue(issubclass(bool, x)) - self.assertTrue(issubclass(str, x)) - self.assertFalse(issubclass(type(None), x)) + self.assertIsSubclass(int, x) + self.assertIsSubclass(bool, x) + self.assertIsSubclass(str, x) + self.assertNotIsSubclass(type(None), x) for x in (int | None, typing.Union[int, None]): with self.subTest(x=x): self.assertIsInstance(None, x) - self.assertTrue(issubclass(type(None), x)) + self.assertIsSubclass(type(None), x) for x in ( int | collections.abc.Mapping, @@ -844,8 +844,8 @@ def test_instancecheck_and_subclasscheck(self): with self.subTest(x=x): self.assertIsInstance({}, x) self.assertNotIsInstance((), x) - self.assertTrue(issubclass(dict, x)) - self.assertFalse(issubclass(list, x)) + self.assertIsSubclass(dict, x) + self.assertNotIsSubclass(list, x) def test_instancecheck_and_subclasscheck_order(self): T = typing.TypeVar('T') @@ -857,7 +857,7 @@ def test_instancecheck_and_subclasscheck_order(self): for x in will_resolve: with self.subTest(x=x): self.assertIsInstance(1, x) - self.assertTrue(issubclass(int, x)) + self.assertIsSubclass(int, x) wont_resolve = ( T | int, @@ -890,7 +890,7 @@ class BadMeta(type): def __subclasscheck__(cls, sub): 1/0 x = int | BadMeta('A', (), {}) - self.assertTrue(issubclass(int, x)) + self.assertIsSubclass(int, x) self.assertRaises(ZeroDivisionError, issubclass, list, x) def test_or_type_operator_with_TypeVar(self): @@ -1148,8 +1148,7 @@ def test_or_type_operator_reference_cycle(self): msg='Check for union reference leak.') def test_instantiation(self): - with self.assertRaises(TypeError): - types.UnionType() + check_disallow_instantiation(self, types.UnionType) self.assertIs(int, types.UnionType[int]) self.assertIs(int, types.UnionType[int, int]) self.assertEqual(int | str, types.UnionType[int, str]) @@ -1399,7 +1398,7 @@ def test_new_class_basics(self): def test_new_class_subclass(self): C = types.new_class("C", (int,)) - self.assertTrue(issubclass(C, int)) + self.assertIsSubclass(C, int) def test_new_class_meta(self): Meta = self.Meta @@ -1444,7 +1443,7 @@ def func(ns): bases=(int,), kwds=dict(metaclass=Meta, z=2), exec_body=func) - self.assertTrue(issubclass(C, int)) + self.assertIsSubclass(C, int) self.assertIsInstance(C, Meta) self.assertEqual(C.x, 0) self.assertEqual(C.y, 1) @@ -2513,15 +2512,16 @@ class SubinterpreterTests(unittest.TestCase): def setUpClass(cls): global interpreters try: - from test.support import interpreters + from concurrent import interpreters except ModuleNotFoundError: raise unittest.SkipTest('subinterpreters required') - import test.support.interpreters.channels + from test.support import channels # noqa: F401 + cls.create_channel = staticmethod(channels.create) @cpython_only @no_rerun('channels (and queues) might have a refleak; see gh-122199') def test_static_types_inherited_slots(self): - rch, sch = interpreters.channels.create() + rch, sch = self.create_channel() script = textwrap.dedent(""" import test.support @@ -2547,7 +2547,7 @@ def collate_results(raw): main_results = collate_results(raw) interp = interpreters.create() - interp.exec('from test.support import interpreters') + interp.exec('from concurrent import interpreters') interp.prepare_main(sch=sch) interp.exec(script) raw = rch.recv_nowait() diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 8c55ba4623e719..b660368144b1cd 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -762,6 +762,16 @@ class A(Generic[T, P, U]): ... self.assertEqual(A[float, [range]].__args__, (float, (range,), float)) self.assertEqual(A[float, [range], int].__args__, (float, (range,), int)) + def test_paramspec_and_typevar_specialization_2(self): + T = TypeVar("T") + P = ParamSpec('P', default=...) + U = TypeVar("U", default=float) + self.assertEqual(P.__default__, ...) + class A(Generic[T, P, U]): ... + self.assertEqual(A[float].__args__, (float, ..., float)) + self.assertEqual(A[float, [range]].__args__, (float, (range,), float)) + self.assertEqual(A[float, [range], int].__args__, (float, (range,), int)) + def test_typevartuple_none(self): U = TypeVarTuple('U') U_None = TypeVarTuple('U_None', default=None) @@ -2267,6 +2277,15 @@ class Ints(enum.IntEnum): self.assertEqual(Union[Literal[1], Literal[Ints.B], Literal[True]].__args__, (Literal[1], Literal[Ints.B], Literal[True])) + def test_allow_non_types_in_or(self): + # gh-140348: Test that using | with a Union object allows things that are + # not allowed by is_unionable(). + U1 = Union[int, str] + self.assertEqual(U1 | float, Union[int, str, float]) + self.assertEqual(U1 | "float", Union[int, str, "float"]) + self.assertEqual(float | U1, Union[float, int, str]) + self.assertEqual("float" | U1, Union["float", int, str]) + class TupleTests(BaseTestCase): @@ -4674,6 +4693,34 @@ class D(Generic[T]): pass with self.assertRaises(TypeError): D[()] + def test_generic_init_subclass_not_called_error(self): + notes = ["Note: this exception may have been caused by " + r"'GenericTests.test_generic_init_subclass_not_called_error.<locals>.Base.__init_subclass__' " + "(or the '__init_subclass__' method on a superclass) not calling 'super().__init_subclass__()'"] + + class Base: + def __init_subclass__(cls) -> None: + # Oops, I forgot super().__init_subclass__()! + pass + + with self.subTest(): + class Sub(Base, Generic[T]): + pass + + with self.assertRaises(AttributeError) as cm: + Sub[int] + + self.assertEqual(cm.exception.__notes__, notes) + + with self.subTest(): + class Sub[U](Base): + pass + + with self.assertRaises(AttributeError) as cm: + Sub[int] + + self.assertEqual(cm.exception.__notes__, notes) + def test_generic_subclass_checks(self): for typ in [list[int], List[int], tuple[int, str], Tuple[int, str], @@ -5797,6 +5844,23 @@ class A: with self.assertRaises(TypeError): a[int] + def test_return_non_tuple_while_unpacking(self): + # GH-138497: GenericAlias objects didn't ensure that __typing_subst__ actually + # returned a tuple + class EvilTypeVar: + __typing_is_unpacked_typevartuple__ = True + def __typing_prepare_subst__(*_): + return None # any value + def __typing_subst__(*_): + return 42 # not tuple + + evil = EvilTypeVar() + # Create a dummy TypeAlias that will be given the evil generic from + # above. + type type_alias[*_] = 0 + with self.assertRaisesRegex(TypeError, ".+__typing_subst__.+tuple.+int.*"): + type_alias[evil][0] + class ClassVarTests(BaseTestCase): @@ -6859,12 +6923,10 @@ def test_forward_ref_and_final(self): self.assertEqual(hints, {'value': Final}) def test_top_level_class_var(self): - # https://bugs.python.org/issue45166 - with self.assertRaisesRegex( - TypeError, - r'typing.ClassVar\[int\] is not valid as type argument', - ): - get_type_hints(ann_module6) + # This is not meaningful but we don't raise for it. + # https://github.com/python/cpython/issues/133959 + hints = get_type_hints(ann_module6) + self.assertEqual(hints, {'wrong': ClassVar[int]}) def test_get_type_hints_typeddict(self): self.assertEqual(get_type_hints(TotalMovie), {'title': str, 'year': int}) @@ -6967,6 +7029,11 @@ def foo(a: 'Callable[..., T]'): self.assertEqual(get_type_hints(foo, globals(), locals()), {'a': Callable[..., T]}) + def test_special_forms_no_forward(self): + def f(x: ClassVar[int]): + pass + self.assertEqual(get_type_hints(f), {'x': ClassVar[int]}) + def test_special_forms_forward(self): class C: @@ -6982,8 +7049,9 @@ class CF: self.assertEqual(get_type_hints(C, globals())['b'], Final[int]) self.assertEqual(get_type_hints(C, globals())['x'], ClassVar) self.assertEqual(get_type_hints(C, globals())['y'], Final) - with self.assertRaises(TypeError): - get_type_hints(CF, globals()), + lfi = get_type_hints(CF, globals())['b'] + self.assertIs(get_origin(lfi), list) + self.assertEqual(get_args(lfi), (Final[int],)) def test_union_forward_recursion(self): ValueList = List['Value'] @@ -7111,6 +7179,19 @@ def add_right(self, node: 'Node[T]' = None): right_hints = get_type_hints(t.add_right, globals(), locals()) self.assertEqual(right_hints['node'], Node[T]) + def test_stringified_typeddict(self): + ns = run_code( + """ + from __future__ import annotations + from typing import TypedDict + class TD[UniqueT](TypedDict): + a: UniqueT + """ + ) + TD = ns['TD'] + self.assertEqual(TD.__annotations__, {'a': EqualToForwardRef('UniqueT', owner=TD, module=TD.__module__)}) + self.assertEqual(get_type_hints(TD), {'a': TD.__type_params__[0]}) + class GetUtilitiesTestCase(TestCase): def test_get_origin(self): @@ -7216,33 +7297,119 @@ class C(Generic[T]): pass class EvaluateForwardRefTests(BaseTestCase): def test_evaluate_forward_ref(self): int_ref = ForwardRef('int') - missing = ForwardRef('missing') + self.assertIs(typing.evaluate_forward_ref(int_ref), int) self.assertIs( typing.evaluate_forward_ref(int_ref, type_params=()), int, ) + self.assertIs( + typing.evaluate_forward_ref(int_ref, format=annotationlib.Format.VALUE), + int, + ) self.assertIs( typing.evaluate_forward_ref( - int_ref, type_params=(), format=annotationlib.Format.FORWARDREF, + int_ref, format=annotationlib.Format.FORWARDREF, ), int, ) + self.assertEqual( + typing.evaluate_forward_ref( + int_ref, format=annotationlib.Format.STRING, + ), + 'int', + ) + + def test_evaluate_forward_ref_undefined(self): + missing = ForwardRef('missing') + with self.assertRaises(NameError): + typing.evaluate_forward_ref(missing) self.assertIs( typing.evaluate_forward_ref( - missing, type_params=(), format=annotationlib.Format.FORWARDREF, + missing, format=annotationlib.Format.FORWARDREF, ), missing, ) self.assertEqual( typing.evaluate_forward_ref( - int_ref, type_params=(), format=annotationlib.Format.STRING, + missing, format=annotationlib.Format.STRING, ), - 'int', + "missing", ) - def test_evaluate_forward_ref_no_type_params(self): - ref = ForwardRef('int') - self.assertIs(typing.evaluate_forward_ref(ref), int) + def test_evaluate_forward_ref_nested(self): + ref = ForwardRef("int | list['str']") + self.assertEqual( + typing.evaluate_forward_ref(ref), + int | list[str], + ) + self.assertEqual( + typing.evaluate_forward_ref(ref, format=annotationlib.Format.FORWARDREF), + int | list[str], + ) + self.assertEqual( + typing.evaluate_forward_ref(ref, format=annotationlib.Format.STRING), + "int | list['str']", + ) + + why = ForwardRef('"\'str\'"') + self.assertIs(typing.evaluate_forward_ref(why), str) + + def test_evaluate_forward_ref_none(self): + none_ref = ForwardRef('None') + self.assertIs(typing.evaluate_forward_ref(none_ref), None) + + def test_globals(self): + A = "str" + ref = ForwardRef('list[A]') + with self.assertRaises(NameError): + typing.evaluate_forward_ref(ref) + self.assertEqual( + typing.evaluate_forward_ref(ref, globals={'A': A}), + list[str], + ) + + def test_owner(self): + ref = ForwardRef("A") + + with self.assertRaises(NameError): + typing.evaluate_forward_ref(ref) + + # We default to the globals of `owner`, + # so it no longer raises `NameError` + self.assertIs( + typing.evaluate_forward_ref(ref, owner=Loop), A + ) + + def test_inherited_owner(self): + # owner passed to evaluate_forward_ref + ref = ForwardRef("list['A']") + self.assertEqual( + typing.evaluate_forward_ref(ref, owner=Loop), + list[A], + ) + + # owner set on the ForwardRef + ref = ForwardRef("list['A']", owner=Loop) + self.assertEqual( + typing.evaluate_forward_ref(ref), + list[A], + ) + + def test_partial_evaluation(self): + ref = ForwardRef("list[A]") + with self.assertRaises(NameError): + typing.evaluate_forward_ref(ref) + + self.assertEqual( + typing.evaluate_forward_ref(ref, format=annotationlib.Format.FORWARDREF), + list[EqualToForwardRef('A')], + ) + + def test_with_module(self): + from test.typinganndata import fwdref_module + + typing.evaluate_forward_ref( + fwdref_module.fw,) class CollectionsAbcTests(BaseTestCase): @@ -7339,6 +7506,16 @@ def test_mutablesequence(self): self.assertIsInstance([], typing.MutableSequence) self.assertNotIsInstance((), typing.MutableSequence) + def test_bytestring(self): + with self.assertWarns(DeprecationWarning): + self.assertIsInstance(b'', typing.ByteString) + with self.assertWarns(DeprecationWarning): + self.assertIsInstance(bytearray(b''), typing.ByteString) + with self.assertWarns(DeprecationWarning): + class Foo(typing.ByteString): ... + with self.assertWarns(DeprecationWarning): + class Bar(typing.ByteString, typing.Awaitable): ... + def test_list(self): self.assertIsSubclass(list, typing.List) @@ -8538,6 +8715,36 @@ class Child(Base1, Base2): self.assertEqual(Child.__required_keys__, frozenset(['a'])) self.assertEqual(Child.__optional_keys__, frozenset()) + def test_inheritance_pep563(self): + def _make_td(future, class_name, annos, base, extra_names=None): + lines = [] + if future: + lines.append('from __future__ import annotations') + lines.append('from typing import TypedDict') + lines.append(f'class {class_name}({base}):') + for name, anno in annos.items(): + lines.append(f' {name}: {anno}') + code = '\n'.join(lines) + ns = run_code(code, extra_names) + return ns[class_name] + + for base_future in (True, False): + for child_future in (True, False): + with self.subTest(base_future=base_future, child_future=child_future): + base = _make_td( + base_future, "Base", {"base": "int"}, "TypedDict" + ) + self.assertIsNotNone(base.__annotate__) + child = _make_td( + child_future, "Child", {"child": "int"}, "Base", {"Base": base} + ) + base_anno = ForwardRef("int", module="builtins", owner=base) if base_future else int + child_anno = ForwardRef("int", module="builtins", owner=child) if child_future else int + self.assertEqual(base.__annotations__, {'base': base_anno}) + self.assertEqual( + child.__annotations__, {'child': child_anno, 'base': base_anno} + ) + def test_required_notrequired_keys(self): self.assertEqual(NontotalMovie.__required_keys__, frozenset({"title"})) @@ -9691,6 +9898,19 @@ class B(str): ... self.assertIs(type(field_c2.__metadata__[0]), float) self.assertIs(type(field_c3.__metadata__[0]), bool) + def test_forwardref_partial_evaluation(self): + # Test that Annotated partially evaluates if it contains a ForwardRef + # See: https://github.com/python/cpython/issues/137706 + def f(x: Annotated[undefined, '']): pass + + ann = annotationlib.get_annotations(f, format=annotationlib.Format.FORWARDREF) + + # Test that the attributes are retrievable from the partially evaluated annotation + x_ann = ann['x'] + self.assertIs(get_origin(x_ann), Annotated) + self.assertEqual(x_ann.__origin__, EqualToForwardRef('undefined', owner=f)) + self.assertEqual(x_ann.__metadata__, ('',)) + class TypeAliasTests(BaseTestCase): def test_canonical_usage_with_variable_annotation(self): @@ -10313,6 +10533,7 @@ def test_special_attrs(self): typing.AsyncIterable: 'AsyncIterable', typing.AsyncIterator: 'AsyncIterator', typing.Awaitable: 'Awaitable', + typing.ByteString: 'ByteString', typing.Callable: 'Callable', typing.ChainMap: 'ChainMap', typing.Collection: 'Collection', @@ -10731,6 +10952,9 @@ def test_eq(self): with self.assertWarns(DeprecationWarning): self.assertNotEqual(int, typing._UnionGenericAlias) + def test_hashable(self): + self.assertEqual(hash(typing._UnionGenericAlias), hash(Union)) + def load_tests(loader, tests, pattern): import doctest diff --git a/Lib/test/test_unittest/test_async_case.py b/Lib/test/test_unittest/test_async_case.py index 993e6bf013cfbf..91d45283eb3b1b 100644 --- a/Lib/test/test_unittest/test_async_case.py +++ b/Lib/test/test_unittest/test_async_case.py @@ -12,7 +12,7 @@ class MyException(Exception): def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TestCM: @@ -480,7 +480,7 @@ def test_setup_get_event_loop(self): class TestCase1(unittest.IsolatedAsyncioTestCase): def setUp(self): - asyncio._get_event_loop_policy().get_event_loop() + asyncio.events._get_event_loop_policy().get_event_loop() async def test_demo1(self): pass @@ -490,7 +490,7 @@ async def test_demo1(self): self.assertTrue(result.wasSuccessful()) def test_loop_factory(self): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class TestCase1(unittest.IsolatedAsyncioTestCase): loop_factory = asyncio.EventLoop diff --git a/Lib/test/test_unittest/test_case.py b/Lib/test/test_unittest/test_case.py index a04af55f3fc0ae..d66cab146af246 100644 --- a/Lib/test/test_unittest/test_case.py +++ b/Lib/test/test_unittest/test_case.py @@ -1989,7 +1989,7 @@ def testAssertNoLogsYieldsNone(self): pass self.assertIsNone(value) - def testAssertStartswith(self): + def testAssertStartsWith(self): self.assertStartsWith('ababahalamaha', 'ababa') self.assertStartsWith('ababahalamaha', ('x', 'ababa', 'y')) self.assertStartsWith(UserString('ababahalamaha'), 'ababa') @@ -2034,7 +2034,7 @@ def testAssertStartswith(self): self.assertStartsWith('ababahalamaha', 'amaha', msg='abracadabra') self.assertIn('ababahalamaha', str(cm.exception)) - def testAssertNotStartswith(self): + def testAssertNotStartsWith(self): self.assertNotStartsWith('ababahalamaha', 'amaha') self.assertNotStartsWith('ababahalamaha', ('x', 'amaha', 'y')) self.assertNotStartsWith(UserString('ababahalamaha'), 'amaha') @@ -2079,7 +2079,7 @@ def testAssertNotStartswith(self): self.assertNotStartsWith('ababahalamaha', 'ababa', msg='abracadabra') self.assertIn('ababahalamaha', str(cm.exception)) - def testAssertEndswith(self): + def testAssertEndsWith(self): self.assertEndsWith('ababahalamaha', 'amaha') self.assertEndsWith('ababahalamaha', ('x', 'amaha', 'y')) self.assertEndsWith(UserString('ababahalamaha'), 'amaha') @@ -2124,7 +2124,7 @@ def testAssertEndswith(self): self.assertEndsWith('ababahalamaha', 'ababa', msg='abracadabra') self.assertIn('ababahalamaha', str(cm.exception)) - def testAssertNotEndswith(self): + def testAssertNotEndsWith(self): self.assertNotEndsWith('ababahalamaha', 'ababa') self.assertNotEndsWith('ababahalamaha', ('x', 'ababa', 'y')) self.assertNotEndsWith(UserString('ababahalamaha'), 'ababa') diff --git a/Lib/test/test_unittest/test_program.py b/Lib/test/test_unittest/test_program.py index 6092ed292d8f60..8ed92373e5e984 100644 --- a/Lib/test/test_unittest/test_program.py +++ b/Lib/test/test_unittest/test_program.py @@ -75,6 +75,14 @@ def testUnexpectedSuccess(self): class Empty(unittest.TestCase): pass + class SetUpClassFailure(unittest.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + raise Exception + def testPass(self): + pass + class TestLoader(unittest.TestLoader): """Test loader that returns a suite containing the supplied testcase.""" @@ -191,6 +199,18 @@ def test_ExitEmptySuite(self): out = stream.getvalue() self.assertIn('\nNO TESTS RAN\n', out) + def test_ExitSetUpClassFailureSuite(self): + stream = BufferedWriter() + with self.assertRaises(SystemExit) as cm: + unittest.main( + argv=["setup_class_failure"], + testRunner=unittest.TextTestRunner(stream=stream), + testLoader=self.TestLoader(self.SetUpClassFailure)) + self.assertEqual(cm.exception.code, 1) + out = stream.getvalue() + self.assertIn("ERROR: setUpClass", out) + self.assertIn("SetUpClassFailure", out) + class InitialisableProgram(unittest.TestProgram): exit = False diff --git a/Lib/test/test_unittest/testmock/testasync.py b/Lib/test/test_unittest/testmock/testasync.py index 0791675b5401ca..dc36ceeb6502e8 100644 --- a/Lib/test/test_unittest/testmock/testasync.py +++ b/Lib/test/test_unittest/testmock/testasync.py @@ -15,7 +15,7 @@ def tearDownModule(): - asyncio._set_event_loop_policy(None) + asyncio.events._set_event_loop_policy(None) class AsyncClass: diff --git a/Lib/test/test_unittest/testmock/testhelpers.py b/Lib/test/test_unittest/testmock/testhelpers.py index d1e48bde982040..0e82c723ec3eaa 100644 --- a/Lib/test/test_unittest/testmock/testhelpers.py +++ b/Lib/test/test_unittest/testmock/testhelpers.py @@ -1050,6 +1050,7 @@ def __post_init__(self): create_autospec(WithPostInit()), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithPostInit) self.assertIsInstance(mock.a, int) self.assertIsInstance(mock.b, int) @@ -1072,6 +1073,7 @@ class WithDefault: create_autospec(WithDefault(1)), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithDefault) self.assertIsInstance(mock.a, int) self.assertIsInstance(mock.b, int) @@ -1087,6 +1089,7 @@ def b(self) -> int: create_autospec(WithMethod(1)), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithMethod) self.assertIsInstance(mock.a, int) mock.b.assert_not_called() @@ -1102,11 +1105,29 @@ class WithNonFields: create_autospec(WithNonFields(1)), ]: with self.subTest(mock=mock): + self.assertIsInstance(mock, WithNonFields) with self.assertRaisesRegex(AttributeError, msg): mock.a with self.assertRaisesRegex(AttributeError, msg): mock.b + def test_dataclass_special_attrs(self): + @dataclass + class Description: + name: str + + for mock in [ + create_autospec(Description, instance=True), + create_autospec(Description(1)), + ]: + with self.subTest(mock=mock): + self.assertIsInstance(mock, Description) + self.assertIs(mock.__class__, Description) + self.assertIsInstance(mock.__dataclass_fields__, MagicMock) + self.assertIsInstance(mock.__dataclass_params__, MagicMock) + self.assertIsInstance(mock.__match_args__, MagicMock) + self.assertIsInstance(mock.__hash__, MagicMock) + class TestCallList(unittest.TestCase): def test_args_list_contains_call_list(self): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index d3af7a8489e650..35e4652a87b423 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -206,10 +206,97 @@ def test_tstrings(self): self.check_ast_roundtrip("t'foo'") self.check_ast_roundtrip("t'foo {bar}'") self.check_ast_roundtrip("t'foo {bar!s:.2f}'") - self.check_ast_roundtrip("t'foo {bar}' f'{bar}'") - self.check_ast_roundtrip("f'{bar}' t'foo {bar}'") - self.check_ast_roundtrip("t'foo {bar}' fr'\\hello {bar}'") - self.check_ast_roundtrip("t'foo {bar}' u'bar'") + self.check_ast_roundtrip("t'{a + b}'") + self.check_ast_roundtrip("t'{a + b:x}'") + self.check_ast_roundtrip("t'{a + b!s}'") + self.check_ast_roundtrip("t'{ {a}}'") + self.check_ast_roundtrip("t'{ {a}=}'") + self.check_ast_roundtrip("t'{{a}}'") + self.check_ast_roundtrip("t''") + self.check_ast_roundtrip('t""') + self.check_ast_roundtrip("t'{(lambda x: x)}'") + self.check_ast_roundtrip("t'{t'{x}'}'") + + def test_tstring_with_nonsensical_str_field(self): + # `value` suggests that the original code is `t'{test1}`, but `str` suggests otherwise + self.assertEqual( + ast.unparse( + ast.TemplateStr( + values=[ + ast.Interpolation( + value=ast.Name(id="test1", ctx=ast.Load()), str="test2", conversion=-1 + ) + ] + ) + ), + "t'{test2}'", + ) + + def test_tstring_with_none_str_field(self): + self.assertEqual( + ast.unparse( + ast.TemplateStr( + [ast.Interpolation(value=ast.Name(id="test1"), str=None, conversion=-1)] + ) + ), + "t'{test1}'", + ) + self.assertEqual( + ast.unparse( + ast.TemplateStr( + [ + ast.Interpolation( + value=ast.Lambda( + args=ast.arguments(args=[ast.arg(arg="x")]), + body=ast.Name(id="x"), + ), + str=None, + conversion=-1, + ) + ] + ) + ), + "t'{(lambda x: x)}'", + ) + self.assertEqual( + ast.unparse( + ast.TemplateStr( + values=[ + ast.Interpolation( + value=ast.TemplateStr( + # `str` field kept here + [ast.Interpolation(value=ast.Name(id="x"), str="y", conversion=-1)] + ), + str=None, + conversion=-1, + ) + ] + ) + ), + '''t"{t'{y}'}"''', + ) + self.assertEqual( + ast.unparse( + ast.TemplateStr( + values=[ + ast.Interpolation( + value=ast.TemplateStr( + [ast.Interpolation(value=ast.Name(id="x"), str=None, conversion=-1)] + ), + str=None, + conversion=-1, + ) + ] + ) + ), + '''t"{t'{x}'}"''', + ) + self.assertEqual( + ast.unparse(ast.TemplateStr( + [ast.Interpolation(value=ast.Constant(value="foo"), str=None, conversion=114)] + )), + '''t"{'foo'!r}"''', + ) def test_strings(self): self.check_ast_roundtrip("u'foo'") diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index c965860fbb10ef..ae524c5ffba6b1 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -109,7 +109,7 @@ def setUp(self): finally: f.close() self.pathname = os_helper.TESTFN - self.quoted_pathname = urllib.parse.quote(self.pathname) + self.quoted_pathname = urllib.parse.quote(os.fsencode(self.pathname)) self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname) def tearDown(self): @@ -1526,6 +1526,14 @@ def test_url2pathname(self): self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar') self.assertEqual(fn('data:blah'), 'data:blah') self.assertEqual(fn('data://blah'), f'data:{sep}{sep}blah') + self.assertEqual(fn('foo?bar'), 'foo') + self.assertEqual(fn('foo#bar'), 'foo') + self.assertEqual(fn('foo?bar=baz'), 'foo') + self.assertEqual(fn('foo?bar#baz'), 'foo') + self.assertEqual(fn('foo%3Fbar'), 'foo?bar') + self.assertEqual(fn('foo%23bar'), 'foo#bar') + self.assertEqual(fn('foo%3Fbar%3Dbaz'), 'foo?bar=baz') + self.assertEqual(fn('foo%3Fbar%23baz'), 'foo?bar#baz') def test_url2pathname_require_scheme(self): sep = os.path.sep @@ -1569,6 +1577,7 @@ def test_url2pathname_require_scheme_errors(self): urllib.request.url2pathname, url, require_scheme=True) + @unittest.skipIf(support.is_emscripten, "Fixed by https://github.com/emscripten-core/emscripten/pull/24593") def test_url2pathname_resolve_host(self): fn = urllib.request.url2pathname sep = os.path.sep @@ -1581,6 +1590,10 @@ def test_url2pathname_resolve_host(self): def test_url2pathname_win(self): fn = urllib.request.url2pathname self.assertEqual(fn('/C:/'), 'C:\\') + self.assertEqual(fn('//C:'), 'C:') + self.assertEqual(fn('//C:/'), 'C:\\') + self.assertEqual(fn('//C:\\'), 'C:\\') + self.assertEqual(fn('//C:80/'), 'C:80\\') self.assertEqual(fn("///C|"), 'C:') self.assertEqual(fn("///C:"), 'C:') self.assertEqual(fn('///C:/'), 'C:\\') diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index e6a18476908495..d015267cefdd64 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -1,9 +1,13 @@ +import contextlib import errno +import sysconfig import unittest +from unittest import mock from test import support from test.support import os_helper from test.support import socket_helper from test.support import ResourceDenied +from test.support.warnings_helper import check_no_resource_warning import os import socket @@ -143,6 +147,43 @@ def test_ftp(self): ] self._test_urls(urls, self._extra_handlers()) + @support.requires_resource('walltime') + @unittest.skipIf(sysconfig.get_platform() == 'linux-ppc64le', + 'leaks on PPC64LE (gh-140691)') + def test_ftp_no_leak(self): + # gh-140691: When the data connection (but not control connection) + # cannot be made established, we shouldn't leave an open socket object. + + class MockError(OSError): + pass + + orig_create_connection = socket.create_connection + def patched_create_connection(address, *args, **kwargs): + """Simulate REJECTing connections to ports other than 21""" + host, port = address + if port != 21: + raise MockError() + return orig_create_connection(address, *args, **kwargs) + + url = 'ftp://www.pythontest.net/README' + entry = url, None, urllib.error.URLError + no_cache_handlers = [urllib.request.FTPHandler()] + cache_handlers = self._extra_handlers() + with mock.patch('socket.create_connection', patched_create_connection): + with check_no_resource_warning(self): + # Try without CacheFTPHandler + self._test_urls([entry], handlers=no_cache_handlers, + retry=False) + with check_no_resource_warning(self): + # Try with CacheFTPHandler (uncached) + self._test_urls([entry], cache_handlers, retry=False) + with check_no_resource_warning(self): + # Try with CacheFTPHandler (cached) + self._test_urls([entry], cache_handlers, retry=False) + # Try without the mock: the handler should not use a closed connection + with check_no_resource_warning(self): + self._test_urls([url], cache_handlers, retry=False) + def test_file(self): TESTFN = os_helper.TESTFN f = open(TESTFN, 'w') @@ -255,18 +296,16 @@ def _test_urls(self, urls, handlers, retry=True): else: req = expected_err = None + if expected_err: + context = self.assertRaises(expected_err) + else: + context = contextlib.nullcontext() + with socket_helper.transient_internet(url): - try: + f = None + with context: f = urlopen(url, req, support.INTERNET_TIMEOUT) - # urllib.error.URLError is a subclass of OSError - except OSError as err: - if expected_err: - msg = ("Didn't get expected error(s) %s for %s %s, got %s: %s" % - (expected_err, url, req, type(err), err)) - self.assertIsInstance(err, expected_err, msg) - else: - raise - else: + if f is not None: try: with time_out, \ socket_peer_reset, \ diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index aabc360289a0d0..b2bde5a9b1d696 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -2,6 +2,7 @@ import unicodedata import unittest import urllib.parse +from test import support RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q" @@ -156,27 +157,25 @@ def checkRoundtrips(self, url, parsed, split, url2=None): self.assertEqual(result3.hostname, result.hostname) self.assertEqual(result3.port, result.port) - def test_qsl(self): - for orig, expect in parse_qsl_test_cases: - result = urllib.parse.parse_qsl(orig, keep_blank_values=True) - self.assertEqual(result, expect, "Error parsing %r" % orig) - expect_without_blanks = [v for v in expect if len(v[1])] - result = urllib.parse.parse_qsl(orig, keep_blank_values=False) - self.assertEqual(result, expect_without_blanks, - "Error parsing %r" % orig) - - def test_qs(self): - for orig, expect in parse_qs_test_cases: - result = urllib.parse.parse_qs(orig, keep_blank_values=True) - self.assertEqual(result, expect, "Error parsing %r" % orig) - expect_without_blanks = {v: expect[v] - for v in expect if len(expect[v][0])} - result = urllib.parse.parse_qs(orig, keep_blank_values=False) - self.assertEqual(result, expect_without_blanks, - "Error parsing %r" % orig) - - def test_roundtrips(self): - str_cases = [ + @support.subTests('orig,expect', parse_qsl_test_cases) + def test_qsl(self, orig, expect): + result = urllib.parse.parse_qsl(orig, keep_blank_values=True) + self.assertEqual(result, expect) + expect_without_blanks = [v for v in expect if len(v[1])] + result = urllib.parse.parse_qsl(orig, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks) + + @support.subTests('orig,expect', parse_qs_test_cases) + def test_qs(self, orig, expect): + result = urllib.parse.parse_qs(orig, keep_blank_values=True) + self.assertEqual(result, expect) + expect_without_blanks = {v: expect[v] + for v in expect if len(expect[v][0])} + result = urllib.parse.parse_qs(orig, keep_blank_values=False) + self.assertEqual(result, expect_without_blanks) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,parsed,split', [ ('path/to/file', ('', '', 'path/to/file', '', '', ''), ('', '', 'path/to/file', '', '')), @@ -263,23 +262,21 @@ def test_roundtrips(self): ('sch_me:path/to/file', ('', '', 'sch_me:path/to/file', '', '', ''), ('', '', 'sch_me:path/to/file', '', '')), - ] - def _encode(t): - return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) - bytes_cases = [_encode(x) for x in str_cases] - str_cases += [ ('schème:path/to/file', ('', '', 'schème:path/to/file', '', '', ''), ('', '', 'schème:path/to/file', '', '')), - ] - for url, parsed, split in str_cases + bytes_cases: - with self.subTest(url): - self.checkRoundtrips(url, parsed, split) - - def test_roundtrips_normalization(self): - str_cases = [ + ]) + def test_roundtrips(self, bytes, url, parsed, split): + if bytes: + if not url.isascii(): + self.skipTest('non-ASCII bytes') + url = str_encode(url) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + self.checkRoundtrips(url, parsed, split) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,url2,parsed,split', [ ('///path/to/file', '/path/to/file', ('', '', '/path/to/file', '', '', ''), @@ -300,22 +297,18 @@ def test_roundtrips_normalization(self): 'https:///tmp/junk.txt', ('https', '', '/tmp/junk.txt', '', '', ''), ('https', '', '/tmp/junk.txt', '', '')), - ] - def _encode(t): - return (t[0].encode('ascii'), - t[1].encode('ascii'), - tuple(x.encode('ascii') for x in t[2]), - tuple(x.encode('ascii') for x in t[3])) - bytes_cases = [_encode(x) for x in str_cases] - for url, url2, parsed, split in str_cases + bytes_cases: - with self.subTest(url): - self.checkRoundtrips(url, parsed, split, url2) - - def test_http_roundtrips(self): - # urllib.parse.urlsplit treats 'http:' as an optimized special case, - # so we test both 'http:' and 'https:' in all the following. - # Three cheers for white box knowledge! - str_cases = [ + ]) + def test_roundtrips_normalization(self, bytes, url, url2, parsed, split): + if bytes: + url = str_encode(url) + url2 = str_encode(url2) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + self.checkRoundtrips(url, parsed, split, url2) + + @support.subTests('bytes', (False, True)) + @support.subTests('scheme', ('http', 'https')) + @support.subTests('url,parsed,split', [ ('://www.python.org', ('www.python.org', '', '', '', ''), ('www.python.org', '', '', '')), @@ -331,23 +324,20 @@ def test_http_roundtrips(self): ('://a/b/c/d;p?q#f', ('a', '/b/c/d', 'p', 'q', 'f'), ('a', '/b/c/d;p', 'q', 'f')), - ] - def _encode(t): - return (t[0].encode('ascii'), - tuple(x.encode('ascii') for x in t[1]), - tuple(x.encode('ascii') for x in t[2])) - bytes_cases = [_encode(x) for x in str_cases] - str_schemes = ('http', 'https') - bytes_schemes = (b'http', b'https') - str_tests = str_schemes, str_cases - bytes_tests = bytes_schemes, bytes_cases - for schemes, test_cases in (str_tests, bytes_tests): - for scheme in schemes: - for url, parsed, split in test_cases: - url = scheme + url - parsed = (scheme,) + parsed - split = (scheme,) + split - self.checkRoundtrips(url, parsed, split) + ]) + def test_http_roundtrips(self, bytes, scheme, url, parsed, split): + # urllib.parse.urlsplit treats 'http:' as an optimized special case, + # so we test both 'http:' and 'https:' in all the following. + # Three cheers for white box knowledge! + if bytes: + scheme = str_encode(scheme) + url = str_encode(url) + parsed = tuple_encode(parsed) + split = tuple_encode(split) + url = scheme + url + parsed = (scheme,) + parsed + split = (scheme,) + split + self.checkRoundtrips(url, parsed, split) def checkJoin(self, base, relurl, expected, *, relroundtrip=True): with self.subTest(base=base, relurl=relurl): @@ -363,12 +353,13 @@ def checkJoin(self, base, relurl, expected, *, relroundtrip=True): relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb)) self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) - def test_unparse_parse(self): - str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] - bytes_cases = [x.encode('ascii') for x in str_cases] - for u in str_cases + bytes_cases: - self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) - self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) + @support.subTests('bytes', (False, True)) + @support.subTests('u', ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]) + def test_unparse_parse(self, bytes, u): + if bytes: + u = str_encode(u) + self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) + self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) def test_RFC1808(self): # "normal" cases from RFC 1808: @@ -695,8 +686,8 @@ def test_urljoins_relative_base(self): self.checkJoin('///b/c', '///w', '///w') self.checkJoin('///b/c', 'w', '///b/w') - def test_RFC2732(self): - str_cases = [ + @support.subTests('bytes', (False, True)) + @support.subTests('url,hostname,port', [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), ('http://[::1]:5432/foo/', '::1', 5432), @@ -727,26 +718,28 @@ def test_RFC2732(self): ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None), ('http://[::ffff:12.34.56.78]:/foo/', '::ffff:12.34.56.78', None), - ] - def _encode(t): - return t[0].encode('ascii'), t[1].encode('ascii'), t[2] - bytes_cases = [_encode(x) for x in str_cases] - for url, hostname, port in str_cases + bytes_cases: - urlparsed = urllib.parse.urlparse(url) - self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) - - str_cases = [ + ]) + def test_RFC2732(self, bytes, url, hostname, port): + if bytes: + url = str_encode(url) + hostname = str_encode(hostname) + urlparsed = urllib.parse.urlparse(url) + self.assertEqual((urlparsed.hostname, urlparsed.port), (hostname, port)) + + @support.subTests('bytes', (False, True)) + @support.subTests('invalid_url', [ 'http://::12.34.56.78]/', 'http://[::1/foo/', 'ftp://[::1/foo/bad]/bad', 'http://[::1/foo/bad]/bad', - 'http://[::ffff:12.34.56.78'] - bytes_cases = [x.encode('ascii') for x in str_cases] - for invalid_url in str_cases + bytes_cases: - self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) - - def test_urldefrag(self): - str_cases = [ + 'http://[::ffff:12.34.56.78']) + def test_RFC2732_invalid(self, bytes, invalid_url): + if bytes: + invalid_url = str_encode(invalid_url) + self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) + + @support.subTests('bytes', (False, True)) + @support.subTests('url,defrag,frag', [ ('http://python.org#frag', 'http://python.org', 'frag'), ('http://python.org', 'http://python.org', ''), ('http://python.org/#frag', 'http://python.org/', 'frag'), @@ -770,18 +763,18 @@ def test_urldefrag(self): ('http:?q#f', 'http:?q', 'f'), ('//a/b/c;p?q#f', '//a/b/c;p?q', 'f'), ('://a/b/c;p?q#f', '://a/b/c;p?q', 'f'), - ] - def _encode(t): - return type(t)(x.encode('ascii') for x in t) - bytes_cases = [_encode(x) for x in str_cases] - for url, defrag, frag in str_cases + bytes_cases: - with self.subTest(url): - result = urllib.parse.urldefrag(url) - hash = '#' if isinstance(url, str) else b'#' - self.assertEqual(result.geturl(), url.rstrip(hash)) - self.assertEqual(result, (defrag, frag)) - self.assertEqual(result.url, defrag) - self.assertEqual(result.fragment, frag) + ]) + def test_urldefrag(self, bytes, url, defrag, frag): + if bytes: + url = str_encode(url) + defrag = str_encode(defrag) + frag = str_encode(frag) + result = urllib.parse.urldefrag(url) + hash = '#' if isinstance(url, str) else b'#' + self.assertEqual(result.geturl(), url.rstrip(hash)) + self.assertEqual(result, (defrag, frag)) + self.assertEqual(result.url, defrag) + self.assertEqual(result.fragment, frag) def test_urlsplit_scoped_IPv6(self): p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234') @@ -981,42 +974,35 @@ def test_urlsplit_strip_url(self): self.assertEqual(p.scheme, "https") self.assertEqual(p.geturl(), "https://www.python.org/") - def test_attributes_bad_port(self): + @support.subTests('bytes', (False, True)) + @support.subTests('parse', (urllib.parse.urlsplit, urllib.parse.urlparse)) + @support.subTests('port', ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६")) + def test_attributes_bad_port(self, bytes, parse, port): """Check handling of invalid ports.""" - for bytes in (False, True): - for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"): - with self.subTest(bytes=bytes, parse=parse, port=port): - netloc = "www.example.net:" + port - url = "http://" + netloc + "/" - if bytes: - if netloc.isascii() and port.isascii(): - netloc = netloc.encode("ascii") - url = url.encode("ascii") - else: - continue - p = parse(url) - self.assertEqual(p.netloc, netloc) - with self.assertRaises(ValueError): - p.port + netloc = "www.example.net:" + port + url = "http://" + netloc + "/" + if bytes: + if not (netloc.isascii() and port.isascii()): + self.skipTest('non-ASCII bytes') + netloc = str_encode(netloc) + url = str_encode(url) + p = parse(url) + self.assertEqual(p.netloc, netloc) + with self.assertRaises(ValueError): + p.port - def test_attributes_bad_scheme(self): + @support.subTests('bytes', (False, True)) + @support.subTests('parse', (urllib.parse.urlsplit, urllib.parse.urlparse)) + @support.subTests('scheme', (".", "+", "-", "0", "http&", "६http")) + def test_attributes_bad_scheme(self, bytes, parse, scheme): """Check handling of invalid schemes.""" - for bytes in (False, True): - for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): - for scheme in (".", "+", "-", "0", "http&", "६http"): - with self.subTest(bytes=bytes, parse=parse, scheme=scheme): - url = scheme + "://www.example.net" - if bytes: - if url.isascii(): - url = url.encode("ascii") - else: - continue - p = parse(url) - if bytes: - self.assertEqual(p.scheme, b"") - else: - self.assertEqual(p.scheme, "") + url = scheme + "://www.example.net" + if bytes: + if not url.isascii(): + self.skipTest('non-ASCII bytes') + url = url.encode("ascii") + p = parse(url) + self.assertEqual(p.scheme, b"" if bytes else "") def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it @@ -1128,24 +1114,21 @@ def test_anyscheme(self): self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) - def test_default_scheme(self): + @support.subTests('func', (urllib.parse.urlparse, urllib.parse.urlsplit)) + def test_default_scheme(self, func): # Exercise the scheme parameter of urlparse() and urlsplit() - for func in (urllib.parse.urlparse, urllib.parse.urlsplit): - with self.subTest(function=func): - result = func("http://example.net/", "ftp") - self.assertEqual(result.scheme, "http") - result = func(b"http://example.net/", b"ftp") - self.assertEqual(result.scheme, b"http") - self.assertEqual(func("path", "ftp").scheme, "ftp") - self.assertEqual(func("path", scheme="ftp").scheme, "ftp") - self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") - self.assertEqual(func("path").scheme, "") - self.assertEqual(func(b"path").scheme, b"") - self.assertEqual(func(b"path", "").scheme, b"") - - def test_parse_fragments(self): - # Exercise the allow_fragments parameter of urlparse() and urlsplit() - tests = ( + result = func("http://example.net/", "ftp") + self.assertEqual(result.scheme, "http") + result = func(b"http://example.net/", b"ftp") + self.assertEqual(result.scheme, b"http") + self.assertEqual(func("path", "ftp").scheme, "ftp") + self.assertEqual(func("path", scheme="ftp").scheme, "ftp") + self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") + self.assertEqual(func("path").scheme, "") + self.assertEqual(func(b"path").scheme, b"") + self.assertEqual(func(b"path", "").scheme, b"") + + @support.subTests('url,attr,expected_frag', ( ("http:#frag", "path", "frag"), ("//example.net#frag", "path", "frag"), ("index.html#frag", "path", "frag"), @@ -1156,24 +1139,24 @@ def test_parse_fragments(self): ("//abc#@frag", "path", "@frag"), ("//abc:80#@frag", "path", "@frag"), ("//abc#@frag:80", "path", "@frag:80"), - ) - for url, attr, expected_frag in tests: - for func in (urllib.parse.urlparse, urllib.parse.urlsplit): - if attr == "params" and func is urllib.parse.urlsplit: - attr = "path" - with self.subTest(url=url, function=func): - result = func(url, allow_fragments=False) - self.assertEqual(result.fragment, "") - self.assertEndsWith(getattr(result, attr), - "#" + expected_frag) - self.assertEqual(func(url, "", False).fragment, "") - - result = func(url, allow_fragments=True) - self.assertEqual(result.fragment, expected_frag) - self.assertNotEndsWith(getattr(result, attr), expected_frag) - self.assertEqual(func(url, "", True).fragment, - expected_frag) - self.assertEqual(func(url).fragment, expected_frag) + )) + @support.subTests('func', (urllib.parse.urlparse, urllib.parse.urlsplit)) + def test_parse_fragments(self, url, attr, expected_frag, func): + # Exercise the allow_fragments parameter of urlparse() and urlsplit() + if attr == "params" and func is urllib.parse.urlsplit: + attr = "path" + result = func(url, allow_fragments=False) + self.assertEqual(result.fragment, "") + self.assertEndsWith(getattr(result, attr), + "#" + expected_frag) + self.assertEqual(func(url, "", False).fragment, "") + + result = func(url, allow_fragments=True) + self.assertEqual(result.fragment, expected_frag) + self.assertNotEndsWith(getattr(result, attr), expected_frag) + self.assertEqual(func(url, "", True).fragment, + expected_frag) + self.assertEqual(func(url).fragment, expected_frag) def test_mixed_types_rejected(self): # Several functions that process either strings or ASCII encoded bytes @@ -1199,7 +1182,14 @@ def test_mixed_types_rejected(self): with self.assertRaisesRegex(TypeError, "Cannot mix str"): urllib.parse.urljoin(b"http://python.org", "http://python.org") - def _check_result_type(self, str_type): + @support.subTests('result_type', [ + urllib.parse.DefragResult, + urllib.parse.SplitResult, + urllib.parse.ParseResult, + ]) + def test_result_pairs(self, result_type): + # Check encoding and decoding between result pairs + str_type = result_type num_args = len(str_type._fields) bytes_type = str_type._encoded_counterpart self.assertIs(bytes_type._decoded_counterpart, str_type) @@ -1224,16 +1214,6 @@ def _check_result_type(self, str_type): self.assertEqual(str_result.encode(encoding, errors), bytes_args) self.assertEqual(str_result.encode(encoding, errors), bytes_result) - def test_result_pairs(self): - # Check encoding and decoding between result pairs - result_types = [ - urllib.parse.DefragResult, - urllib.parse.SplitResult, - urllib.parse.ParseResult, - ] - for result_type in result_types: - self._check_result_type(result_type) - def test_parse_qs_encoding(self): result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1") self.assertEqual(result, {'key': ['\u0141\xE9']}) @@ -1265,8 +1245,7 @@ def test_parse_qsl_max_num_fields(self): urllib.parse.parse_qsl('&'.join(['a=a']*11), max_num_fields=10) urllib.parse.parse_qsl('&'.join(['a=a']*10), max_num_fields=10) - def test_parse_qs_separator(self): - parse_qs_semicolon_cases = [ + @support.subTests('orig,expect', [ (";", {}), (";;", {}), (";a=b", {'a': ['b']}), @@ -1277,17 +1256,14 @@ def test_parse_qs_separator(self): (b";a=b", {b'a': [b'b']}), (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), (b"a=1;a=2", {b'a': [b'1', b'2']}), - ] - for orig, expect in parse_qs_semicolon_cases: - with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): - result = urllib.parse.parse_qs(orig, separator=';') - self.assertEqual(result, expect, "Error parsing %r" % orig) - result_bytes = urllib.parse.parse_qs(orig, separator=b';') - self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) - - - def test_parse_qsl_separator(self): - parse_qsl_semicolon_cases = [ + ]) + def test_parse_qs_separator(self, orig, expect): + result = urllib.parse.parse_qs(orig, separator=';') + self.assertEqual(result, expect) + result_bytes = urllib.parse.parse_qs(orig, separator=b';') + self.assertEqual(result_bytes, expect) + + @support.subTests('orig,expect', [ (";", []), (";;", []), (";a=b", [('a', 'b')]), @@ -1298,13 +1274,12 @@ def test_parse_qsl_separator(self): (b";a=b", [(b'a', b'b')]), (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), - ] - for orig, expect in parse_qsl_semicolon_cases: - with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): - result = urllib.parse.parse_qsl(orig, separator=';') - self.assertEqual(result, expect, "Error parsing %r" % orig) - result_bytes = urllib.parse.parse_qsl(orig, separator=b';') - self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) + ]) + def test_parse_qsl_separator(self, orig, expect): + result = urllib.parse.parse_qsl(orig, separator=';') + self.assertEqual(result, expect) + result_bytes = urllib.parse.parse_qsl(orig, separator=b';') + self.assertEqual(result_bytes, expect) def test_parse_qsl_bytes(self): self.assertEqual(urllib.parse.parse_qsl(b'a=b'), [(b'a', b'b')]) @@ -1695,11 +1670,12 @@ def test_to_bytes(self): self.assertRaises(UnicodeError, urllib.parse._to_bytes, 'http://www.python.org/medi\u00e6val') - def test_unwrap(self): - for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>', - 'URL:scheme://host/path', 'scheme://host/path'): - url = urllib.parse.unwrap(wrapped_url) - self.assertEqual(url, 'scheme://host/path') + @support.subTests('wrapped_url', + ('<URL:scheme://host/path>', '<scheme://host/path>', + 'URL:scheme://host/path', 'scheme://host/path')) + def test_unwrap(self, wrapped_url): + url = urllib.parse.unwrap(wrapped_url) + self.assertEqual(url, 'scheme://host/path') class DeprecationTest(unittest.TestCase): @@ -1780,5 +1756,11 @@ def test_to_bytes_deprecation(self): 'urllib.parse.to_bytes() is deprecated as of 3.8') +def str_encode(s): + return s.encode('ascii') + +def tuple_encode(t): + return tuple(str_encode(x) for x in t) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_userdict.py b/Lib/test/test_userdict.py index ace84ef564df72..75de9ea252de98 100644 --- a/Lib/test/test_userdict.py +++ b/Lib/test/test_userdict.py @@ -166,7 +166,7 @@ def test_update(self): def test_missing(self): # Make sure UserDict doesn't have a __missing__ method - self.assertEqual(hasattr(collections.UserDict, "__missing__"), False) + self.assertNotHasAttr(collections.UserDict, "__missing__") # Test several cases: # (D) subclass defines __missing__ method returning a value # (E) subclass defines __missing__ method raising RuntimeError diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 958be5408ce90a..0e1a723ce3a151 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -14,6 +14,7 @@ from test import support from test.support import import_helper +from test.support.script_helper import assert_python_ok py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid']) @@ -1139,6 +1140,23 @@ def test_uuid_weakref(self): weak = weakref.ref(strong) self.assertIs(strong, weak()) + +class CommandLineTestCases: + uuid = None # to be defined in subclasses + + def do_test_standalone_uuid(self, version): + stdout = io.StringIO() + with contextlib.redirect_stdout(stdout): + self.uuid.main() + output = stdout.getvalue().strip() + u = self.uuid.UUID(output) + self.assertEqual(output, str(u)) + self.assertEqual(u.version, version) + + @mock.patch.object(sys, "argv", ["", "-u", "uuid1"]) + def test_cli_uuid1(self): + self.do_test_standalone_uuid(1) + @mock.patch.object(sys, "argv", ["", "-u", "uuid3", "-n", "@dns"]) @mock.patch('sys.stderr', new_callable=io.StringIO) def test_cli_namespace_required_for_uuid3(self, mock_err): @@ -1213,14 +1231,53 @@ def test_cli_uuid5_ouputted_with_valid_namespace_and_name(self): self.assertEqual(output, str(uuid_output)) self.assertEqual(uuid_output.version, 5) + @mock.patch.object(sys, "argv", ["", "-u", "uuid6"]) + def test_cli_uuid6(self): + self.do_test_standalone_uuid(6) + + @mock.patch.object(sys, "argv", ["", "-u", "uuid7"]) + def test_cli_uuid7(self): + self.do_test_standalone_uuid(7) + + @mock.patch.object(sys, "argv", ["", "-u", "uuid8"]) + def test_cli_uuid8(self): + self.do_test_standalone_uuid(8) -class TestUUIDWithoutExtModule(BaseTestUUID, unittest.TestCase): + +class TestUUIDWithoutExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase): uuid = py_uuid + @unittest.skipUnless(c_uuid, 'requires the C _uuid module') -class TestUUIDWithExtModule(BaseTestUUID, unittest.TestCase): +class TestUUIDWithExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase): uuid = c_uuid + def check_has_stable_libuuid_extractable_node(self): + if not self.uuid._has_stable_extractable_node: + self.skipTest("libuuid cannot deduce MAC address") + + @unittest.skipUnless(os.name == 'posix', 'POSIX only') + def test_unix_getnode_from_libuuid(self): + self.check_has_stable_libuuid_extractable_node() + script = 'import uuid; print(uuid._unix_getnode())' + _, n_a, _ = assert_python_ok('-c', script) + _, n_b, _ = assert_python_ok('-c', script) + n_a, n_b = n_a.decode().strip(), n_b.decode().strip() + self.assertTrue(n_a.isdigit()) + self.assertTrue(n_b.isdigit()) + self.assertEqual(n_a, n_b) + + @unittest.skipUnless(os.name == 'nt', 'Windows only') + def test_windows_getnode_from_libuuid(self): + self.check_has_stable_libuuid_extractable_node() + script = 'import uuid; print(uuid._windll_getnode())' + _, n_a, _ = assert_python_ok('-c', script) + _, n_b, _ = assert_python_ok('-c', script) + n_a, n_b = n_a.decode().strip(), n_b.decode().strip() + self.assertTrue(n_a.isdigit()) + self.assertTrue(n_b.isdigit()) + self.assertEqual(n_a, n_b) + class BaseTestInternals: _uuid = py_uuid diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index adc86a49b0668d..d108165be51e84 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -21,7 +21,7 @@ from test.support import (captured_stdout, captured_stderr, skip_if_broken_multiprocessing_synchronize, verbose, requires_subprocess, is_android, is_apple_mobile, - is_emscripten, is_wasi, + is_wasm32, requires_venv_with_pip, TEST_HOME_DIR, requires_resource, copy_python_src_ignore) from test.support.os_helper import (can_symlink, EnvironmentVarGuard, rmtree, @@ -42,7 +42,7 @@ or sys._base_executable != sys.executable, 'cannot run venv.create from within a venv on this platform') -if is_android or is_apple_mobile or is_emscripten or is_wasi: +if is_android or is_apple_mobile or is_wasm32: raise unittest.SkipTest("venv is not available on this platform") @requires_subprocess() @@ -522,6 +522,8 @@ def test_special_chars_bash(self): # gh-124651: test quoted strings @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') + @unittest.skipIf(sys.platform.startswith('netbsd'), + "NetBSD csh fails with quoted special chars; see gh-139308") def test_special_chars_csh(self): """ Test that the template strings are quoted properly (csh) @@ -774,7 +776,7 @@ def test_activate_shell_script_has_no_dos_newlines(self): with open(script_path, 'rb') as script: for i, line in enumerate(script, 1): error_message = f"CR LF found in line {i}" - self.assertFalse(line.endswith(b'\r\n'), error_message) + self.assertNotEndsWith(line, b'\r\n', error_message) @requireVenvCreate def test_scm_ignore_files_git(self): @@ -978,7 +980,7 @@ def do_test_with_pip(self, system_site_packages): self.assertEqual(err, "") out = out.decode("latin-1") # Force to text, prevent decoding errors expected_version = "pip {}".format(ensurepip.version()) - self.assertEqual(out[:len(expected_version)], expected_version) + self.assertStartsWith(out, expected_version) env_dir = os.fsencode(self.env_dir).decode("latin-1") self.assertIn(env_dir, out) diff --git a/Lib/test/test_warnings/__init__.py b/Lib/test/test_warnings/__init__.py index 05710c469348c4..4c3c715a8fbf0a 100644 --- a/Lib/test/test_warnings/__init__.py +++ b/Lib/test/test_warnings/__init__.py @@ -102,7 +102,7 @@ class PublicAPITests(BaseTest): """ def test_module_all_attribute(self): - self.assertTrue(hasattr(self.module, '__all__')) + self.assertHasAttr(self.module, '__all__') target_api = ["warn", "warn_explicit", "showwarning", "formatwarning", "filterwarnings", "simplefilter", "resetwarnings", "catch_warnings", "deprecated"] @@ -241,6 +241,85 @@ def test_once(self): 42) self.assertEqual(len(w), 0) + def test_filter_module(self): + MS_WINDOWS = (sys.platform == 'win32') + with self.module.catch_warnings(record=True) as w: + self.module.simplefilter('error') + self.module.filterwarnings('always', module=r'package\.module\z') + self.module.warn_explicit('msg', UserWarning, 'filename', 42, + module='package.module') + self.assertEqual(len(w), 1) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, '/path/to/package/module', 42) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, '/path/to/package/module.py', 42) + + with self.module.catch_warnings(record=True) as w: + self.module.simplefilter('error') + self.module.filterwarnings('always', module='package') + self.module.warn_explicit('msg', UserWarning, 'filename', 42, + module='package.module') + self.assertEqual(len(w), 1) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, 'filename', 42, + module='other.package.module') + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, '/path/to/otherpackage/module.py', 42) + + with self.module.catch_warnings(record=True) as w: + self.module.simplefilter('error') + self.module.filterwarnings('always', module=r'/path/to/package/module\z') + self.module.warn_explicit('msg', UserWarning, '/path/to/package/module', 42) + self.assertEqual(len(w), 1) + self.module.warn_explicit('msg', UserWarning, '/path/to/package/module.py', 42) + self.assertEqual(len(w), 2) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, '/PATH/TO/PACKAGE/MODULE', 42) + if MS_WINDOWS: + if self.module is py_warnings: + self.module.warn_explicit('msg', UserWarning, r'/path/to/package/module.PY', 42) + self.assertEqual(len(w), 3) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, r'/path/to/package/module/__init__.py', 42) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, r'/path/to/package/module.pyw', 42) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, r'\path\to\package\module', 42) + + with self.module.catch_warnings(record=True) as w: + self.module.simplefilter('error') + self.module.filterwarnings('always', module=r'/path/to/package/__init__\z') + self.module.warn_explicit('msg', UserWarning, '/path/to/package/__init__.py', 42) + self.assertEqual(len(w), 1) + self.module.warn_explicit('msg', UserWarning, '/path/to/package/__init__', 42) + self.assertEqual(len(w), 2) + + if MS_WINDOWS: + with self.module.catch_warnings(record=True) as w: + self.module.simplefilter('error') + self.module.filterwarnings('always', module=r'C:\\path\\to\\package\\module\z') + self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module', 42) + self.assertEqual(len(w), 1) + self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.py', 42) + self.assertEqual(len(w), 2) + if self.module is py_warnings: + self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.PY', 42) + self.assertEqual(len(w), 3) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module.pyw', 42) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, r'C:\PATH\TO\PACKAGE\MODULE', 42) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, r'C:/path/to/package/module', 42) + with self.assertRaises(UserWarning): + self.module.warn_explicit('msg', UserWarning, r'C:\path\to\package\module\__init__.py', 42) + + with self.module.catch_warnings(record=True) as w: + self.module.simplefilter('error') + self.module.filterwarnings('always', module=r'<unknown>\z') + self.module.warn_explicit('msg', UserWarning, '', 42) + self.assertEqual(len(w), 1) + def test_module_globals(self): with self.module.catch_warnings(record=True) as w: self.module.simplefilter("always", UserWarning) @@ -555,13 +634,7 @@ def test_warn_explicit_non_ascii_filename(self): with self.module.catch_warnings(record=True) as w: self.module.resetwarnings() self.module.filterwarnings("always", category=UserWarning) - filenames = ["nonascii\xe9\u20ac"] - if not support.is_emscripten: - # JavaScript does not like surrogates. - # Invalid UTF-8 leading byte 0x80 encountered when - # deserializing a UTF-8 string in wasm memory to a JS - # string! - filenames.append("surrogate\udc80") + filenames = ["nonascii\xe9\u20ac", "surrogate\udc80"] for filename in filenames: try: os.fsencode(filename) @@ -735,7 +808,7 @@ class CWarnTests(WarnTests, unittest.TestCase): # test.import_helper.import_fresh_module utility function def test_accelerated(self): self.assertIsNot(original_warnings, self.module) - self.assertFalse(hasattr(self.module.warn, '__code__')) + self.assertNotHasAttr(self.module.warn, '__code__') class PyWarnTests(WarnTests, unittest.TestCase): module = py_warnings @@ -744,7 +817,7 @@ class PyWarnTests(WarnTests, unittest.TestCase): # test.import_helper.import_fresh_module utility function def test_pure_python(self): self.assertIsNot(original_warnings, self.module) - self.assertTrue(hasattr(self.module.warn, '__code__')) + self.assertHasAttr(self.module.warn, '__code__') class WCmdLineTests(BaseTest): @@ -1528,12 +1601,12 @@ def test_late_resource_warning(self): # (_warnings will try to import it) code = "f = open(%a)" % __file__ rc, out, err = assert_python_ok("-Wd", "-c", code) - self.assertTrue(err.startswith(expected), ascii(err)) + self.assertStartsWith(err, expected) # import the warnings module code = "import warnings; f = open(%a)" % __file__ rc, out, err = assert_python_ok("-Wd", "-c", code) - self.assertTrue(err.startswith(expected), ascii(err)) + self.assertStartsWith(err, expected) class AsyncTests(BaseTest): @@ -1875,6 +1948,25 @@ class D(C, x=3): self.assertEqual(D.inited, 3) + def test_existing_init_subclass_in_sibling_base(self): + @deprecated("A will go away soon") + class A: + pass + class B: + def __init_subclass__(cls, x): + super().__init_subclass__() + cls.inited = x + + with self.assertWarnsRegex(DeprecationWarning, "A will go away soon"): + class C(A, B, x=42): + pass + self.assertEqual(C.inited, 42) + + with self.assertWarnsRegex(DeprecationWarning, "A will go away soon"): + class D(B, A, x=42): + pass + self.assertEqual(D.inited, 42) + def test_init_subclass_has_correct_cls(self): init_subclass_saw = None diff --git a/Lib/test/test_wave.py b/Lib/test/test_wave.py index 5e771c8de969ec..346a343761a7c1 100644 --- a/Lib/test/test_wave.py +++ b/Lib/test/test_wave.py @@ -2,6 +2,7 @@ from test import audiotests from test import support import io +import os import struct import sys import wave @@ -222,6 +223,14 @@ def test_read_wrong_sample_width(self): with self.assertRaisesRegex(wave.Error, 'bad sample width'): wave.open(io.BytesIO(b)) + def test_open_in_write_raises(self): + # gh-136523: Wave_write.__del__ should not throw + with support.catch_unraisable_exception() as cm: + with self.assertRaises(OSError): + wave.open(os.curdir, "wb") + support.gc_collect() + self.assertIsNone(cm.unraisable) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 4faad6629fe23c..4c7c900eb56ae1 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -432,7 +432,7 @@ def check_proxy(self, o, proxy): self.assertEqual(proxy.foo, 2, "proxy does not reflect attribute modification") del o.foo - self.assertFalse(hasattr(proxy, 'foo'), + self.assertNotHasAttr(proxy, 'foo', "proxy does not reflect attribute removal") proxy.foo = 1 @@ -442,7 +442,7 @@ def check_proxy(self, o, proxy): self.assertEqual(o.foo, 2, "object does not reflect attribute modification via proxy") del proxy.foo - self.assertFalse(hasattr(o, 'foo'), + self.assertNotHasAttr(o, 'foo', "object does not reflect attribute removal via proxy") def test_proxy_deletion(self): @@ -1108,7 +1108,7 @@ def meth(self): self.assertEqual(r.slot1, "abc") self.assertEqual(r.slot2, "def") self.assertEqual(r.meth(), "abcdef") - self.assertFalse(hasattr(r, "__dict__")) + self.assertNotHasAttr(r, "__dict__") def test_subclass_refs_with_cycle(self): """Confirm https://bugs.python.org/issue3100 is fixed.""" diff --git a/Lib/test/test_weakset.py b/Lib/test/test_weakset.py index 76e8e5c8ab7d3c..c1e4f9c8366e58 100644 --- a/Lib/test/test_weakset.py +++ b/Lib/test/test_weakset.py @@ -466,7 +466,7 @@ def test_copying(self): self.assertIsNot(dup, s) self.assertIs(dup.x, s.x) self.assertIs(dup.z, s.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') dup = copy.deepcopy(s) self.assertIsInstance(dup, cls) @@ -476,7 +476,7 @@ def test_copying(self): self.assertIsNot(dup.x, s.x) self.assertEqual(dup.z, s.z) self.assertIsNot(dup.z, s.z) - self.assertFalse(hasattr(dup, 'y')) + self.assertNotHasAttr(dup, 'y') if __name__ == "__main__": diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py index d9076e77c158a2..1bae884ed9ae3e 100644 --- a/Lib/test/test_winconsoleio.py +++ b/Lib/test/test_winconsoleio.py @@ -17,9 +17,9 @@ class WindowsConsoleIOTests(unittest.TestCase): def test_abc(self): - self.assertTrue(issubclass(ConIO, io.RawIOBase)) - self.assertFalse(issubclass(ConIO, io.BufferedIOBase)) - self.assertFalse(issubclass(ConIO, io.TextIOBase)) + self.assertIsSubclass(ConIO, io.RawIOBase) + self.assertNotIsSubclass(ConIO, io.BufferedIOBase) + self.assertNotIsSubclass(ConIO, io.TextIOBase) def test_open_fd(self): self.assertRaisesRegex(ValueError, diff --git a/Lib/test/test_winreg.py b/Lib/test/test_winreg.py index 924a962781a75b..1bc830c02c39ce 100644 --- a/Lib/test/test_winreg.py +++ b/Lib/test/test_winreg.py @@ -3,6 +3,7 @@ import gc import os, sys, errno +import itertools import threading import unittest from platform import machine, win32_edition @@ -291,6 +292,37 @@ def run(self): DeleteKey(HKEY_CURRENT_USER, test_key_name+'\\changing_value') DeleteKey(HKEY_CURRENT_USER, test_key_name) + def test_queryvalueex_race_condition(self): + # gh-142282: QueryValueEx could read garbage buffer under race + # condition when another thread changes the value size + done = False + ready = threading.Event() + values = [b'ham', b'spam'] + + class WriterThread(threading.Thread): + def run(self): + with CreateKey(HKEY_CURRENT_USER, test_key_name) as key: + values_iter = itertools.cycle(values) + while not done: + val = next(values_iter) + SetValueEx(key, 'test_value', 0, REG_BINARY, val) + ready.set() + + thread = WriterThread() + thread.start() + try: + ready.wait() + with CreateKey(HKEY_CURRENT_USER, test_key_name) as key: + for _ in range(1000): + result, typ = QueryValueEx(key, 'test_value') + # The result must be one of the written values, + # not garbage data from uninitialized buffer + self.assertIn(result, values) + finally: + done = True + thread.join() + DeleteKey(HKEY_CURRENT_USER, test_key_name) + def test_long_key(self): # Issue2810, in 2.6 and 3.1 when the key name was exactly 256 # characters, EnumKey raised "WindowsError: More data is diff --git a/Lib/test/test_with.py b/Lib/test/test_with.py index fd7abd1782ec4d..f16611b29a2658 100644 --- a/Lib/test/test_with.py +++ b/Lib/test/test_with.py @@ -679,7 +679,7 @@ def testSingleComplexTarget(self): class C: pass blah = C() with mock_contextmanager_generator() as blah.foo: - self.assertEqual(hasattr(blah, "foo"), True) + self.assertHasAttr(blah, "foo") def testMultipleComplexTargets(self): class C: diff --git a/Lib/test/test_wmi.py b/Lib/test/test_wmi.py index ac7c9cb3a5a493..90eb40439d4b4a 100644 --- a/Lib/test/test_wmi.py +++ b/Lib/test/test_wmi.py @@ -70,8 +70,8 @@ def test_wmi_query_overflow(self): def test_wmi_query_multiple_rows(self): # Multiple instances should have an extra null separator r = wmi_exec_query("SELECT ProcessId FROM Win32_Process WHERE ProcessId < 1000") - self.assertFalse(r.startswith("\0"), r) - self.assertFalse(r.endswith("\0"), r) + self.assertNotStartsWith(r, "\0") + self.assertNotEndsWith(r, "\0") it = iter(r.split("\0")) try: while True: diff --git a/Lib/test/test_wsgiref.py b/Lib/test/test_wsgiref.py index b047f7b06f85d3..e04a4d2c2218a3 100644 --- a/Lib/test/test_wsgiref.py +++ b/Lib/test/test_wsgiref.py @@ -149,9 +149,9 @@ def bad_app(environ,start_response): start_response("200 OK", ('Content-Type','text/plain')) return ["Hello, world!"] out, err = run_amock(validator(bad_app)) - self.assertTrue(out.endswith( + self.assertEndsWith(out, b"A server error occurred. Please contact the administrator." - )) + ) self.assertEqual( err.splitlines()[-2], "AssertionError: Headers (('Content-Type', 'text/plain')) must" @@ -174,9 +174,9 @@ def bad_app(environ, start_response): for status, exc_message in tests: with self.subTest(status=status): out, err = run_amock(create_bad_app(status)) - self.assertTrue(out.endswith( + self.assertEndsWith(out, b"A server error occurred. Please contact the administrator." - )) + ) self.assertEqual(err.splitlines()[-2], exc_message) def test_wsgi_input(self): @@ -185,9 +185,9 @@ def bad_app(e,s): s("200 OK", [("Content-Type", "text/plain; charset=utf-8")]) return [b"data"] out, err = run_amock(validator(bad_app)) - self.assertTrue(out.endswith( + self.assertEndsWith(out, b"A server error occurred. Please contact the administrator." - )) + ) self.assertEqual( err.splitlines()[-2], "AssertionError" ) @@ -200,7 +200,7 @@ def app(e, s): ]) return [b"data"] out, err = run_amock(validator(app)) - self.assertTrue(err.endswith('"GET / HTTP/1.0" 200 4\n')) + self.assertEndsWith(err, '"GET / HTTP/1.0" 200 4\n') ver = sys.version.split()[0].encode('ascii') py = python_implementation().encode('ascii') pyver = py + b"/" + ver diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 5fe9d6884106ad..25c084c8b9c9eb 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -218,6 +218,33 @@ class ElementTreeTest(unittest.TestCase): def serialize_check(self, elem, expected): self.assertEqual(serialize(elem), expected) + def test_constructor(self): + # Test constructor behavior. + + with self.assertRaises(TypeError): + tree = ET.ElementTree("") + with self.assertRaises(TypeError): + tree = ET.ElementTree(ET.ElementTree()) + + def test_setroot(self): + # Test _setroot behavior. + + tree = ET.ElementTree() + element = ET.Element("tag") + tree._setroot(element) + self.assertEqual(tree.getroot().tag, "tag") + self.assertEqual(tree.getroot(), element) + + # Test behavior with an invalid root element + + tree = ET.ElementTree() + with self.assertRaises(TypeError): + tree._setroot("") + with self.assertRaises(TypeError): + tree._setroot(ET.ElementTree()) + with self.assertRaises(TypeError): + tree._setroot(None) + def test_interface(self): # Test element tree interface. @@ -225,8 +252,7 @@ def check_element(element): self.assertTrue(ET.iselement(element), msg="not an element") direlem = dir(element) for attr in 'tag', 'attrib', 'text', 'tail': - self.assertTrue(hasattr(element, attr), - msg='no %s member' % attr) + self.assertHasAttr(element, attr) self.assertIn(attr, direlem, msg='no %s visible by dir' % attr) @@ -251,7 +277,7 @@ def check_element(element): # Make sure all standard element methods exist. def check_method(method): - self.assertTrue(hasattr(method, '__call__'), + self.assertHasAttr(method, '__call__', msg="%s not callable" % method) check_method(element.append) @@ -548,208 +574,6 @@ def test_parseliteral(self): self.assertEqual(len(ids), 1) self.assertEqual(ids["body"].tag, 'body') - def test_iterparse(self): - # Test iterparse interface. - - iterparse = ET.iterparse - - context = iterparse(SIMPLE_XMLFILE) - self.assertIsNone(context.root) - action, elem = next(context) - self.assertIsNone(context.root) - self.assertEqual((action, elem.tag), ('end', 'element')) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', 'element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - self.assertEqual(context.root.tag, 'root') - - context = iterparse(SIMPLE_NS_XMLFILE) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', '{namespace}element'), - ('end', '{namespace}element'), - ('end', '{namespace}empty-element'), - ('end', '{namespace}root'), - ]) - - with open(SIMPLE_XMLFILE, 'rb') as source: - context = iterparse(source) - action, elem = next(context) - self.assertEqual((action, elem.tag), ('end', 'element')) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('end', 'element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - self.assertEqual(context.root.tag, 'root') - - events = () - context = iterparse(SIMPLE_XMLFILE, events) - self.assertEqual([(action, elem.tag) for action, elem in context], []) - - events = () - context = iterparse(SIMPLE_XMLFILE, events=events) - self.assertEqual([(action, elem.tag) for action, elem in context], []) - - events = ("start", "end") - context = iterparse(SIMPLE_XMLFILE, events) - self.assertEqual([(action, elem.tag) for action, elem in context], [ - ('start', 'root'), - ('start', 'element'), - ('end', 'element'), - ('start', 'element'), - ('end', 'element'), - ('start', 'empty-element'), - ('end', 'empty-element'), - ('end', 'root'), - ]) - - events = ("start", "end", "start-ns", "end-ns") - context = iterparse(SIMPLE_NS_XMLFILE, events) - self.assertEqual([(action, elem.tag) if action in ("start", "end") - else (action, elem) - for action, elem in context], [ - ('start-ns', ('', 'namespace')), - ('start', '{namespace}root'), - ('start', '{namespace}element'), - ('end', '{namespace}element'), - ('start', '{namespace}element'), - ('end', '{namespace}element'), - ('start', '{namespace}empty-element'), - ('end', '{namespace}empty-element'), - ('end', '{namespace}root'), - ('end-ns', None), - ]) - - events = ('start-ns', 'end-ns') - context = iterparse(io.StringIO(r"<root xmlns=''/>"), events) - res = [action for action, elem in context] - self.assertEqual(res, ['start-ns', 'end-ns']) - - events = ("start", "end", "bogus") - with open(SIMPLE_XMLFILE, "rb") as f: - with self.assertRaises(ValueError) as cm: - iterparse(f, events) - self.assertFalse(f.closed) - self.assertEqual(str(cm.exception), "unknown event 'bogus'") - - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(ValueError) as cm: - iterparse(SIMPLE_XMLFILE, events) - self.assertEqual(str(cm.exception), "unknown event 'bogus'") - del cm - - source = io.BytesIO( - b"<?xml version='1.0' encoding='iso-8859-1'?>\n" - b"<body xmlns='http://&#233;ffbot.org/ns'\n" - b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n") - events = ("start-ns",) - context = iterparse(source, events) - self.assertEqual([(action, elem) for action, elem in context], [ - ('start-ns', ('', 'http://\xe9ffbot.org/ns')), - ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), - ]) - - source = io.StringIO("<document />junk") - it = iterparse(source) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'document')) - with self.assertRaises(ET.ParseError) as cm: - next(it) - self.assertEqual(str(cm.exception), - 'junk after document element: line 1, column 12') - - self.addCleanup(os_helper.unlink, TESTFN) - with open(TESTFN, "wb") as f: - f.write(b"<document />junk") - it = iterparse(TESTFN) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'document')) - with warnings_helper.check_no_resource_warning(self): - with self.assertRaises(ET.ParseError) as cm: - next(it) - self.assertEqual(str(cm.exception), - 'junk after document element: line 1, column 12') - del cm, it - - # Not exhausting the iterator still closes the resource (bpo-43292) - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - del it - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - it.close() - del it - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - del it, elem - - with warnings_helper.check_no_resource_warning(self): - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - it.close() - self.assertEqual((action, elem.tag), ('end', 'element')) - del it, elem - - with self.assertRaises(FileNotFoundError): - iterparse("nonexistent") - - def test_iterparse_close(self): - iterparse = ET.iterparse - - it = iterparse(SIMPLE_XMLFILE) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - it = iterparse(SIMPLE_XMLFILE) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - it = iterparse(SIMPLE_XMLFILE) - list(it) - it.close() - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - - with open(SIMPLE_XMLFILE, 'rb') as source: - it = iterparse(source) - list(it) - it.close() - self.assertFalse(source.closed) - with self.assertRaises(StopIteration): - next(it) - it.close() # idempotent - def test_writefile(self): elem = ET.Element("tag") elem.text = "text" @@ -1473,6 +1297,234 @@ def test_attlist_default(self): {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'}) +class IterparseTest(unittest.TestCase): + # Test iterparse interface. + + def test_basic(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + self.assertIsNone(it.root) + action, elem = next(it) + self.assertIsNone(it.root) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(it.root.tag, 'root') + it.close() + + it = iterparse(SIMPLE_NS_XMLFILE) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', '{namespace}element'), + ('end', '{namespace}element'), + ('end', '{namespace}empty-element'), + ('end', '{namespace}root'), + ]) + it.close() + + def test_external_file(self): + with open(SIMPLE_XMLFILE, 'rb') as source: + it = ET.iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('end', 'element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + self.assertEqual(it.root.tag, 'root') + + def test_events(self): + iterparse = ET.iterparse + + events = () + it = iterparse(SIMPLE_XMLFILE, events) + self.assertEqual([(action, elem.tag) for action, elem in it], []) + it.close() + + events = () + it = iterparse(SIMPLE_XMLFILE, events=events) + self.assertEqual([(action, elem.tag) for action, elem in it], []) + it.close() + + events = ("start", "end") + it = iterparse(SIMPLE_XMLFILE, events) + self.assertEqual([(action, elem.tag) for action, elem in it], [ + ('start', 'root'), + ('start', 'element'), + ('end', 'element'), + ('start', 'element'), + ('end', 'element'), + ('start', 'empty-element'), + ('end', 'empty-element'), + ('end', 'root'), + ]) + it.close() + + def test_namespace_events(self): + iterparse = ET.iterparse + + events = ("start", "end", "start-ns", "end-ns") + it = iterparse(SIMPLE_NS_XMLFILE, events) + self.assertEqual([(action, elem.tag) if action in ("start", "end") + else (action, elem) + for action, elem in it], [ + ('start-ns', ('', 'namespace')), + ('start', '{namespace}root'), + ('start', '{namespace}element'), + ('end', '{namespace}element'), + ('start', '{namespace}element'), + ('end', '{namespace}element'), + ('start', '{namespace}empty-element'), + ('end', '{namespace}empty-element'), + ('end', '{namespace}root'), + ('end-ns', None), + ]) + it.close() + + events = ('start-ns', 'end-ns') + it = iterparse(io.BytesIO(br"<root xmlns=''/>"), events) + res = [action for action, elem in it] + self.assertEqual(res, ['start-ns', 'end-ns']) + it.close() + + def test_unknown_events(self): + iterparse = ET.iterparse + + events = ("start", "end", "bogus") + with open(SIMPLE_XMLFILE, "rb") as f: + with self.assertRaises(ValueError) as cm: + iterparse(f, events) + self.assertFalse(f.closed) + self.assertEqual(str(cm.exception), "unknown event 'bogus'") + + with warnings_helper.check_no_resource_warning(self): + with self.assertRaises(ValueError) as cm: + iterparse(SIMPLE_XMLFILE, events) + self.assertEqual(str(cm.exception), "unknown event 'bogus'") + del cm + gc_collect() + + def test_non_utf8(self): + source = io.BytesIO( + b"<?xml version='1.0' encoding='iso-8859-1'?>\n" + b"<body xmlns='http://&#233;ffbot.org/ns'\n" + b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n") + events = ("start-ns",) + it = ET.iterparse(source, events) + self.assertEqual([(action, elem) for action, elem in it], [ + ('start-ns', ('', 'http://\xe9ffbot.org/ns')), + ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), + ]) + + def test_parsing_error(self): + source = io.BytesIO(b"<document />junk") + it = ET.iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'document')) + with self.assertRaises(ET.ParseError) as cm: + next(it) + self.assertEqual(str(cm.exception), + 'junk after document element: line 1, column 12') + + def test_nonexistent_file(self): + with self.assertRaises(FileNotFoundError): + ET.iterparse("nonexistent") + + def test_resource_warnings_not_exhausted(self): + # Not exhausting the iterator still closes the underlying file (bpo-43292) + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + del it + gc_collect() + + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + gc_collect() + + def test_resource_warnings_failed_iteration(self): + self.addCleanup(os_helper.unlink, TESTFN) + with open(TESTFN, "wb") as f: + f.write(b"<document />junk") + + it = ET.iterparse(TESTFN) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'document')) + with warnings_helper.check_no_resource_warning(self): + with self.assertRaises(ET.ParseError) as cm: + next(it) + self.assertEqual(str(cm.exception), + 'junk after document element: line 1, column 12') + del cm, it + gc_collect() + + def test_resource_warnings_exhausted(self): + it = ET.iterparse(SIMPLE_XMLFILE) + with warnings_helper.check_no_resource_warning(self): + list(it) + del it + gc_collect() + + def test_close_not_exhausted(self): + iterparse = ET.iterparse + + it = iterparse(SIMPLE_XMLFILE) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + it = iterparse(SIMPLE_XMLFILE) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + def test_close_exhausted(self): + iterparse = ET.iterparse + it = iterparse(SIMPLE_XMLFILE) + list(it) + it.close() + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + with open(SIMPLE_XMLFILE, 'rb') as source: + it = iterparse(source) + list(it) + it.close() + self.assertFalse(source.closed) + with self.assertRaises(StopIteration): + next(it) + it.close() # idempotent + + class XMLPullParserTest(unittest.TestCase): def _feed(self, parser, data, chunk_size=None, flush=False): @@ -1723,6 +1775,8 @@ def __next__(self): def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) + with self.assertRaisesRegex(ValueError, "unknown event 'bogus'"): + ET.XMLPullParser(events=(x.decode() for x in (b'start', b'end', b'bogus'))) @unittest.skipIf(pyexpat.version_info < (2, 6, 0), f'Expat {pyexpat.version_info} does not ' @@ -2960,6 +3014,50 @@ def element_factory(x, y): del b gc_collect() + def test_deepcopy_clear(self): + # Prevent crashes when __deepcopy__() clears the children list. + # See https://github.com/python/cpython/issues/133009. + class X(ET.Element): + def __deepcopy__(self, memo): + root.clear() + return self + + root = ET.Element('a') + evil = X('x') + root.extend([evil, ET.Element('y')]) + if is_python_implementation(): + # Mutating a list over which we iterate raises an error. + self.assertRaises(RuntimeError, copy.deepcopy, root) + else: + c = copy.deepcopy(root) + # In the C implementation, we can still copy the evil element. + self.assertListEqual(list(c), [evil]) + + def test_deepcopy_grow(self): + # Prevent crashes when __deepcopy__() mutates the children list. + # See https://github.com/python/cpython/issues/133009. + a = ET.Element('a') + b = ET.Element('b') + c = ET.Element('c') + + class X(ET.Element): + def __deepcopy__(self, memo): + root.append(a) + root.append(b) + return self + + root = ET.Element('top') + evil1, evil2 = X('1'), X('2') + root.extend([evil1, c, evil2]) + children = list(copy.deepcopy(root)) + # mock deep copies + self.assertIs(children[0], evil1) + self.assertIs(children[2], evil2) + # true deep copies + self.assertEqual(children[1].tag, c.tag) + self.assertEqual([c.tag for c in children[3:]], + [a.tag, b.tag, a.tag, b.tag]) + class MutationDeleteElementPath(str): def __new__(cls, elem, *args): diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 9ed0f4096a45e3..270b9d6da8e7b9 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -58,7 +58,7 @@ def test_del_attribute(self): self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) @support.skip_wasi_stack_overflow() - @unittest.skipIf(support.is_emscripten, "segfaults") + @support.skip_emscripten_stack_overflow() def test_trashcan(self): # If this test fails, it will most likely die via segfault. e = root = cET.Element('root') diff --git a/Lib/test/test_xxlimited.py b/Lib/test/test_xxlimited.py index 6dbfb3f439393c..b52e78bc4fb7e0 100644 --- a/Lib/test/test_xxlimited.py +++ b/Lib/test/test_xxlimited.py @@ -31,7 +31,7 @@ def test_foo(self): self.assertEqual(self.module.foo(1, 2), 3) def test_str(self): - self.assertTrue(issubclass(self.module.Str, str)) + self.assertIsSubclass(self.module.Str, str) self.assertIsNot(self.module.Str, str) custom_string = self.module.Str("abcd") diff --git a/Lib/test/test_zipapp.py b/Lib/test/test_zipapp.py index d4766c59a102db..8fb0a68deba535 100644 --- a/Lib/test/test_zipapp.py +++ b/Lib/test/test_zipapp.py @@ -259,7 +259,7 @@ def test_pack_to_fileobj(self): (source / '__main__.py').touch() target = io.BytesIO() zipapp.create_archive(str(source), target, interpreter='python') - self.assertTrue(target.getvalue().startswith(b'#!python\n')) + self.assertStartsWith(target.getvalue(), b'#!python\n') def test_read_shebang(self): # Test that we can read the shebang line correctly. @@ -300,7 +300,7 @@ def test_write_shebang_to_fileobj(self): zipapp.create_archive(str(source), str(target), interpreter='python') new_target = io.BytesIO() zipapp.create_archive(str(target), new_target, interpreter='python2.7') - self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + self.assertStartsWith(new_target.getvalue(), b'#!python2.7\n') def test_read_from_pathlike_obj(self): # Test that we can copy an archive using a path-like object @@ -326,7 +326,7 @@ def test_read_from_fileobj(self): new_target = io.BytesIO() temp_archive.seek(0) zipapp.create_archive(temp_archive, new_target, interpreter='python2.7') - self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + self.assertStartsWith(new_target.getvalue(), b'#!python2.7\n') def test_remove_shebang(self): # Test that we can remove the shebang from a file. diff --git a/Lib/test/test_zipfile/_path/_test_params.py b/Lib/test/test_zipfile/_path/_test_params.py index bc95b4ebf4a168..00a9eaf2f99c1a 100644 --- a/Lib/test/test_zipfile/_path/_test_params.py +++ b/Lib/test/test_zipfile/_path/_test_params.py @@ -1,5 +1,5 @@ -import types import functools +import types from ._itertools import always_iterable diff --git a/Lib/test/test_zipfile/_path/test_complexity.py b/Lib/test/test_zipfile/_path/test_complexity.py index b505dd7c376462..7c108fc6ab8191 100644 --- a/Lib/test/test_zipfile/_path/test_complexity.py +++ b/Lib/test/test_zipfile/_path/test_complexity.py @@ -8,10 +8,8 @@ from ._functools import compose from ._itertools import consume - from ._support import import_or_skip - big_o = import_or_skip('big_o') pytest = import_or_skip('pytest') diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py index 0afabc0c6683c4..351d9eefeb09a5 100644 --- a/Lib/test/test_zipfile/_path/test_path.py +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -1,6 +1,6 @@ +import contextlib import io import itertools -import contextlib import pathlib import pickle import stat @@ -9,12 +9,11 @@ import zipfile import zipfile._path -from test.support.os_helper import temp_dir, FakePath +from test.support.os_helper import FakePath, temp_dir from ._functools import compose from ._itertools import Counter - -from ._test_params import parameterize, Invoked +from ._test_params import Invoked, parameterize class jaraco: @@ -193,10 +192,10 @@ def test_encoding_warnings(self, alpharep): """EncodingWarning must blame the read_text and open calls.""" assert sys.flags.warn_default_encoding root = zipfile.Path(alpharep) - with self.assertWarns(EncodingWarning) as wc: + with self.assertWarns(EncodingWarning) as wc: # noqa: F821 (astral-sh/ruff#13296) root.joinpath("a.txt").read_text() assert __file__ == wc.filename - with self.assertWarns(EncodingWarning) as wc: + with self.assertWarns(EncodingWarning) as wc: # noqa: F821 (astral-sh/ruff#13296) root.joinpath("a.txt").open("r").close() assert __file__ == wc.filename @@ -275,7 +274,8 @@ def test_pathlike_construction(self, alpharep): """ zipfile_ondisk = self.zipfile_ondisk(alpharep) pathlike = FakePath(str(zipfile_ondisk)) - zipfile.Path(pathlike) + root = zipfile.Path(pathlike) + root.root.close() @pass_alpharep def test_traverse_pathlike(self, alpharep): @@ -364,6 +364,18 @@ def test_root_name(self, alpharep): root = zipfile.Path(alpharep) assert root.name == 'alpharep.zip' == root.filename.name + @pass_alpharep + def test_root_on_disk(self, alpharep): + """ + The name/stem of the root should match the zipfile on disk. + + This condition must hold across platforms. + """ + root = zipfile.Path(self.zipfile_ondisk(alpharep)) + assert root.name == 'alpharep.zip' == root.filename.name + assert root.stem == 'alpharep' == root.filename.stem + root.root.close() + @pass_alpharep def test_suffix(self, alpharep): """ @@ -564,11 +576,13 @@ def test_inheritance(self, alpharep): ) def test_pickle(self, alpharep, path_type, subpath): zipfile_ondisk = path_type(str(self.zipfile_ondisk(alpharep))) - - saved_1 = pickle.dumps(zipfile.Path(zipfile_ondisk, at=subpath)) + root = zipfile.Path(zipfile_ondisk, at=subpath) + saved_1 = pickle.dumps(root) + root.root.close() restored_1 = pickle.loads(saved_1) first, *rest = restored_1.iterdir() assert first.read_text(encoding='utf-8').startswith('content of ') + restored_1.root.close() @pass_alpharep def test_extract_orig_with_implied_dirs(self, alpharep): @@ -580,6 +594,7 @@ def test_extract_orig_with_implied_dirs(self, alpharep): # wrap the zipfile for its side effect zipfile.Path(zf) zf.extractall(source_path.parent) + zf.close() @pass_alpharep def test_getinfo_missing(self, alpharep): diff --git a/Lib/test/test_zipfile/_path/write-alpharep.py b/Lib/test/test_zipfile/_path/write-alpharep.py index 48c09b537179fd..7418391abadde5 100644 --- a/Lib/test/test_zipfile/_path/write-alpharep.py +++ b/Lib/test/test_zipfile/_path/write-alpharep.py @@ -1,4 +1,3 @@ from . import test_path - __name__ == '__main__' and test_path.build_alpharep_fixture().extractall('alpharep') diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index ae898150658565..6887a5e5cc4d18 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -312,26 +312,26 @@ def test_low_compression(self): self.assertEqual(openobj.read(1), b'2') def test_writestr_compression(self): - zipfp = zipfile.ZipFile(TESTFN2, "w") - zipfp.writestr("b.txt", "hello world", compress_type=self.compression) - info = zipfp.getinfo('b.txt') - self.assertEqual(info.compress_type, self.compression) + with zipfile.ZipFile(TESTFN2, "w") as zipfp: + zipfp.writestr("b.txt", "hello world", compress_type=self.compression) + info = zipfp.getinfo('b.txt') + self.assertEqual(info.compress_type, self.compression) def test_writestr_compresslevel(self): - zipfp = zipfile.ZipFile(TESTFN2, "w", compresslevel=1) - zipfp.writestr("a.txt", "hello world", compress_type=self.compression) - zipfp.writestr("b.txt", "hello world", compress_type=self.compression, - compresslevel=2) + with zipfile.ZipFile(TESTFN2, "w", compresslevel=1) as zipfp: + zipfp.writestr("a.txt", "hello world", compress_type=self.compression) + zipfp.writestr("b.txt", "hello world", compress_type=self.compression, + compresslevel=2) - # Compression level follows the constructor. - a_info = zipfp.getinfo('a.txt') - self.assertEqual(a_info.compress_type, self.compression) - self.assertEqual(a_info.compress_level, 1) + # Compression level follows the constructor. + a_info = zipfp.getinfo('a.txt') + self.assertEqual(a_info.compress_type, self.compression) + self.assertEqual(a_info.compress_level, 1) - # Compression level is overridden. - b_info = zipfp.getinfo('b.txt') - self.assertEqual(b_info.compress_type, self.compression) - self.assertEqual(b_info._compresslevel, 2) + # Compression level is overridden. + b_info = zipfp.getinfo('b.txt') + self.assertEqual(b_info.compress_type, self.compression) + self.assertEqual(b_info._compresslevel, 2) def test_read_return_size(self): # Issue #9837: ZipExtFile.read() shouldn't return more bytes @@ -898,6 +898,8 @@ def make_zip64_file( self, file_size_64_set=False, file_size_extra=False, compress_size_64_set=False, compress_size_extra=False, header_offset_64_set=False, header_offset_extra=False, + extensible_data=b'', + end_of_central_dir_size=None, offset_to_end_of_central_dir=None, ): """Generate bytes sequence for a zip with (incomplete) zip64 data. @@ -951,6 +953,12 @@ def make_zip64_file( central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields)) offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields)) + if end_of_central_dir_size is None: + end_of_central_dir_size = 44 + len(extensible_data) + if offset_to_end_of_central_dir is None: + offset_to_end_of_central_dir = (108 + + 8 * len(local_zip64_fields) + + 8 * len(central_zip64_fields)) local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields)) central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields)) @@ -979,14 +987,17 @@ def make_zip64_file( + filename + central_extra # Zip64 end of central directory - + b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-" - + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00" + + b"PK\x06\x06" + + struct.pack('<Q', end_of_central_dir_size) + + b"-\x00-\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00" + b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00" + central_dir_size + offset_to_central_dir + + extensible_data # Zip64 end of central directory locator - + b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01" - + b"\x00\x00\x00" + + b"PK\x06\x07\x00\x00\x00\x00" + + struct.pack('<Q', offset_to_end_of_central_dir) + + b"\x01\x00\x00\x00" # end of central directory + b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00" + b"\x00\x00\x00\x00" @@ -1017,6 +1028,7 @@ def test_bad_zip64_extra(self): with self.assertRaises(zipfile.BadZipFile) as e: zipfile.ZipFile(io.BytesIO(missing_file_size_extra)) self.assertIn('file size', str(e.exception).lower()) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_file_size_extra))) # zip64 file size present, zip64 compress size present, one field in # extra, expecting two, equals missing compress size. @@ -1028,6 +1040,7 @@ def test_bad_zip64_extra(self): with self.assertRaises(zipfile.BadZipFile) as e: zipfile.ZipFile(io.BytesIO(missing_compress_size_extra)) self.assertIn('compress size', str(e.exception).lower()) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra))) # zip64 compress size present, no fields in extra, expecting one, # equals missing compress size. @@ -1037,6 +1050,7 @@ def test_bad_zip64_extra(self): with self.assertRaises(zipfile.BadZipFile) as e: zipfile.ZipFile(io.BytesIO(missing_compress_size_extra)) self.assertIn('compress size', str(e.exception).lower()) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_compress_size_extra))) # zip64 file size present, zip64 compress size present, zip64 header # offset present, two fields in extra, expecting three, equals missing @@ -1051,6 +1065,7 @@ def test_bad_zip64_extra(self): with self.assertRaises(zipfile.BadZipFile) as e: zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) self.assertIn('header offset', str(e.exception).lower()) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) # zip64 compress size present, zip64 header offset present, one field # in extra, expecting two, equals missing header offset @@ -1063,6 +1078,7 @@ def test_bad_zip64_extra(self): with self.assertRaises(zipfile.BadZipFile) as e: zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) self.assertIn('header offset', str(e.exception).lower()) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) # zip64 file size present, zip64 header offset present, one field in # extra, expecting two, equals missing header offset @@ -1075,6 +1091,7 @@ def test_bad_zip64_extra(self): with self.assertRaises(zipfile.BadZipFile) as e: zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) self.assertIn('header offset', str(e.exception).lower()) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) # zip64 header offset present, no fields in extra, expecting one, # equals missing header offset @@ -1086,6 +1103,63 @@ def test_bad_zip64_extra(self): with self.assertRaises(zipfile.BadZipFile) as e: zipfile.ZipFile(io.BytesIO(missing_header_offset_extra)) self.assertIn('header offset', str(e.exception).lower()) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(missing_header_offset_extra))) + + def test_bad_zip64_end_of_central_dir(self): + zipdata = self.make_zip64_file(end_of_central_dir_size=0) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'): + zipfile.ZipFile(io.BytesIO(zipdata)) + self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) + + zipdata = self.make_zip64_file(end_of_central_dir_size=100) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'): + zipfile.ZipFile(io.BytesIO(zipdata)) + self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) + + zipdata = self.make_zip64_file(offset_to_end_of_central_dir=0) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*record'): + zipfile.ZipFile(io.BytesIO(zipdata)) + self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) + + zipdata = self.make_zip64_file(offset_to_end_of_central_dir=1000) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Corrupt.*locator'): + zipfile.ZipFile(io.BytesIO(zipdata)) + self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) + + def test_zip64_end_of_central_dir_record_not_found(self): + zipdata = self.make_zip64_file() + zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4) + with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'): + zipfile.ZipFile(io.BytesIO(zipdata)) + self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) + + zipdata = self.make_zip64_file( + extensible_data=b'\xca\xfe\x04\x00\x00\x00data') + zipdata = zipdata.replace(b"PK\x06\x06", b'\x00'*4) + with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'): + zipfile.ZipFile(io.BytesIO(zipdata)) + self.assertFalse(zipfile.is_zipfile(io.BytesIO(zipdata))) + + def test_zip64_extensible_data(self): + # These values are what is set in the make_zip64_file method. + expected_file_size = 8 + expected_compress_size = 8 + expected_header_offset = 0 + expected_content = b"test1234" + + zipdata = self.make_zip64_file( + extensible_data=b'\xca\xfe\x04\x00\x00\x00data') + with zipfile.ZipFile(io.BytesIO(zipdata)) as zf: + zinfo = zf.infolist()[0] + self.assertEqual(zinfo.file_size, expected_file_size) + self.assertEqual(zinfo.compress_size, expected_compress_size) + self.assertEqual(zinfo.header_offset, expected_header_offset) + self.assertEqual(zf.read(zinfo), expected_content) + self.assertTrue(zipfile.is_zipfile(io.BytesIO(zipdata))) + + with self.assertRaisesRegex(zipfile.BadZipFile, 'record not found'): + zipfile.ZipFile(io.BytesIO(b'prepended' + zipdata)) + self.assertFalse(zipfile.is_zipfile(io.BytesIO(b'prepended' + zipdata))) def test_generated_valid_zip64_extra(self): # These values are what is set in the make_zip64_file method. @@ -1991,6 +2065,25 @@ def test_is_zip_erroneous_file(self): self.assertFalse(zipfile.is_zipfile(fp)) fp.seek(0, 0) self.assertFalse(zipfile.is_zipfile(fp)) + # - passing non-zipfile with ZIP header elements + # data created using pyPNG like so: + # d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)] + # w = png.Writer(1,2,alpha=True,compression=0) + # f = open('onepix.png', 'wb') + # w.write(f, d) + # w.close() + data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00" + b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I" + b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07" + b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82") + # - passing a filename + with open(TESTFN, "wb") as fp: + fp.write(data) + self.assertFalse(zipfile.is_zipfile(TESTFN)) + # - passing a file-like object + fp = io.BytesIO() + fp.write(data) + self.assertFalse(zipfile.is_zipfile(fp)) def test_damaged_zipfile(self): """Check that zipfiles with missing bytes at the end raise BadZipFile.""" @@ -2237,6 +2330,7 @@ def test_empty_zipfile(self): zipf = zipfile.ZipFile(TESTFN, mode="r") except zipfile.BadZipFile: self.fail("Unable to create empty ZIP file in 'w' mode") + zipf.close() zipf = zipfile.ZipFile(TESTFN, mode="a") zipf.close() @@ -2244,6 +2338,7 @@ def test_empty_zipfile(self): zipf = zipfile.ZipFile(TESTFN, mode="r") except: self.fail("Unable to create empty ZIP file in 'a' mode") + zipf.close() def test_open_empty_file(self): # Issue 1710703: Check that opening a file with less than 22 bytes @@ -2436,6 +2531,10 @@ def test_decompress_without_3rd_party_library(self): @requires_zlib() def test_full_overlap_different_names(self): + # The ZIP file contains two central directory entries with + # different names which refer to the same local header. + # The name of the local header matches the name of the first + # central directory entry. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed' @@ -2465,6 +2564,10 @@ def test_full_overlap_different_names(self): @requires_zlib() def test_full_overlap_different_names2(self): + # The ZIP file contains two central directory entries with + # different names which refer to the same local header. + # The name of the local header matches the name of the second + # central directory entry. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' @@ -2496,6 +2599,8 @@ def test_full_overlap_different_names2(self): @requires_zlib() def test_full_overlap_same_name(self): + # The ZIP file contains two central directory entries with + # the same name which refer to the same local header. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' @@ -2528,6 +2633,8 @@ def test_full_overlap_same_name(self): @requires_zlib() def test_quoted_overlap(self): + # The ZIP file contains two files. The second local header + # is contained in the range of the first file. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc' b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00' @@ -2559,6 +2666,7 @@ def test_quoted_overlap(self): @requires_zlib() def test_overlap_with_central_dir(self): + # The local header offset is equal to the central directory offset. data = ( b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' @@ -2573,11 +2681,15 @@ def test_overlap_with_central_dir(self): self.assertEqual(zi.header_offset, 0) self.assertEqual(zi.compress_size, 11) self.assertEqual(zi.file_size, 1033) + # Found central directory signature PK\x01\x02 instead of + # local header signature PK\x03\x04. with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'): zipf.read('a') @requires_zlib() def test_overlap_with_archive_comment(self): + # The local header is written after the central directory, + # in the archive comment. data = ( b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' @@ -3179,7 +3291,7 @@ def test_write_dir(self): with zipfile.ZipFile(TESTFN, "w") as zipf: zipf.write(dirpath) zinfo = zipf.filelist[0] - self.assertTrue(zinfo.filename.endswith("/x/")) + self.assertEndsWith(zinfo.filename, "/x/") self.assertEqual(zinfo.external_attr, (mode << 16) | 0x10) zipf.write(dirpath, "y") zinfo = zipf.filelist[1] @@ -3187,7 +3299,7 @@ def test_write_dir(self): self.assertEqual(zinfo.external_attr, (mode << 16) | 0x10) with zipfile.ZipFile(TESTFN, "r") as zipf: zinfo = zipf.filelist[0] - self.assertTrue(zinfo.filename.endswith("/x/")) + self.assertEndsWith(zinfo.filename, "/x/") self.assertEqual(zinfo.external_attr, (mode << 16) | 0x10) zinfo = zipf.filelist[1] self.assertTrue(zinfo.filename, "y/") @@ -3207,7 +3319,7 @@ def test_writestr_dir(self): self.assertEqual(zinfo.external_attr, (0o40775 << 16) | 0x10) with zipfile.ZipFile(TESTFN, "r") as zipf: zinfo = zipf.filelist[0] - self.assertTrue(zinfo.filename.endswith("x/")) + self.assertEndsWith(zinfo.filename, "x/") self.assertEqual(zinfo.external_attr, (0o40775 << 16) | 0x10) target = os.path.join(TESTFN2, "target") os.mkdir(target) @@ -3451,60 +3563,6 @@ def test_execute_zip64(self): self.assertIn(b'number in executable: 5', output) -class TestDataOffsetPrependedZip(unittest.TestCase): - """Test .data_offset on reading zip files with an executable prepended.""" - - def setUp(self): - self.exe_zip = findfile('exe_with_zip', subdir='archivetestdata') - self.exe_zip64 = findfile('exe_with_z64', subdir='archivetestdata') - - def _test_data_offset(self, name): - with zipfile.ZipFile(name) as zipfp: - self.assertEqual(zipfp.data_offset, 713) - - def test_data_offset_with_exe_prepended(self): - self._test_data_offset(self.exe_zip) - - def test_data_offset_with_exe_prepended_zip64(self): - self._test_data_offset(self.exe_zip64) - -class TestDataOffsetZipWrite(unittest.TestCase): - """Test .data_offset for ZipFile opened in write mode.""" - - def setUp(self): - os.mkdir(TESTFNDIR) - self.addCleanup(rmtree, TESTFNDIR) - self.test_path = os.path.join(TESTFNDIR, 'testoffset.zip') - - def test_data_offset_write_no_prefix(self): - with io.BytesIO() as fp: - with zipfile.ZipFile(fp, "w") as zipfp: - self.assertEqual(zipfp.data_offset, 0) - - def test_data_offset_write_with_prefix(self): - with io.BytesIO() as fp: - fp.write(b"this is a prefix") - with zipfile.ZipFile(fp, "w") as zipfp: - self.assertEqual(zipfp.data_offset, 16) - - def test_data_offset_append_with_bad_zip(self): - with io.BytesIO() as fp: - fp.write(b"this is a prefix") - with zipfile.ZipFile(fp, "a") as zipfp: - self.assertEqual(zipfp.data_offset, 16) - - def test_data_offset_write_no_tell(self): - # The initializer in ZipFile checks if tell raises AttributeError or - # OSError when creating a file in write mode when deducing the offset - # of the beginning of zip data - class NoTellBytesIO(io.BytesIO): - def tell(self): - raise OSError("Unimplemented!") - with NoTellBytesIO() as fp: - with zipfile.ZipFile(fp, "w") as zipfp: - self.assertIsNone(zipfp.data_offset) - - class EncodedMetadataTests(unittest.TestCase): file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three' file_content = [ @@ -3642,7 +3700,7 @@ def test_cli_with_metadata_encoding_extract(self): except OSError: pass except UnicodeEncodeError: - self.skipTest(f'cannot encode file name {fn!r}') + self.skipTest(f'cannot encode file name {fn!a}') zipfile.main(["--metadata-encoding=shift_jis", "-e", TESTFN, TESTFN2]) listing = os.listdir(TESTFN2) diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index 1f288c8b45d589..b5b4acf5f850be 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -835,11 +835,11 @@ def doTraceback(self, module): s = io.StringIO() print_tb(tb, 1, s) - self.assertTrue(s.getvalue().endswith( + self.assertEndsWith(s.getvalue(), ' def do_raise(): raise TypeError\n' '' if support.has_no_debug_ranges() else ' ^^^^^^^^^^^^^^^\n' - )) + ) else: raise AssertionError("This ought to be impossible") diff --git a/Lib/test/test_zoneinfo/test_zoneinfo.py b/Lib/test/test_zoneinfo/test_zoneinfo.py index d2845495c7f8b6..2f9a5dfc1a89a0 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo.py @@ -18,10 +18,11 @@ from functools import cached_property from test.support import MISSING_C_DOCSTRINGS -from test.support.os_helper import EnvironmentVarGuard +from test.support.os_helper import EnvironmentVarGuard, FakePath from test.test_zoneinfo import _support as test_support from test.test_zoneinfo._support import TZPATH_TEST_LOCK, ZoneInfoTestBase from test.support.import_helper import import_module, CleanImport +from test.support.script_helper import assert_python_ok lzma = import_module('lzma') py_zoneinfo, c_zoneinfo = test_support.get_modules() @@ -58,6 +59,10 @@ def tearDownModule(): shutil.rmtree(TEMP_DIR) +class CustomError(Exception): + pass + + class TzPathUserMixin: """ Adds a setUp() and tearDown() to make TZPATH manipulations thread-safe. @@ -404,6 +409,25 @@ def test_time_fixed_offset(self): self.assertEqual(t.utcoffset(), offset.utcoffset) self.assertEqual(t.dst(), offset.dst) + def test_cache_exception(self): + class Incomparable(str): + eq_called = False + def __eq__(self, other): + self.eq_called = True + raise CustomError + __hash__ = str.__hash__ + + key = "America/Los_Angeles" + tz1 = self.klass(key) + key = Incomparable(key) + try: + tz2 = self.klass(key) + except CustomError: + self.assertTrue(key.eq_called) + else: + self.assertFalse(key.eq_called) + self.assertIs(tz2, tz1) + class CZoneInfoTest(ZoneInfoTest): module = c_zoneinfo @@ -1507,6 +1531,26 @@ def test_clear_cache_two_keys(self): self.assertIsNot(dub0, dub1) self.assertIs(tok0, tok1) + def test_clear_cache_refleak(self): + class Stringy(str): + allow_comparisons = True + def __eq__(self, other): + if not self.allow_comparisons: + raise CustomError + return super().__eq__(other) + __hash__ = str.__hash__ + + key = Stringy("America/Los_Angeles") + self.klass(key) + key.allow_comparisons = False + try: + # Note: This is try/except rather than assertRaises because + # there is no guarantee that the key is even still in the cache, + # or that the key for the cache is the original `key` object. + self.klass.clear_cache(only_keys="America/Los_Angeles") + except CustomError: + pass + class CZoneInfoCacheTest(ZoneInfoCacheTest): module = c_zoneinfo @@ -1740,6 +1784,7 @@ def test_reset_tzpath_relative_paths(self): ("/usr/share/zoneinfo", "../relative/path",), ("path/to/somewhere", "../relative/path",), ("/usr/share/zoneinfo", "path/to/somewhere", "../relative/path",), + (FakePath("path/to/somewhere"),) ] for input_paths in bad_values: with self.subTest(input_paths=input_paths): @@ -1751,6 +1796,9 @@ def test_tzpath_type_error(self): "/etc/zoneinfo:/usr/share/zoneinfo", b"/etc/zoneinfo:/usr/share/zoneinfo", 0, + (b"/bytes/path", "/valid/path"), + (FakePath(b"/bytes/path"),), + (0,), ] for bad_value in bad_values: @@ -1761,6 +1809,7 @@ def test_tzpath_type_error(self): def test_tzpath_attribute(self): tzpath_0 = [f"{DRIVE}/one", f"{DRIVE}/two"] tzpath_1 = [f"{DRIVE}/three"] + tzpath_pathlike = (FakePath(f"{DRIVE}/usr/share/zoneinfo"),) with self.tzpath_context(tzpath_0): query_0 = self.module.TZPATH @@ -1768,8 +1817,12 @@ def test_tzpath_attribute(self): with self.tzpath_context(tzpath_1): query_1 = self.module.TZPATH + with self.tzpath_context(tzpath_pathlike): + query_pathlike = self.module.TZPATH + self.assertSequenceEqual(tzpath_0, query_0) self.assertSequenceEqual(tzpath_1, query_1) + self.assertSequenceEqual(tuple([os.fspath(p) for p in tzpath_pathlike]), query_pathlike) class CTzPathTest(TzPathTest): @@ -1903,6 +1956,26 @@ class CTestModule(TestModule): module = c_zoneinfo +class MiscTests(unittest.TestCase): + def test_pydatetime(self): + # Test that zoneinfo works if the C implementation of datetime + # is not available and the Python implementation of datetime is used. + # The Python implementation of zoneinfo should be used in thet case. + # + # Run the test in a subprocess, as importing _zoneinfo with + # _datettime disabled causes crash in the previously imported + # _zoneinfo. + assert_python_ok('-c', '''if 1: + import sys + sys.modules['_datetime'] = None + import datetime + import zoneinfo + tzinfo = zoneinfo.ZoneInfo('Europe/London') + datetime.datetime(2025, 10, 26, 2, 0, tzinfo=tzinfo) + ''', + PYTHONTZPATH=str(ZONEINFO_DATA.tzpath)) + + class ExtensionBuiltTest(unittest.TestCase): """Smoke test to ensure that the C and Python extensions are both tested. @@ -1915,8 +1988,8 @@ class ExtensionBuiltTest(unittest.TestCase): def test_cache_location(self): # The pure Python version stores caches on attributes, but the C # extension stores them in C globals (at least for now) - self.assertFalse(hasattr(c_zoneinfo.ZoneInfo, "_weak_cache")) - self.assertTrue(hasattr(py_zoneinfo.ZoneInfo, "_weak_cache")) + self.assertNotHasAttr(c_zoneinfo.ZoneInfo, "_weak_cache") + self.assertHasAttr(py_zoneinfo.ZoneInfo, "_weak_cache") def test_gc_tracked(self): import gc diff --git a/Lib/test/test_zoneinfo/test_zoneinfo_property.py b/Lib/test/test_zoneinfo/test_zoneinfo_property.py index feaa77f3e7f0b9..c00815e2fd4c36 100644 --- a/Lib/test/test_zoneinfo/test_zoneinfo_property.py +++ b/Lib/test/test_zoneinfo/test_zoneinfo_property.py @@ -146,20 +146,22 @@ def setUp(self): @add_key_examples def test_pickle_unpickle_cache(self, key): zi = self.klass(key) - pkl_str = pickle.dumps(zi) - zi_rt = pickle.loads(pkl_str) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + pkl_str = pickle.dumps(zi, proto) + zi_rt = pickle.loads(pkl_str) - self.assertIs(zi, zi_rt) + self.assertIs(zi, zi_rt) @hypothesis.given(key=valid_keys()) @add_key_examples def test_pickle_unpickle_no_cache(self, key): zi = self.klass.no_cache(key) - pkl_str = pickle.dumps(zi) - zi_rt = pickle.loads(pkl_str) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + pkl_str = pickle.dumps(zi, proto) + zi_rt = pickle.loads(pkl_str) - self.assertIsNot(zi, zi_rt) - self.assertEqual(str(zi), str(zi_rt)) + self.assertIsNot(zi, zi_rt) + self.assertEqual(str(zi), str(zi_rt)) @hypothesis.given(key=valid_keys()) @add_key_examples diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py index f4a25376e5234a..cf618534add387 100644 --- a/Lib/test/test_zstd.py +++ b/Lib/test/test_zstd.py @@ -63,11 +63,18 @@ TRAINED_DICT = None -SUPPORT_MULTITHREADING = False +# Cannot be deferred to setup as it is used to check whether or not to skip +# tests +try: + SUPPORT_MULTITHREADING = CompressionParameter.nb_workers.bounds() != (0, 0) +except Exception: + SUPPORT_MULTITHREADING = False + +C_INT_MIN = -(2**31) +C_INT_MAX = (2**31) - 1 + def setUpModule(): - global SUPPORT_MULTITHREADING - SUPPORT_MULTITHREADING = CompressionParameter.nb_workers.bounds() != (0, 0) # uncompressed size 130KB, more than a zstd block. # with a frame epilogue, 4 bytes checksum. global DAT_130K_D @@ -196,14 +203,21 @@ def test_simple_compress_bad_args(self): self.assertRaises(TypeError, ZstdCompressor, zstd_dict=b"abcd1234") self.assertRaises(TypeError, ZstdCompressor, zstd_dict={1: 2, 3: 4}) - with self.assertRaises(ValueError): - ZstdCompressor(2**31) - with self.assertRaises(ValueError): - ZstdCompressor(options={2**31: 100}) + # valid range for compression level is [-(1<<17), 22] + msg = r'illegal compression level {}; the valid range is \[-?\d+, -?\d+\]' + with self.assertRaisesRegex(ValueError, msg.format(C_INT_MAX)): + ZstdCompressor(C_INT_MAX) + with self.assertRaisesRegex(ValueError, msg.format(C_INT_MIN)): + ZstdCompressor(C_INT_MIN) + msg = r'illegal compression level; the valid range is \[-?\d+, -?\d+\]' + with self.assertRaisesRegex(ValueError, msg): + ZstdCompressor(level=-(2**1000)) + with self.assertRaisesRegex(ValueError, msg): + ZstdCompressor(level=2**1000) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.window_log: 100}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={3333: 100}) # Method bad arguments @@ -254,43 +268,57 @@ def test_compress_parameters(self): } ZstdCompressor(options=d) - # larger than signed int, ValueError d1 = d.copy() - d1[CompressionParameter.ldm_bucket_size_log] = 2**31 - self.assertRaises(ValueError, ZstdCompressor, options=d1) + # larger than signed int + d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MAX + with self.assertRaises(ValueError): + ZstdCompressor(options=d1) + # smaller than signed int + d1[CompressionParameter.ldm_bucket_size_log] = C_INT_MIN + with self.assertRaises(ValueError): + ZstdCompressor(options=d1) - # clamp compressionLevel + # out of bounds compression level level_min, level_max = CompressionParameter.compression_level.bounds() - compress(b'', level_max+1) - compress(b'', level_min-1) - - compress(b'', options={CompressionParameter.compression_level:level_max+1}) - compress(b'', options={CompressionParameter.compression_level:level_min-1}) + with self.assertRaises(ValueError): + compress(b'', level_max+1) + with self.assertRaises(ValueError): + compress(b'', level_min-1) + with self.assertRaises(ValueError): + compress(b'', 2**1000) + with self.assertRaises(ValueError): + compress(b'', -(2**1000)) + with self.assertRaises(ValueError): + compress(b'', options={ + CompressionParameter.compression_level: level_max+1}) + with self.assertRaises(ValueError): + compress(b'', options={ + CompressionParameter.compression_level: level_min-1}) # zstd lib doesn't support MT compression if not SUPPORT_MULTITHREADING: - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.nb_workers:4}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.job_size:4}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdCompressor(options={CompressionParameter.overlap_log:4}) # out of bounds error msg option = {CompressionParameter.window_log:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd compression parameter "window_log", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\(zstd v\d\.\d\.\d, (?:32|64)-bit build\)')): + with self.assertRaisesRegex( + ValueError, + "compression parameter 'window_log' received an illegal value 100; " + r'the valid range is \[-?\d+, -?\d+\]', + ): compress(b'', options=option) def test_unknown_compression_parameter(self): KEY = 100001234 option = {CompressionParameter.compression_level: 10, KEY: 200000000} - pattern = r'Zstd compression parameter.*?"unknown parameter \(key %d\)"' \ - % KEY - with self.assertRaisesRegex(ZstdError, pattern): + pattern = rf"invalid compression parameter 'unknown parameter \(key {KEY}\)'" + with self.assertRaisesRegex(ValueError, pattern): ZstdCompressor(options=option) @unittest.skipIf(not SUPPORT_MULTITHREADING, @@ -371,6 +399,115 @@ def test_compress_empty(self): c = ZstdCompressor() self.assertNotEqual(c.compress(b'', c.FLUSH_FRAME), b'') + def test_set_pledged_input_size(self): + DAT = DECOMPRESSED_100_PLUS_32KB + CHUNK_SIZE = len(DAT) // 3 + + # wrong value + c = ZstdCompressor() + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(-300) + # overflow + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(2**64) + # ZSTD_CONTENTSIZE_ERROR is invalid + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(2**64-2) + # ZSTD_CONTENTSIZE_UNKNOWN should use None + with self.assertRaisesRegex(ValueError, + r'should be a positive int less than \d+'): + c.set_pledged_input_size(2**64-1) + + # check valid values are settable + c.set_pledged_input_size(2**63) + c.set_pledged_input_size(2**64-3) + + # check that zero means empty frame + c = ZstdCompressor(level=1) + c.set_pledged_input_size(0) + c.compress(b'') + dat = c.flush() + ret = get_frame_info(dat) + self.assertEqual(ret.decompressed_size, 0) + + + # wrong mode + c = ZstdCompressor(level=1) + c.compress(b'123456') + self.assertEqual(c.last_mode, c.CONTINUE) + with self.assertRaisesRegex(ValueError, + r'last_mode == FLUSH_FRAME'): + c.set_pledged_input_size(300) + + # None value + c = ZstdCompressor(level=1) + c.set_pledged_input_size(None) + dat = c.compress(DAT) + c.flush() + + ret = get_frame_info(dat) + self.assertEqual(ret.decompressed_size, None) + + # correct value + c = ZstdCompressor(level=1) + c.set_pledged_input_size(len(DAT)) + + chunks = [] + posi = 0 + while posi < len(DAT): + dat = c.compress(DAT[posi:posi+CHUNK_SIZE]) + posi += CHUNK_SIZE + chunks.append(dat) + + dat = c.flush() + chunks.append(dat) + chunks = b''.join(chunks) + + ret = get_frame_info(chunks) + self.assertEqual(ret.decompressed_size, len(DAT)) + self.assertEqual(decompress(chunks), DAT) + + c.set_pledged_input_size(len(DAT)) # the second frame + dat = c.compress(DAT) + c.flush() + + ret = get_frame_info(dat) + self.assertEqual(ret.decompressed_size, len(DAT)) + self.assertEqual(decompress(dat), DAT) + + # not enough data + c = ZstdCompressor(level=1) + c.set_pledged_input_size(len(DAT)+1) + + for start in range(0, len(DAT), CHUNK_SIZE): + end = min(start+CHUNK_SIZE, len(DAT)) + _dat = c.compress(DAT[start:end]) + + with self.assertRaises(ZstdError): + c.flush() + + # too much data + c = ZstdCompressor(level=1) + c.set_pledged_input_size(len(DAT)) + + for start in range(0, len(DAT), CHUNK_SIZE): + end = min(start+CHUNK_SIZE, len(DAT)) + _dat = c.compress(DAT[start:end]) + + with self.assertRaises(ZstdError): + c.compress(b'extra', ZstdCompressor.FLUSH_FRAME) + + # content size not set if content_size_flag == 0 + c = ZstdCompressor(options={CompressionParameter.content_size_flag: 0}) + c.set_pledged_input_size(10) + dat1 = c.compress(b"hello") + dat2 = c.compress(b"world") + dat3 = c.flush() + frame_data = get_frame_info(dat1 + dat2 + dat3) + self.assertIsNone(frame_data.decompressed_size) + + class DecompressorTestCase(unittest.TestCase): def test_simple_decompress_bad_args(self): @@ -385,12 +522,22 @@ def test_simple_decompress_bad_args(self): self.assertRaises(TypeError, ZstdDecompressor, options=b'abc') with self.assertRaises(ValueError): - ZstdDecompressor(options={2**31 : 100}) + ZstdDecompressor(options={C_INT_MAX: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={C_INT_MIN: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={0: C_INT_MAX}) + with self.assertRaises(OverflowError): + ZstdDecompressor(options={2**1000: 100}) + with self.assertRaises(OverflowError): + ZstdDecompressor(options={-(2**1000): 100}) + with self.assertRaises(OverflowError): + ZstdDecompressor(options={0: -(2**1000)}) - with self.assertRaises(ZstdError): - ZstdDecompressor(options={DecompressionParameter.window_log_max:100}) - with self.assertRaises(ZstdError): - ZstdDecompressor(options={3333 : 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={DecompressionParameter.window_log_max: 100}) + with self.assertRaises(ValueError): + ZstdDecompressor(options={3333: 100}) empty = compress(b'') lzd = ZstdDecompressor() @@ -403,26 +550,52 @@ def test_decompress_parameters(self): d = {DecompressionParameter.window_log_max : 15} ZstdDecompressor(options=d) - # larger than signed int, ValueError d1 = d.copy() - d1[DecompressionParameter.window_log_max] = 2**31 - self.assertRaises(ValueError, ZstdDecompressor, None, d1) + # larger than signed int + d1[DecompressionParameter.window_log_max] = 2**1000 + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) + # smaller than signed int + d1[DecompressionParameter.window_log_max] = -(2**1000) + with self.assertRaises(OverflowError): + ZstdDecompressor(None, d1) + + d1[DecompressionParameter.window_log_max] = C_INT_MAX + with self.assertRaises(ValueError): + ZstdDecompressor(None, d1) + d1[DecompressionParameter.window_log_max] = C_INT_MIN + with self.assertRaises(ValueError): + ZstdDecompressor(None, d1) # out of bounds error msg options = {DecompressionParameter.window_log_max:100} - with self.assertRaisesRegex(ZstdError, - (r'Error when setting zstd decompression parameter "window_log_max", ' - r'it should \d+ <= value <= \d+, provided value is 100\. ' - r'\(zstd v\d\.\d\.\d, (?:32|64)-bit build\)')): + with self.assertRaisesRegex( + ValueError, + "decompression parameter 'window_log_max' received an illegal value 100; " + r'the valid range is \[-?\d+, -?\d+\]', + ): + decompress(b'', options=options) + + # out of bounds deecompression parameter + options[DecompressionParameter.window_log_max] = C_INT_MAX + with self.assertRaises(ValueError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = C_INT_MIN + with self.assertRaises(ValueError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = 2**1000 + with self.assertRaises(OverflowError): + decompress(b'', options=options) + options[DecompressionParameter.window_log_max] = -(2**1000) + with self.assertRaises(OverflowError): decompress(b'', options=options) def test_unknown_decompression_parameter(self): KEY = 100001234 options = {DecompressionParameter.window_log_max: DecompressionParameter.window_log_max.bounds()[1], KEY: 200000000} - pattern = r'Zstd decompression parameter.*?"unknown parameter \(key %d\)"' \ - % KEY - with self.assertRaisesRegex(ZstdError, pattern): + pattern = rf"invalid decompression parameter 'unknown parameter \(key {KEY}\)'" + with self.assertRaisesRegex(ValueError, pattern): ZstdDecompressor(options=options) def test_decompress_epilogue_flags(self): @@ -507,7 +680,7 @@ def test_decompress_epilogue_flags(self): self.assertFalse(d.needs_input) def test_decompressor_arg(self): - zd = ZstdDict(b'12345678', True) + zd = ZstdDict(b'12345678', is_raw=True) with self.assertRaises(TypeError): d = ZstdDecompressor(zstd_dict={}) @@ -1021,6 +1194,10 @@ def test_decompressor_skippable(self): class ZstdDictTestCase(unittest.TestCase): def test_is_raw(self): + # must be passed as a keyword argument + with self.assertRaises(TypeError): + ZstdDict(bytes(8), True) + # content < 8 b = b'1234567' with self.assertRaises(ValueError): @@ -1068,35 +1245,49 @@ def test_invalid_dict(self): # corrupted zd = ZstdDict(dict_content, is_raw=False) - with self.assertRaisesRegex(ZstdError, r'ZSTD_CDict.*?corrupted'): + with self.assertRaisesRegex(ZstdError, r'ZSTD_CDict.*?content\.$'): ZstdCompressor(zstd_dict=zd.as_digested_dict) - with self.assertRaisesRegex(ZstdError, r'ZSTD_DDict.*?corrupted'): + with self.assertRaisesRegex(ZstdError, r'ZSTD_DDict.*?content\.$'): ZstdDecompressor(zd) # wrong type - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): - ZstdCompressor(zstd_dict=(zd, b'123')) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdCompressor(zstd_dict=[zd, 1]) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdCompressor(zstd_dict=(zd, 1.0)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdCompressor(zstd_dict=(zd,)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, 1, 2)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, -1)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdCompressor(zstd_dict=(zd, 3)) - - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): - ZstdDecompressor(zstd_dict=(zd, b'123')) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaises(OverflowError): + ZstdCompressor(zstd_dict=(zd, 2**1000)) + with self.assertRaises(OverflowError): + ZstdCompressor(zstd_dict=(zd, -2**1000)) + + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdDecompressor(zstd_dict=[zd, 1]) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdDecompressor(zstd_dict=(zd, 1.0)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): + ZstdDecompressor((zd,)) + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, 1, 2)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, -1)) - with self.assertRaisesRegex(TypeError, r'should be ZstdDict object'): + with self.assertRaisesRegex(TypeError, r'should be a ZstdDict object'): ZstdDecompressor((zd, 3)) + with self.assertRaises(OverflowError): + ZstdDecompressor((zd, 2**1000)) + with self.assertRaises(OverflowError): + ZstdDecompressor((zd, -2**1000)) def test_train_dict(self): - - TRAINED_DICT = train_dict(SAMPLES, DICT_SIZE1) - ZstdDict(TRAINED_DICT.dict_content, False) + ZstdDict(TRAINED_DICT.dict_content, is_raw=False) self.assertNotEqual(TRAINED_DICT.dict_id, 0) self.assertGreater(len(TRAINED_DICT.dict_content), 0) @@ -1174,43 +1365,91 @@ def test_finalize_dict_arguments(self): def test_train_dict_c(self): # argument wrong type with self.assertRaises(TypeError): - _zstd._train_dict({}, (), 100) + _zstd.train_dict({}, (), 100) + with self.assertRaises(TypeError): + _zstd.train_dict(bytearray(), (), 100) + with self.assertRaises(TypeError): + _zstd.train_dict(b'', 99, 100) + with self.assertRaises(TypeError): + _zstd.train_dict(b'', [], 100) with self.assertRaises(TypeError): - _zstd._train_dict(b'', 99, 100) + _zstd.train_dict(b'', (), 100.1) with self.assertRaises(TypeError): - _zstd._train_dict(b'', (), 100.1) + _zstd.train_dict(b'', (99.1,), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'abc', (4, -1), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'abc', (2,), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'', (99,), 100) # size > size_t with self.assertRaises(ValueError): - _zstd._train_dict(b'', (2**64+1,), 100) + _zstd.train_dict(b'', (2**1000,), 100) + with self.assertRaises(ValueError): + _zstd.train_dict(b'', (-2**1000,), 100) # dict_size <= 0 with self.assertRaises(ValueError): - _zstd._train_dict(b'', (), 0) + _zstd.train_dict(b'', (), 0) + with self.assertRaises(ValueError): + _zstd.train_dict(b'', (), -1) + + with self.assertRaises(ZstdError): + _zstd.train_dict(b'', (), 1) def test_finalize_dict_c(self): with self.assertRaises(TypeError): - _zstd._finalize_dict(1, 2, 3, 4, 5) + _zstd.finalize_dict(1, 2, 3, 4, 5) # argument wrong type with self.assertRaises(TypeError): - _zstd._finalize_dict({}, b'', (), 100, 5) + _zstd.finalize_dict({}, b'', (), 100, 5) + with self.assertRaises(TypeError): + _zstd.finalize_dict(bytearray(TRAINED_DICT.dict_content), b'', (), 100, 5) with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, {}, (), 100, 5) + _zstd.finalize_dict(TRAINED_DICT.dict_content, {}, (), 100, 5) with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', 99, 100, 5) + _zstd.finalize_dict(TRAINED_DICT.dict_content, bytearray(), (), 100, 5) with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', (), 100.1, 5) + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', 99, 100, 5) with self.assertRaises(TypeError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, 5.1) + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', [], 100, 5) + with self.assertRaises(TypeError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100.1, 5) + with self.assertRaises(TypeError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, 5.1) + + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'abc', (4, -1), 100, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'abc', (2,), 100, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (99,), 100, 5) # size > size_t with self.assertRaises(ValueError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', (2**64+1,), 100, 5) + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (2**1000,), 100, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (-2**1000,), 100, 5) # dict_size <= 0 with self.assertRaises(ValueError): - _zstd._finalize_dict(TRAINED_DICT.dict_content, b'', (), 0, 5) + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 0, 5) + with self.assertRaises(ValueError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), -1, 5) + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 2**1000, 5) + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), -2**1000, 5) + + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, 2**1000) + with self.assertRaises(OverflowError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, -2**1000) + + with self.assertRaises(ZstdError): + _zstd.finalize_dict(TRAINED_DICT.dict_content, b'', (), 100, 5) def test_train_buffer_protocol_samples(self): def _nbytes(dat): @@ -1232,25 +1471,25 @@ def _nbytes(dat): # wrong size list with self.assertRaisesRegex(ValueError, "The samples size tuple doesn't match the concatenation's size"): - _zstd._train_dict(concatenation, tuple(wrong_size_lst), 100*_1K) + _zstd.train_dict(concatenation, tuple(wrong_size_lst), 100*_1K) # correct size list - _zstd._train_dict(concatenation, tuple(correct_size_lst), 3*_1K) + _zstd.train_dict(concatenation, tuple(correct_size_lst), 3*_1K) # wrong size list with self.assertRaisesRegex(ValueError, "The samples size tuple doesn't match the concatenation's size"): - _zstd._finalize_dict(TRAINED_DICT.dict_content, + _zstd.finalize_dict(TRAINED_DICT.dict_content, concatenation, tuple(wrong_size_lst), 300*_1K, 5) # correct size list - _zstd._finalize_dict(TRAINED_DICT.dict_content, + _zstd.finalize_dict(TRAINED_DICT.dict_content, concatenation, tuple(correct_size_lst), 300*_1K, 5) def test_as_prefix(self): # V1 V1 = THIS_FILE_BYTES - zd = ZstdDict(V1, True) + zd = ZstdDict(V1, is_raw=True) # V2 mid = len(V1) // 2 @@ -1266,7 +1505,7 @@ def test_as_prefix(self): self.assertEqual(decompress(dat, zd.as_prefix), V2) # use wrong prefix - zd2 = ZstdDict(SAMPLES[0], True) + zd2 = ZstdDict(SAMPLES[0], is_raw=True) try: decompressed = decompress(dat, zd2.as_prefix) except ZstdError: # expected @@ -1420,11 +1659,12 @@ def test_init_bad_mode(self): with self.assertRaises(ValueError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "rw") - with self.assertRaisesRegex(TypeError, r"NOT be CompressionParameter"): + with self.assertRaisesRegex(TypeError, + r"not be a CompressionParameter"): ZstdFile(io.BytesIO(), 'rb', options={CompressionParameter.compression_level:5}) with self.assertRaisesRegex(TypeError, - r"NOT be DecompressionParameter"): + r"not be a DecompressionParameter"): ZstdFile(io.BytesIO(), 'wb', options={DecompressionParameter.window_log_max:21}) @@ -1435,19 +1675,19 @@ def test_init_bad_check(self): with self.assertRaises(TypeError): ZstdFile(io.BytesIO(), "w", level='asd') # CHECK_UNKNOWN and anything above CHECK_ID_MAX should be invalid. - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(), "w", options={999:9999}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(), "w", options={CompressionParameter.window_log:99}) with self.assertRaises(TypeError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), "r", options=33) - with self.assertRaises(ValueError): + with self.assertRaises(OverflowError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), options={DecompressionParameter.window_log_max:2**31}) - with self.assertRaises(ZstdError): + with self.assertRaises(ValueError): ZstdFile(io.BytesIO(COMPRESSED_100_PLUS_32KB), options={444:333}) @@ -1463,7 +1703,7 @@ def test_init_close_fp(self): tmp_f.write(DAT_130K_C) filename = tmp_f.name - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): ZstdFile(filename, options={'a':'b'}) # for PyPy @@ -1682,10 +1922,10 @@ def test_read_incomplete(self): # Trailing data isn't a valid compressed stream with ZstdFile(io.BytesIO(self.FRAME_42 + b'12345')) as f: - self.assertEqual(f.read(), self.DECOMPRESSED_42) + self.assertRaises(ZstdError, f.read) with ZstdFile(io.BytesIO(SKIPPABLE_FRAME + b'12345')) as f: - self.assertEqual(f.read(), b'') + self.assertRaises(ZstdError, f.read) def test_read_truncated(self): # Drop stream epilogue: 4 bytes checksum @@ -2428,15 +2668,18 @@ def test_buffer_protocol(self): class FreeThreadingMethodTests(unittest.TestCase): - @unittest.skipUnless(Py_GIL_DISABLED, 'this test can only possibly fail with GIL disabled') @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_compress_locking(self): input = b'a'* (16*_1K) num_threads = 8 + # gh-136394: the first output of .compress() includes the frame header + # we run the first .compress() call outside of the threaded portion + # to make the test order-independent + comp = ZstdCompressor() - parts = [] + parts = [comp.compress(input, ZstdCompressor.FLUSH_BLOCK)] for _ in range(num_threads): res = comp.compress(input, ZstdCompressor.FLUSH_BLOCK) if res: @@ -2445,7 +2688,7 @@ def test_compress_locking(self): expected = b''.join(parts) + rest1 comp = ZstdCompressor() - output = [] + output = [comp.compress(input, ZstdCompressor.FLUSH_BLOCK)] def run_method(method, input_data, output_data): res = method(input_data, ZstdCompressor.FLUSH_BLOCK) if res: @@ -2465,7 +2708,6 @@ def run_method(method, input_data, output_data): actual = b''.join(output) + rest2 self.assertEqual(expected, actual) - @unittest.skipUnless(Py_GIL_DISABLED, 'this test can only possibly fail with GIL disabled') @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_decompress_locking(self): @@ -2501,6 +2743,59 @@ def run_method(method, input_data, output_data): actual = b''.join(output) self.assertEqual(expected, actual) + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_compress_shared_dict(self): + num_threads = 8 + + def run_method(b): + level = threading.get_ident() % 4 + # sync threads to increase chance of contention on + # capsule storing dictionary levels + b.wait() + ZstdCompressor(level=level, + zstd_dict=TRAINED_DICT.as_digested_dict) + b.wait() + ZstdCompressor(level=level, + zstd_dict=TRAINED_DICT.as_undigested_dict) + b.wait() + ZstdCompressor(level=level, + zstd_dict=TRAINED_DICT.as_prefix) + threads = [] + + b = threading.Barrier(num_threads) + for i in range(num_threads): + thread = threading.Thread(target=run_method, args=(b,)) + + threads.append(thread) + + with threading_helper.start_threads(threads): + pass + + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_decompress_shared_dict(self): + num_threads = 8 + + def run_method(b): + # sync threads to increase chance of contention on + # decompression dictionary + b.wait() + ZstdDecompressor(zstd_dict=TRAINED_DICT.as_digested_dict) + b.wait() + ZstdDecompressor(zstd_dict=TRAINED_DICT.as_undigested_dict) + b.wait() + ZstdDecompressor(zstd_dict=TRAINED_DICT.as_prefix) + threads = [] + + b = threading.Barrier(num_threads) + for i in range(num_threads): + thread = threading.Thread(target=run_method, args=(b,)) + + threads.append(thread) + + with threading_helper.start_threads(threads): + pass if __name__ == "__main__": diff --git a/Lib/test/typinganndata/fwdref_module.py b/Lib/test/typinganndata/fwdref_module.py new file mode 100644 index 00000000000000..7347a7a42455c2 --- /dev/null +++ b/Lib/test/typinganndata/fwdref_module.py @@ -0,0 +1,6 @@ +from typing import ForwardRef + +MyList = list[int] +MyDict = dict[str, 'MyList'] + +fw = ForwardRef('MyDict', module=__name__) diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 5ae439f5cd3b78..41366fbf443a4f 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -211,7 +211,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # If we're allowed to break long words, then do so: put as much # of the next chunk onto the current line as will fit. - if self.break_long_words: + if self.break_long_words and space_left > 0: end = space_left chunk = reversed_chunks[-1] if self.break_on_hyphens and len(chunk) > space_left: diff --git a/Lib/threading.py b/Lib/threading.py index 39a1a7f4cdfda0..c03b0b5370c933 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -165,7 +165,7 @@ def __repr__(self): except KeyError: pass return "<%s %s.%s object owner=%r count=%d at %s>" % ( - "locked" if self._block.locked() else "unlocked", + "locked" if self.locked() else "unlocked", self.__class__.__module__, self.__class__.__qualname__, owner, @@ -244,7 +244,7 @@ def __exit__(self, t, v, tb): def locked(self): """Return whether this object is locked.""" - return self._count > 0 + return self._block.locked() # Internal methods used by condition variables @@ -660,7 +660,8 @@ def wait(self, timeout=None): (or fractions thereof). This method returns the internal flag on exit, so it will always return - True except if a timeout is given and the operation times out. + ``True`` except if a timeout is given and the operation times out, when + it will return ``False``. """ with self._cond: @@ -951,6 +952,8 @@ def _after_fork(self, new_ident=None): # This thread is alive. self._ident = new_ident assert self._os_thread_handle.ident == new_ident + if _HAVE_THREAD_NATIVE_ID: + self._set_native_id() else: # Otherwise, the thread is dead, Jim. _PyThread_AfterFork() # already marked our handle done. @@ -1419,7 +1422,7 @@ def __init__(self, dummy_thread): # the related _DummyThread will be kept forever! _thread_local_info._track_dummy_thread_ref = self - def __del__(self): + def __del__(self, _active_limbo_lock=_active_limbo_lock, _active=_active): with _active_limbo_lock: if _active.get(self._tident) is self._dummy_thread: _active.pop(self._tident, None) diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py index a693b04870b995..9526d8b949fa3b 100644 --- a/Lib/tkinter/__init__.py +++ b/Lib/tkinter/__init__.py @@ -7,25 +7,25 @@ LabelFrame and PanedWindow. Properties of the widgets are specified with keyword arguments. -Keyword arguments have the same name as the corresponding resource +Keyword arguments have the same name as the corresponding options under Tk. Widgets are positioned with one of the geometry managers Place, Pack or Grid. These managers can be called with methods place, pack, grid available in every Widget. -Actions are bound to events by resources (e.g. keyword argument -command) or with the method bind. +Actions are bound to events by options (e.g. the command +keyword argument) or with the bind() method. Example (Hello, World): import tkinter from tkinter.constants import * tk = tkinter.Tk() frame = tkinter.Frame(tk, relief=RIDGE, borderwidth=2) -frame.pack(fill=BOTH,expand=1) +frame.pack(fill=BOTH, expand=1) label = tkinter.Label(frame, text="Hello, World") label.pack(fill=X, expand=1) -button = tkinter.Button(frame,text="Exit",command=tk.destroy) +button = tkinter.Button(frame, text="Exit", command=tk.destroy) button.pack(side=BOTTOM) tk.mainloop() """ @@ -847,7 +847,7 @@ def tk_focusNext(self): The focus order first goes to the next child, then to the children of the child recursively and then to the next sibling which is higher in the stacking order. A - widget is omitted if it has the takefocus resource set + widget is omitted if it has the takefocus option set to 0.""" name = self.tk.call('tk_focusNext', self._w) if not name: return None @@ -1827,18 +1827,24 @@ def _configure(self, cmd, cnf, kw): # These used to be defined in Widget: def configure(self, cnf=None, **kw): - """Configure resources of a widget. + """Query or modify the configuration options of the widget. - The values for resources are specified as keyword - arguments. To get an overview about - the allowed keyword arguments call the method keys. + If no arguments are specified, return a dictionary describing + all of the available options for the widget. + + If an option name is specified, then return a tuple describing + the one named option. + + If one or more keyword arguments are specified or a dictionary + is specified, then modify the widget option(s) to have the given + value(s). """ return self._configure('configure', cnf, kw) config = configure def cget(self, key): - """Return the resource value for a KEY given as string.""" + """Return the current value of the configuration option.""" return self.tk.call(self._w, 'cget', '-' + key) __getitem__ = cget @@ -1847,7 +1853,7 @@ def __setitem__(self, key, value): self.configure({key: value}) def keys(self): - """Return a list of all resource names of this widget.""" + """Return a list of all option names of this widget.""" splitlist = self.tk.splitlist return [splitlist(x)[0][1:] for x in splitlist(self.tk.call(self._w, 'configure'))] @@ -1966,7 +1972,7 @@ def _grid_configure(self, command, index, cnf, kw): def grid_columnconfigure(self, index, cnf={}, **kw): """Configure column INDEX of a grid. - Valid resources are minsize (minimum size of the column), + Valid options are minsize (minimum size of the column), weight (how much does additional space propagate to this column) and pad (how much space to let additionally).""" return self._grid_configure('columnconfigure', index, cnf, kw) @@ -1997,7 +2003,7 @@ def grid_propagate(self, flag=_noarg_): def grid_rowconfigure(self, index, cnf={}, **kw): """Configure row INDEX of a grid. - Valid resources are minsize (minimum size of the row), + Valid options are minsize (minimum size of the row), weight (how much does additional space propagate to this row) and pad (how much space to let additionally).""" return self._grid_configure('rowconfigure', index, cnf, kw) @@ -2817,7 +2823,7 @@ class Toplevel(BaseWidget, Wm): def __init__(self, master=None, cnf={}, **kw): """Construct a toplevel widget with the parent MASTER. - Valid resource names: background, bd, bg, borderwidth, class, + Valid option names: background, bd, bg, borderwidth, class, colormap, container, cursor, height, highlightbackground, highlightcolor, highlightthickness, menu, relief, screen, takefocus, use, visual, width.""" @@ -2894,7 +2900,7 @@ class Canvas(Widget, XView, YView): def __init__(self, master=None, cnf={}, **kw): """Construct a canvas widget with the parent MASTER. - Valid resource names: background, bd, bg, borderwidth, closeenough, + Valid option names: background, bd, bg, borderwidth, closeenough, confine, cursor, height, highlightbackground, highlightcolor, highlightthickness, insertbackground, insertborderwidth, insertofftime, insertontime, insertwidth, offset, relief, @@ -3103,16 +3109,14 @@ def insert(self, *args): self.tk.call((self._w, 'insert') + args) def itemcget(self, tagOrId, option): - """Return the resource value for an OPTION for item TAGORID.""" + """Return the value of OPTION for item TAGORID.""" return self.tk.call( (self._w, 'itemcget') + (tagOrId, '-'+option)) def itemconfigure(self, tagOrId, cnf=None, **kw): - """Configure resources of an item TAGORID. + """Query or modify the configuration options of an item TAGORID. - The values for resources are specified as keyword - arguments. To get an overview about - the allowed keyword arguments call the method without arguments. + Similar to configure() except that it applies to the specified item. """ return self._configure(('itemconfigure', tagOrId), cnf, kw) @@ -3204,7 +3208,7 @@ class Checkbutton(Widget): def __init__(self, master=None, cnf={}, **kw): """Construct a checkbutton widget with the parent MASTER. - Valid resource names: activebackground, activeforeground, anchor, + Valid option names: activebackground, activeforeground, anchor, background, bd, bg, bitmap, borderwidth, command, cursor, disabledforeground, fg, font, foreground, height, highlightbackground, highlightcolor, highlightthickness, image, @@ -3235,7 +3239,7 @@ def flash(self): self.tk.call(self._w, 'flash') def invoke(self): - """Toggle the button and invoke a command if given as resource.""" + """Toggle the button and invoke a command if given as option.""" return self.tk.call(self._w, 'invoke') def select(self): @@ -3253,7 +3257,7 @@ class Entry(Widget, XView): def __init__(self, master=None, cnf={}, **kw): """Construct an entry widget with the parent MASTER. - Valid resource names: background, bd, bg, borderwidth, cursor, + Valid option names: background, bd, bg, borderwidth, cursor, exportselection, fg, font, foreground, highlightbackground, highlightcolor, highlightthickness, insertbackground, insertborderwidth, insertofftime, insertontime, insertwidth, @@ -3339,7 +3343,7 @@ class Frame(Widget): def __init__(self, master=None, cnf={}, **kw): """Construct a frame widget with the parent MASTER. - Valid resource names: background, bd, bg, borderwidth, class, + Valid option names: background, bd, bg, borderwidth, class, colormap, container, cursor, height, highlightbackground, highlightcolor, highlightthickness, relief, takefocus, visual, width.""" cnf = _cnfmerge((cnf, kw)) @@ -3383,7 +3387,7 @@ class Listbox(Widget, XView, YView): def __init__(self, master=None, cnf={}, **kw): """Construct a listbox widget with the parent MASTER. - Valid resource names: background, bd, bg, borderwidth, cursor, + Valid option names: background, bd, bg, borderwidth, cursor, exportselection, fg, font, foreground, height, highlightbackground, highlightcolor, highlightthickness, relief, selectbackground, selectborderwidth, selectforeground, selectmode, setgrid, takefocus, @@ -3476,18 +3480,15 @@ def size(self): return self.tk.getint(self.tk.call(self._w, 'size')) def itemcget(self, index, option): - """Return the resource value for an ITEM and an OPTION.""" + """Return the value of OPTION for item at INDEX.""" return self.tk.call( (self._w, 'itemcget') + (index, '-'+option)) def itemconfigure(self, index, cnf=None, **kw): - """Configure resources of an ITEM. + """Query or modify the configuration options of an item at INDEX. - The values for resources are specified as keyword arguments. - To get an overview about the allowed keyword arguments - call the method without arguments. - Valid resource names: background, bg, foreground, fg, - selectbackground, selectforeground.""" + Similar to configure() except that it applies to the specified item. + """ return self._configure(('itemconfigure', index), cnf, kw) itemconfig = itemconfigure @@ -3499,7 +3500,7 @@ class Menu(Widget): def __init__(self, master=None, cnf={}, **kw): """Construct menu widget with the parent MASTER. - Valid resource names: activebackground, activeborderwidth, + Valid option names: activebackground, activeborderwidth, activeforeground, background, bd, bg, borderwidth, cursor, disabledforeground, fg, font, foreground, postcommand, relief, selectcolor, takefocus, tearoff, tearoffcommand, title, type.""" @@ -3580,11 +3581,15 @@ def delete(self, index1, index2=None): self.tk.call(self._w, 'delete', index1, index2) def entrycget(self, index, option): - """Return the resource value of a menu item for OPTION at INDEX.""" + """Return the value of OPTION for a menu item at INDEX.""" return self.tk.call(self._w, 'entrycget', index, '-' + option) def entryconfigure(self, index, cnf=None, **kw): - """Configure a menu item at INDEX.""" + """Query or modify the configuration options of a menu item at INDEX. + + Similar to configure() except that it applies to the specified + menu item. + """ return self._configure(('entryconfigure', index), cnf, kw) entryconfig = entryconfigure @@ -3642,7 +3647,7 @@ class Radiobutton(Widget): def __init__(self, master=None, cnf={}, **kw): """Construct a radiobutton widget with the parent MASTER. - Valid resource names: activebackground, activeforeground, anchor, + Valid option names: activebackground, activeforeground, anchor, background, bd, bg, bitmap, borderwidth, command, cursor, disabledforeground, fg, font, foreground, height, highlightbackground, highlightcolor, highlightthickness, image, @@ -3661,7 +3666,7 @@ def flash(self): self.tk.call(self._w, 'flash') def invoke(self): - """Toggle the button and invoke a command if given as resource.""" + """Toggle the button and invoke a command if given as option.""" return self.tk.call(self._w, 'invoke') def select(self): @@ -3675,7 +3680,7 @@ class Scale(Widget): def __init__(self, master=None, cnf={}, **kw): """Construct a scale widget with the parent MASTER. - Valid resource names: activebackground, background, bigincrement, bd, + Valid option names: activebackground, background, bigincrement, bd, bg, borderwidth, command, cursor, digits, fg, font, foreground, from, highlightbackground, highlightcolor, highlightthickness, label, length, orient, relief, repeatdelay, repeatinterval, resolution, @@ -3714,7 +3719,7 @@ class Scrollbar(Widget): def __init__(self, master=None, cnf={}, **kw): """Construct a scrollbar widget with the parent MASTER. - Valid resource names: activebackground, activerelief, + Valid option names: activebackground, activerelief, background, bd, bg, borderwidth, command, cursor, elementborderwidth, highlightbackground, highlightcolor, highlightthickness, jump, orient, @@ -3958,7 +3963,11 @@ def image_cget(self, index, option): return self.tk.call(self._w, "image", "cget", index, option) def image_configure(self, index, cnf=None, **kw): - """Configure an embedded image at INDEX.""" + """Query or modify the configuration options of an embedded image at INDEX. + + Similar to configure() except that it applies to the specified + embedded image. + """ return self._configure(('image', 'configure', index), cnf, kw) def image_create(self, index, cnf={}, **kw): @@ -4096,7 +4105,10 @@ def tag_cget(self, tagName, option): return self.tk.call(self._w, 'tag', 'cget', tagName, option) def tag_configure(self, tagName, cnf=None, **kw): - """Configure a tag TAGNAME.""" + """Query or modify the configuration options of a tag TAGNAME. + + Similar to configure() except that it applies to the specified tag. + """ return self._configure(('tag', 'configure', tagName), cnf, kw) tag_config = tag_configure @@ -4154,7 +4166,11 @@ def window_cget(self, index, option): return self.tk.call(self._w, 'window', 'cget', index, option) def window_configure(self, index, cnf=None, **kw): - """Configure an embedded window at INDEX.""" + """Query or modify the configuration options of an embedded window at INDEX. + + Similar to configure() except that it applies to the specified + embedded window. + """ return self._configure(('window', 'configure', index), cnf, kw) window_config = window_configure @@ -4194,7 +4210,7 @@ class OptionMenu(Menubutton): def __init__(self, master, variable, value, *values, **kwargs): """Construct an optionmenu widget with the parent MASTER, with - the resource textvariable set to VARIABLE, the initially selected + the option textvariable set to VARIABLE, the initially selected value VALUE, the other menu values VALUES and an additional keyword argument command.""" kw = {"borderwidth": 2, "textvariable": variable, @@ -4296,7 +4312,7 @@ class PhotoImage(Image): def __init__(self, name=None, cnf={}, master=None, **kw): """Create an image with NAME. - Valid resource names: data, format, file, gamma, height, palette, + Valid option names: data, format, file, gamma, height, palette, width.""" Image.__init__(self, 'photo', name, cnf, master, **kw) @@ -4559,7 +4575,7 @@ class BitmapImage(Image): def __init__(self, name=None, cnf={}, master=None, **kw): """Create a bitmap with NAME. - Valid resource names: background, data, file, foreground, maskdata, maskfile.""" + Valid option names: background, data, file, foreground, maskdata, maskfile.""" Image.__init__(self, 'bitmap', name, cnf, master, **kw) @@ -4877,26 +4893,17 @@ def sash_place(self, index, x, y): return self.sash("place", index, x, y) def panecget(self, child, option): - """Query a management option for window. - - Option may be any value allowed by the paneconfigure subcommand - """ + """Return the value of option for a child window.""" return self.tk.call( (self._w, 'panecget') + (child, '-'+option)) def paneconfigure(self, tagOrId, cnf=None, **kw): - """Query or modify the management options for window. - - If no option is specified, returns a list describing all - of the available options for pathName. If option is - specified with no value, then the command returns a list - describing the one named option (this list will be identical - to the corresponding sublist of the value returned if no - option is specified). If one or more option-value pairs are - specified, then the command modifies the given widget - option(s) to have the given value(s); in this case the - command returns an empty string. The following options - are supported: + """Query or modify the configuration options for a child window. + + Similar to configure() except that it applies to the specified + window. + + The following options are supported: after window Insert the window after the window specified. window diff --git a/Lib/tkinter/scrolledtext.py b/Lib/tkinter/scrolledtext.py index 4f9a8815b6184b..8dcead5e31930e 100644 --- a/Lib/tkinter/scrolledtext.py +++ b/Lib/tkinter/scrolledtext.py @@ -23,7 +23,7 @@ def __init__(self, master=None, **kw): self.vbar = Scrollbar(self.frame) self.vbar.pack(side=RIGHT, fill=Y) - kw.update({'yscrollcommand': self.vbar.set}) + kw['yscrollcommand'] = self.vbar.set Text.__init__(self, self.frame, **kw) self.pack(side=LEFT, fill=BOTH, expand=True) self.vbar['command'] = self.yview diff --git a/Lib/tkinter/ttk.py b/Lib/tkinter/ttk.py index c0cf1e787fa9ad..ef2b91dfbb6638 100644 --- a/Lib/tkinter/ttk.py +++ b/Lib/tkinter/ttk.py @@ -1589,7 +1589,7 @@ class OptionMenu(Menubutton): def __init__(self, master, variable, default=None, *values, **kwargs): """Construct a themed OptionMenu widget with master as the parent, - the resource textvariable set to variable, the initially selected + the option textvariable set to variable, the initially selected value specified by the default parameter, the menu values given by *values and additional keywords. diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 8d01fd7bce41b0..1f31258ce361c9 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -36,7 +36,7 @@ from token import EXACT_TOKEN_TYPES import _tokenize -cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) +cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) import token @@ -86,7 +86,7 @@ def _all_string_prefixes(): # The valid string prefixes. Only contain the lower case versions, # and don't contain any permutations (include 'fr', but not # 'rf'). The various permutations will be generated. - _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr'] + _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr'] # if we add binary f-strings, add: ['fb', 'fbr'] result = {''} for prefix in _valid_string_prefixes: @@ -274,7 +274,7 @@ def compat(self, token, iterable): toks_append = self.tokens.append startline = token[0] in (NEWLINE, NL) prevstring = False - in_fstring = 0 + in_fstring_or_tstring = 0 for tok in _itertools.chain([token], iterable): toknum, tokval = tok[:2] @@ -293,10 +293,10 @@ def compat(self, token, iterable): else: prevstring = False - if toknum == FSTRING_START: - in_fstring += 1 - elif toknum == FSTRING_END: - in_fstring -= 1 + if toknum in {FSTRING_START, TSTRING_START}: + in_fstring_or_tstring += 1 + elif toknum in {FSTRING_END, TSTRING_END}: + in_fstring_or_tstring -= 1 if toknum == INDENT: indents.append(tokval) continue @@ -311,8 +311,8 @@ def compat(self, token, iterable): elif toknum in {FSTRING_MIDDLE, TSTRING_MIDDLE}: tokval = self.escape_brackets(tokval) - # Insert a space between two consecutive brackets if we are in an f-string - if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring: + # Insert a space between two consecutive brackets if we are in an f-string or t-string + if tokval in {"{", "}"} and self.tokens and self.tokens[-1] == tokval and in_fstring_or_tstring: tokval = ' ' + tokval # Insert a space between two consecutive f-strings @@ -385,22 +385,23 @@ def read_or_stop(): except StopIteration: return b'' - def find_cookie(line): + def check(line, encoding): + # Check if the line matches the encoding. + if 0 in line: + raise SyntaxError("source code cannot contain null bytes") try: - # Decode as UTF-8. Either the line is an encoding declaration, - # in which case it should be pure ASCII, or it must be UTF-8 - # per default encoding. - line_string = line.decode('utf-8') + line.decode(encoding) except UnicodeDecodeError: msg = "invalid or missing encoding declaration" if filename is not None: msg = '{} for {!r}'.format(msg, filename) raise SyntaxError(msg) - match = cookie_re.match(line_string) + def find_cookie(line): + match = cookie_re.match(line) if not match: return None - encoding = _get_normal_name(match.group(1)) + encoding = _get_normal_name(match.group(1).decode()) try: codec = lookup(encoding) except LookupError: @@ -433,18 +434,23 @@ def find_cookie(line): encoding = find_cookie(first) if encoding: + check(first, encoding) return encoding, [first] if not blank_re.match(first): + check(first, default) return default, [first] second = read_or_stop() if not second: + check(first, default) return default, [first] encoding = find_cookie(second) if encoding: + check(first + second, encoding) return encoding, [first, second] + check(first + second, default) return default, [first, second] diff --git a/Lib/traceback.py b/Lib/traceback.py index 17b082eced6f05..5a34a2b87b6df8 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -534,7 +534,7 @@ def format_frame_summary(self, frame_summary, **kwargs): colorize = kwargs.get("colorize", False) row = [] filename = frame_summary.filename - if frame_summary.filename.startswith("<stdin>-"): + if frame_summary.filename.startswith("<stdin-") and frame_summary.filename.endswith('>'): filename = "<stdin>" if colorize: theme = _colorize.get_theme(force_color=True).traceback @@ -1310,7 +1310,6 @@ def _find_keyword_typos(self): lines = source.splitlines() error_code = lines[line -1 if line > 0 else 0:end_line] - error_code[0] = error_code[0][offset:] error_code = textwrap.dedent('\n'.join(error_code)) # Do not continue if the source is too large @@ -1326,7 +1325,8 @@ def _find_keyword_typos(self): if token.type != tokenize.NAME: continue # Only consider NAME tokens on the same line as the error - if from_filename and token.start[0]+line != end_line+1: + the_end = end_line if line == 0 else end_line + 1 + if from_filename and token.start[0]+line != the_end: continue wrong_name = token.string if wrong_name in keyword.kwlist: @@ -1595,7 +1595,11 @@ def _compute_suggestion_error(exc_value, tb, wrong_name): if isinstance(exc_value, AttributeError): obj = exc_value.obj try: - d = dir(obj) + try: + d = dir(obj) + except TypeError: # Attributes are unsortable, e.g. int and str + d = list(obj.__class__.__dict__.keys()) + list(obj.__dict__.keys()) + d = sorted([x for x in d if isinstance(x, str)]) hide_underscored = (wrong_name[:1] != '_') if hide_underscored and tb is not None: while tb.tb_next is not None: @@ -1610,7 +1614,11 @@ def _compute_suggestion_error(exc_value, tb, wrong_name): elif isinstance(exc_value, ImportError): try: mod = __import__(exc_value.name) - d = dir(mod) + try: + d = dir(mod) + except TypeError: # Attributes are unsortable, e.g. int and str + d = list(mod.__dict__.keys()) + d = sorted([x for x in d if isinstance(x, str)]) if wrong_name[:1] != '_': d = [x for x in d if x[:1] != '_'] except Exception: @@ -1628,6 +1636,7 @@ def _compute_suggestion_error(exc_value, tb, wrong_name): + list(frame.f_globals) + list(frame.f_builtins) ) + d = [x for x in d if isinstance(x, str)] # Check first if we are in a method and the instance # has the wrong name as attribute diff --git a/Lib/turtle.py b/Lib/turtle.py index e88981d298ad52..e5ce2c0a03cad6 100644 --- a/Lib/turtle.py +++ b/Lib/turtle.py @@ -1214,16 +1214,32 @@ def turtles(self): def bgcolor(self, *args): """Set or return backgroundcolor of the TurtleScreen. - Arguments (if given): a color string or three numbers - in the range 0..colormode or a 3-tuple of such numbers. + Four input formats are allowed: + - bgcolor() + Return the current background color as color specification + string or as a tuple (see example). May be used as input + to another color/pencolor/fillcolor/bgcolor call. + - bgcolor(colorstring) + Set the background color to colorstring, which is a Tk color + specification string, such as "red", "yellow", or "#33cc8c". + - bgcolor((r, g, b)) + Set the background color to the RGB color represented by + the tuple of r, g, and b. Each of r, g, and b must be in + the range 0..colormode, where colormode is either 1.0 or 255 + (see colormode()). + - bgcolor(r, g, b) + Set the background color to the RGB color represented by + r, g, and b. Each of r, g, and b must be in the range + 0..colormode. Example (for a TurtleScreen instance named screen): >>> screen.bgcolor("orange") >>> screen.bgcolor() 'orange' - >>> screen.bgcolor(0.5,0,0.5) + >>> colormode(255) + >>> screen.bgcolor('#800080') >>> screen.bgcolor() - '#800080' + (128.0, 0.0, 128.0) """ if args: color = self._colorstr(args) @@ -1678,7 +1694,7 @@ def forward(self, distance): Example (for a Turtle instance named turtle): >>> turtle.position() - (0.00, 0.00) + (0.00,0.00) >>> turtle.forward(25) >>> turtle.position() (25.00,0.00) @@ -1701,10 +1717,10 @@ def back(self, distance): Example (for a Turtle instance named turtle): >>> turtle.position() - (0.00, 0.00) + (0.00,0.00) >>> turtle.backward(30) >>> turtle.position() - (-30.00, 0.00) + (-30.00,0.00) """ self._go(-distance) @@ -1811,7 +1827,7 @@ def goto(self, x, y=None): Example (for a Turtle instance named turtle): >>> tp = turtle.pos() >>> tp - (0.00, 0.00) + (0.00,0.00) >>> turtle.setpos(60,30) >>> turtle.pos() (60.00,30.00) @@ -1891,7 +1907,7 @@ def distance(self, x, y=None): Example (for a Turtle instance named turtle): >>> turtle.pos() - (0.00, 0.00) + (0.00,0.00) >>> turtle.distance(30,40) 50.0 >>> pen = Turtle() @@ -2230,19 +2246,17 @@ def color(self, *args): Arguments: Several input formats are allowed. - They use 0, 1, 2, or 3 arguments as follows: - - color() - Return the current pencolor and the current fillcolor - as a pair of color specification strings as are returned - by pencolor and fillcolor. - color(colorstring), color((r,g,b)), color(r,g,b) - inputs as in pencolor, set both, fillcolor and pencolor, + They use 0 to 3 arguments as follows: + - color() + Return the current pencolor and the current fillcolor as + a pair of color specification strings or tuples as returned + by pencolor() and fillcolor(). + - color(colorstring), color((r,g,b)), color(r,g,b) + Inputs as in pencolor(), set both, fillcolor and pencolor, to the given value. - color(colorstring1, colorstring2), - color((r1,g1,b1), (r2,g2,b2)) - equivalent to pencolor(colorstring1) and fillcolor(colorstring2) - and analogously, if the other input format is used. + - color(colorstring1, colorstring2), color((r1,g1,b1), (r2,g2,b2)) + Equivalent to pencolor(colorstring1) and fillcolor(colorstring2) + and analogously if the other input format is used. If turtleshape is a polygon, outline and interior of that polygon is drawn with the newly set colors. @@ -2253,9 +2267,9 @@ def color(self, *args): >>> turtle.color() ('red', 'green') >>> colormode(255) - >>> color((40, 80, 120), (160, 200, 240)) + >>> color(('#285078', '#a0c8f0')) >>> color() - ('#285078', '#a0c8f0') + ((40.0, 80.0, 120.0), (160.0, 200.0, 240.0)) """ if args: l = len(args) @@ -2277,28 +2291,32 @@ def pencolor(self, *args): Arguments: Four input formats are allowed: - pencolor() - Return the current pencolor as color specification string, - possibly in hex-number format (see example). - May be used as input to another color/pencolor/fillcolor call. + Return the current pencolor as color specification string or + as a tuple (see example). May be used as input to another + color/pencolor/fillcolor/bgcolor call. - pencolor(colorstring) - s is a Tk color specification string, such as "red" or "yellow" + Set pencolor to colorstring, which is a Tk color + specification string, such as "red", "yellow", or "#33cc8c". - pencolor((r, g, b)) - *a tuple* of r, g, and b, which represent, an RGB color, - and each of r, g, and b are in the range 0..colormode, - where colormode is either 1.0 or 255 + Set pencolor to the RGB color represented by the tuple of + r, g, and b. Each of r, g, and b must be in the range + 0..colormode, where colormode is either 1.0 or 255 (see + colormode()). - pencolor(r, g, b) - r, g, and b represent an RGB color, and each of r, g, and b - are in the range 0..colormode + Set pencolor to the RGB color represented by r, g, and b. + Each of r, g, and b must be in the range 0..colormode. If turtleshape is a polygon, the outline of that polygon is drawn with the newly set pencolor. Example (for a Turtle instance named turtle): >>> turtle.pencolor('brown') - >>> tup = (0.2, 0.8, 0.55) - >>> turtle.pencolor(tup) >>> turtle.pencolor() - '#33cc8c' + 'brown' + >>> colormode(255) + >>> turtle.pencolor('#32c18f') + >>> turtle.pencolor() + (50.0, 193.0, 143.0) """ if args: color = self._colorstr(args) @@ -2315,26 +2333,31 @@ def fillcolor(self, *args): Four input formats are allowed: - fillcolor() Return the current fillcolor as color specification string, - possibly in hex-number format (see example). - May be used as input to another color/pencolor/fillcolor call. + possibly in tuple format (see example). May be used as + input to another color/pencolor/fillcolor/bgcolor call. - fillcolor(colorstring) - s is a Tk color specification string, such as "red" or "yellow" + Set fillcolor to colorstring, which is a Tk color + specification string, such as "red", "yellow", or "#33cc8c". - fillcolor((r, g, b)) - *a tuple* of r, g, and b, which represent, an RGB color, - and each of r, g, and b are in the range 0..colormode, - where colormode is either 1.0 or 255 + Set fillcolor to the RGB color represented by the tuple of + r, g, and b. Each of r, g, and b must be in the range + 0..colormode, where colormode is either 1.0 or 255 (see + colormode()). - fillcolor(r, g, b) - r, g, and b represent an RGB color, and each of r, g, and b - are in the range 0..colormode + Set fillcolor to the RGB color represented by r, g, and b. + Each of r, g, and b must be in the range 0..colormode. If turtleshape is a polygon, the interior of that polygon is drawn with the newly set fillcolor. Example (for a Turtle instance named turtle): >>> turtle.fillcolor('violet') - >>> col = turtle.pencolor() - >>> turtle.fillcolor(col) - >>> turtle.fillcolor(0, .5, 0) + >>> turtle.fillcolor() + 'violet' + >>> colormode(255) + >>> turtle.fillcolor('#ffffff') + >>> turtle.fillcolor() + (255.0, 255.0, 255.0) """ if args: color = self._colorstr(args) diff --git a/Lib/typing.py b/Lib/typing.py index 2baf655256d1eb..92b78defd11976 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -65,6 +65,7 @@ # ABCs (from collections.abc). 'AbstractSet', # collections.abc.Set. + 'ByteString', 'Container', 'ContextManager', 'Hashable', @@ -171,16 +172,16 @@ def __getattr__(self, attr): _lazy_annotationlib = _LazyAnnotationLib() -def _type_convert(arg, module=None, *, allow_special_forms=False): +def _type_convert(arg, module=None, *, allow_special_forms=False, owner=None): """For converting None to type(None), and strings to ForwardRef.""" if arg is None: return type(None) if isinstance(arg, str): - return _make_forward_ref(arg, module=module, is_class=allow_special_forms) + return _make_forward_ref(arg, module=module, is_class=allow_special_forms, owner=owner) return arg -def _type_check(arg, msg, is_argument=True, module=None, *, allow_special_forms=False): +def _type_check(arg, msg, is_argument=True, module=None, *, allow_special_forms=False, owner=None): """Check that the argument is a type, and return it (internal helper). As a special case, accept None and return type(None) instead. Also wrap strings @@ -198,7 +199,7 @@ def _type_check(arg, msg, is_argument=True, module=None, *, allow_special_forms= if is_argument: invalid_generic_forms += (Final,) - arg = _type_convert(arg, module=module, allow_special_forms=allow_special_forms) + arg = _type_convert(arg, module=module, allow_special_forms=allow_special_forms, owner=owner) if (isinstance(arg, _GenericAlias) and arg.__origin__ in invalid_generic_forms): raise TypeError(f"{arg} is not valid as type argument") @@ -430,7 +431,7 @@ def __repr__(self): def _eval_type(t, globalns, localns, type_params=_sentinel, *, recursive_guard=frozenset(), - format=None, owner=None): + format=None, owner=None, parent_fwdref=None, prefer_fwd_module=False): """Evaluate all forward references in the given type t. For use of globalns and localns see the docstring for get_type_hints(). @@ -443,15 +444,27 @@ def _eval_type(t, globalns, localns, type_params=_sentinel, *, recursive_guard=f if isinstance(t, _lazy_annotationlib.ForwardRef): # If the forward_ref has __forward_module__ set, evaluate() infers the globals # from the module, and it will probably pick better than the globals we have here. - if t.__forward_module__ is not None: + # We do this only for calls from get_type_hints() (which opts in through the + # prefer_fwd_module flag), so that the default behavior remains more straightforward. + if prefer_fwd_module and t.__forward_module__ is not None: globalns = None + # If there are type params on the owner, we need to add them back, because + # annotationlib won't. + if owner_type_params := getattr(owner, "__type_params__", None): + globalns = getattr( + sys.modules.get(t.__forward_module__, None), "__dict__", None + ) + if globalns is not None: + globalns = dict(globalns) + for type_param in owner_type_params: + globalns[type_param.__name__] = type_param return evaluate_forward_ref(t, globals=globalns, locals=localns, type_params=type_params, owner=owner, _recursive_guard=recursive_guard, format=format) if isinstance(t, (_GenericAlias, GenericAlias, Union)): if isinstance(t, GenericAlias): args = tuple( - _make_forward_ref(arg) if isinstance(arg, str) else arg + _make_forward_ref(arg, parent_fwdref=parent_fwdref) if isinstance(arg, str) else arg for arg in t.__args__ ) is_unpacked = t.__unpacked__ @@ -465,7 +478,7 @@ def _eval_type(t, globalns, localns, type_params=_sentinel, *, recursive_guard=f ev_args = tuple( _eval_type( a, globalns, localns, type_params, recursive_guard=recursive_guard, - format=format, owner=owner, + format=format, owner=owner, prefer_fwd_module=prefer_fwd_module, ) for a in t.__args__ ) @@ -936,7 +949,12 @@ def run(arg: Child | Unrelated): return _GenericAlias(self, (item,)) -def _make_forward_ref(code, **kwargs): +def _make_forward_ref(code, *, parent_fwdref=None, **kwargs): + if parent_fwdref is not None: + if parent_fwdref.__forward_module__ is not None: + kwargs['module'] = parent_fwdref.__forward_module__ + if parent_fwdref.__owner__ is not None: + kwargs['owner'] = parent_fwdref.__owner__ forward_ref = _lazy_annotationlib.ForwardRef(code, **kwargs) # For compatibility, eagerly compile the forwardref's code. forward_ref.__forward_code__ @@ -956,12 +974,8 @@ def evaluate_forward_ref( """Evaluate a forward reference as a type hint. This is similar to calling the ForwardRef.evaluate() method, - but unlike that method, evaluate_forward_ref() also: - - * Recursively evaluates forward references nested within the type hint. - * Rejects certain objects that are not valid type hints. - * Replaces type hints that evaluate to None with types.NoneType. - * Supports the *FORWARDREF* and *STRING* formats. + but unlike that method, evaluate_forward_ref() also + recursively evaluates forward references nested within the type hint. *forward_ref* must be an instance of ForwardRef. *owner*, if given, should be the object that holds the annotations that the forward reference @@ -981,35 +995,39 @@ def evaluate_forward_ref( if forward_ref.__forward_arg__ in _recursive_guard: return forward_ref - try: - value = forward_ref.evaluate(globals=globals, locals=locals, - type_params=type_params, owner=owner) - except NameError: - if format == _lazy_annotationlib.Format.FORWARDREF: - return forward_ref - else: - raise - - type_ = _type_check( - value, - "Forward references must evaluate to types.", - is_argument=forward_ref.__forward_is_argument__, - allow_special_forms=forward_ref.__forward_is_class__, - ) + if format is None: + format = _lazy_annotationlib.Format.VALUE + value = forward_ref.evaluate(globals=globals, locals=locals, + type_params=type_params, owner=owner, format=format) + + if (isinstance(value, _lazy_annotationlib.ForwardRef) + and format == _lazy_annotationlib.Format.FORWARDREF): + return value + + if isinstance(value, str): + value = _make_forward_ref(value, module=forward_ref.__forward_module__, + owner=owner or forward_ref.__owner__, + is_argument=forward_ref.__forward_is_argument__, + is_class=forward_ref.__forward_is_class__) + if owner is None: + owner = forward_ref.__owner__ return _eval_type( - type_, + value, globals, locals, type_params, recursive_guard=_recursive_guard | {forward_ref.__forward_arg__}, format=format, owner=owner, + parent_fwdref=forward_ref, ) def _is_unpacked_typevartuple(x: Any) -> bool: + # Need to check 'is True' here + # See: https://github.com/python/cpython/issues/137706 return ((not isinstance(x, type)) and - getattr(x, '__typing_is_unpacked_typevartuple__', False)) + getattr(x, '__typing_is_unpacked_typevartuple__', False) is True) def _is_typevar_like(x: Any) -> bool: @@ -1079,7 +1097,7 @@ def _paramspec_prepare_subst(self, alias, args): params = alias.__parameters__ i = params.index(self) if i == len(args) and self.has_default(): - args = [*args, self.__default__] + args = (*args, self.__default__) if i >= len(args): raise TypeError(f"Too few arguments for {alias}") # Special case where Z[[int, str, bool]] == Z[int, str, bool] in PEP 612. @@ -1124,14 +1142,26 @@ def _generic_class_getitem(cls, args): f"Parameters to {cls.__name__}[...] must all be unique") else: # Subscripting a regular Generic subclass. - for param in cls.__parameters__: + try: + parameters = cls.__parameters__ + except AttributeError as e: + init_subclass = getattr(cls, '__init_subclass__', None) + if init_subclass not in {None, Generic.__init_subclass__}: + e.add_note( + f"Note: this exception may have been caused by " + f"{init_subclass.__qualname__!r} (or the " + f"'__init_subclass__' method on a superclass) not " + f"calling 'super().__init_subclass__()'" + ) + raise + for param in parameters: prepare = getattr(param, '__typing_prepare_subst__', None) if prepare is not None: args = prepare(cls, args) _check_generic_specialization(cls, args) new_args = [] - for param, new_arg in zip(cls.__parameters__, args): + for param, new_arg in zip(parameters, args): if isinstance(param, TypeVarTuple): new_args.extend(new_arg) else: @@ -1567,6 +1597,21 @@ def __ror__(self, left): return Union[left, self] +class _DeprecatedGenericAlias(_SpecialGenericAlias, _root=True): + def __init__( + self, origin, nparams, *, removal_version, inst=True, name=None + ): + super().__init__(origin, nparams, inst=inst, name=name) + self._removal_version = removal_version + + def __instancecheck__(self, inst): + import warnings + warnings._deprecated( + f"{self.__module__}.{self._name}", remove=self._removal_version + ) + return super().__instancecheck__(inst) + + class _CallableGenericAlias(_NotIterable, _GenericAlias, _root=True): def __repr__(self): assert self._name == 'Callable' @@ -1649,6 +1694,9 @@ def __eq__(self, other): return True return NotImplemented + def __hash__(self): + return hash(Union) + class _UnionGenericAlias(metaclass=_UnionGenericAliasMeta): """Compatibility hack. @@ -2334,13 +2382,16 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False, # *base_globals* first rather than *base_locals*. # This only affects ForwardRefs. base_globals, base_locals = base_locals, base_globals + type_params = base.__type_params__ + base_globals, base_locals = _add_type_params_to_scope( + type_params, base_globals, base_locals, True) for name, value in ann.items(): - if value is None: - value = type(None) if isinstance(value, str): value = _make_forward_ref(value, is_argument=False, is_class=True) - value = _eval_type(value, base_globals, base_locals, base.__type_params__, - format=format, owner=obj) + value = _eval_type(value, base_globals, base_locals, (), + format=format, owner=obj, prefer_fwd_module=True) + if value is None: + value = type(None) hints[name] = value if include_extras or format == Format.STRING: return hints @@ -2373,9 +2424,8 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False, elif localns is None: localns = globalns type_params = getattr(obj, "__type_params__", ()) + globalns, localns = _add_type_params_to_scope(type_params, globalns, localns, False) for name, value in hints.items(): - if value is None: - value = type(None) if isinstance(value, str): # class-level forward refs were handled above, this must be either # a module-level annotation or a function argument annotation @@ -2384,10 +2434,27 @@ def get_type_hints(obj, globalns=None, localns=None, include_extras=False, is_argument=not isinstance(obj, types.ModuleType), is_class=False, ) - hints[name] = _eval_type(value, globalns, localns, type_params, format=format, owner=obj) + value = _eval_type(value, globalns, localns, (), format=format, owner=obj, prefer_fwd_module=True) + if value is None: + value = type(None) + hints[name] = value return hints if include_extras else {k: _strip_annotations(t) for k, t in hints.items()} +# Add type parameters to the globals and locals scope. This is needed for +# compatibility. +def _add_type_params_to_scope(type_params, globalns, localns, is_class): + if not type_params: + return globalns, localns + globalns = dict(globalns) + localns = dict(localns) + for param in type_params: + if not is_class or param.__name__ not in globalns: + globalns[param.__name__] = param + localns.pop(param.__name__, None) + return globalns, localns + + def _strip_annotations(t): """Strip the annotations from a given type.""" if isinstance(t, _AnnotatedAlias): @@ -2730,6 +2797,9 @@ class Other(Leaf): # Error reported by type checker MutableMapping = _alias(collections.abc.MutableMapping, 2) Sequence = _alias(collections.abc.Sequence, 1) MutableSequence = _alias(collections.abc.MutableSequence, 1) +ByteString = _DeprecatedGenericAlias( + collections.abc.ByteString, 0, removal_version=(3, 17) # Not generic. +) # Tuple accepts variable number of parameters. Tuple = _TupleType(tuple, -1, inst=False, name='Tuple') Tuple.__doc__ = \ @@ -3084,14 +3154,16 @@ def __new__(cls, name, bases, ns, total=True): else: generic_base = () + ns_annotations = ns.pop('__annotations__', None) + tp_dict = type.__new__(_TypedDictMeta, name, (*generic_base, dict), ns) if not hasattr(tp_dict, '__orig_bases__'): tp_dict.__orig_bases__ = bases - if "__annotations__" in ns: + if ns_annotations is not None: own_annotate = None - own_annotations = ns["__annotations__"] + own_annotations = ns_annotations elif (own_annotate := _lazy_annotationlib.get_annotate_from_class_namespace(ns)) is not None: own_annotations = _lazy_annotationlib.call_annotate_function( own_annotate, _lazy_annotationlib.Format.FORWARDREF, owner=tp_dict @@ -3101,7 +3173,7 @@ def __new__(cls, name, bases, ns, total=True): own_annotations = {} msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type" own_checked_annotations = { - n: _type_check(tp, msg, module=tp_dict.__module__) + n: _type_check(tp, msg, owner=tp_dict, module=tp_dict.__module__) for n, tp in own_annotations.items() } required_keys = set() @@ -3162,7 +3234,7 @@ def __annotate__(format): if base_annotate is None: continue base_annos = _lazy_annotationlib.call_annotate_function( - base.__annotate__, format, owner=base) + base_annotate, format, owner=base) annos.update(base_annos) if own_annotate is not None: own = _lazy_annotationlib.call_annotate_function( diff --git a/Lib/unittest/main.py b/Lib/unittest/main.py index 6fd949581f3146..be99d93c78cca6 100644 --- a/Lib/unittest/main.py +++ b/Lib/unittest/main.py @@ -269,12 +269,12 @@ def runTests(self): testRunner = self.testRunner self.result = testRunner.run(self.test) if self.exit: - if self.result.testsRun == 0 and len(self.result.skipped) == 0: + if not self.result.wasSuccessful(): + sys.exit(1) + elif self.result.testsRun == 0 and len(self.result.skipped) == 0: sys.exit(_NO_TESTS_EXITCODE) - elif self.result.wasSuccessful(): - sys.exit(0) else: - sys.exit(1) + sys.exit(0) main = TestProgram diff --git a/Lib/unittest/mock.py b/Lib/unittest/mock.py index 55cb4b1f6aff90..e370aa48b7c703 100644 --- a/Lib/unittest/mock.py +++ b/Lib/unittest/mock.py @@ -569,6 +569,11 @@ def _mock_add_spec(self, spec, spec_set, _spec_as_instance=False, __dict__['_mock_methods'] = spec __dict__['_spec_asyncs'] = _spec_asyncs + def _mock_extend_spec_methods(self, spec_methods): + methods = self.__dict__.get('_mock_methods') or [] + methods.extend(spec_methods) + self.__dict__['_mock_methods'] = methods + def __get_return_value(self): ret = self._mock_return_value if self._mock_delegate is not None: @@ -2766,14 +2771,16 @@ def create_autospec(spec, spec_set=False, instance=False, _parent=None, raise InvalidSpecError(f'Cannot autospec a Mock object. ' f'[object={spec!r}]') is_async_func = _is_async_func(spec) + _kwargs = {'spec': spec} entries = [(entry, _missing) for entry in dir(spec)] if is_type and instance and is_dataclass(spec): + is_dataclass_spec = True dataclass_fields = fields(spec) entries.extend((f.name, f.type) for f in dataclass_fields) - _kwargs = {'spec': [f.name for f in dataclass_fields]} + dataclass_spec_list = [f.name for f in dataclass_fields] else: - _kwargs = {'spec': spec} + is_dataclass_spec = False if spec_set: _kwargs = {'spec_set': spec} @@ -2810,6 +2817,8 @@ def create_autospec(spec, spec_set=False, instance=False, _parent=None, mock = Klass(parent=_parent, _new_parent=_parent, _new_name=_new_name, name=_name, **_kwargs) + if is_dataclass_spec: + mock._mock_extend_spec_methods(dataclass_spec_list) if isinstance(spec, FunctionTypes): # should only happen at the top level because we don't diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 41dc5d7b35dedb..566b8087aec277 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1535,6 +1535,7 @@ def ftp_open(self, req): dirs, file = dirs[:-1], dirs[-1] if dirs and not dirs[0]: dirs = dirs[1:] + fw = None try: fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) type = file and 'I' or 'D' @@ -1552,8 +1553,12 @@ def ftp_open(self, req): headers += "Content-length: %d\n" % retrlen headers = email.message_from_string(headers) return addinfourl(fp, headers, req.full_url) - except ftplib.all_errors as exp: - raise URLError(f"ftp error: {exp}") from exp + except Exception as exp: + if fw is not None and not fw.keepalive: + fw.close() + if isinstance(exp, ftplib.all_errors): + raise URLError(f"ftp error: {exp}") from exp + raise def connect_ftp(self, user, passwd, host, port, dirs, timeout): return ftpwrapper(user, passwd, host, port, dirs, timeout, @@ -1577,14 +1582,15 @@ def setMaxConns(self, m): def connect_ftp(self, user, passwd, host, port, dirs, timeout): key = user, host, port, '/'.join(dirs), timeout - if key in self.cache: - self.timeout[key] = time.time() + self.delay - else: - self.cache[key] = ftpwrapper(user, passwd, host, port, - dirs, timeout) - self.timeout[key] = time.time() + self.delay + conn = self.cache.get(key) + if conn is None or not conn.keepalive: + if conn is not None: + conn.close() + conn = self.cache[key] = ftpwrapper(user, passwd, host, port, + dirs, timeout) + self.timeout[key] = time.time() + self.delay self.check_cache() - return self.cache[key] + return conn def check_cache(self): # first check for old ones @@ -1654,13 +1660,16 @@ def url2pathname(url, *, require_scheme=False, resolve_host=False): The URL authority may be resolved with gethostbyname() if *resolve_host* is set to true. """ - if require_scheme: - scheme, url = _splittype(url) - if scheme != 'file': - raise URLError("URL is missing a 'file:' scheme") - authority, url = _splithost(url) + if not require_scheme: + url = 'file:' + url + scheme, authority, url = urlsplit(url)[:3] # Discard query and fragment. + if scheme != 'file': + raise URLError("URL is missing a 'file:' scheme") if os.name == 'nt': - if not _is_local_authority(authority, resolve_host): + if authority[1:2] == ':': + # e.g. file://c:/file.txt + url = authority + url + elif not _is_local_authority(authority, resolve_host): # e.g. file://server/share/file.txt url = '//' + authority + url elif url[:3] == '///': diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py index 409f2b2e48de6e..4009fd6b58f594 100644 --- a/Lib/urllib/robotparser.py +++ b/Lib/urllib/robotparser.py @@ -11,6 +11,7 @@ """ import collections +import re import urllib.error import urllib.parse import urllib.request @@ -20,6 +21,19 @@ RequestRate = collections.namedtuple("RequestRate", "requests seconds") +def normalize(path): + unquoted = urllib.parse.unquote(path, errors='surrogateescape') + return urllib.parse.quote(unquoted, errors='surrogateescape') + +def normalize_path(path): + path, sep, query = path.partition('?') + path = normalize(path) + if sep: + query = re.sub(r'[^=&]+', lambda m: normalize(m[0]), query) + path += '?' + query + return path + + class RobotFileParser: """ This class provides a set of methods to read, parse and answer questions about a single robots.txt file. @@ -55,7 +69,7 @@ def modified(self): def set_url(self, url): """Sets the URL referring to a robots.txt file.""" self.url = url - self.host, self.path = urllib.parse.urlparse(url)[1:3] + self.host, self.path = urllib.parse.urlsplit(url)[1:3] def read(self): """Reads the robots.txt URL and feeds it to the parser.""" @@ -69,7 +83,7 @@ def read(self): err.close() else: raw = f.read() - self.parse(raw.decode("utf-8").splitlines()) + self.parse(raw.decode("utf-8", "surrogateescape").splitlines()) def _add_entry(self, entry): if "*" in entry.useragents: @@ -113,7 +127,7 @@ def parse(self, lines): line = line.split(':', 1) if len(line) == 2: line[0] = line[0].strip().lower() - line[1] = urllib.parse.unquote(line[1].strip()) + line[1] = line[1].strip() if line[0] == "user-agent": if state == 2: self._add_entry(entry) @@ -167,10 +181,11 @@ def can_fetch(self, useragent, url): return False # search for given user agent matches # the first match counts - parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url)) - url = urllib.parse.urlunparse(('','',parsed_url.path, - parsed_url.params,parsed_url.query, parsed_url.fragment)) - url = urllib.parse.quote(url) + # TODO: The private API is used in order to preserve an empty query. + # This is temporary until the public API starts supporting this feature. + parsed_url = urllib.parse._urlsplit(url, '') + url = urllib.parse._urlunsplit(None, None, *parsed_url[2:]) + url = normalize_path(url) if not url: url = "/" for entry in self.entries: @@ -213,7 +228,6 @@ def __str__(self): entries = entries + [self.default_entry] return '\n\n'.join(map(str, entries)) - class RuleLine: """A rule line is a single "Allow:" (allowance==True) or "Disallow:" (allowance==False) followed by a path.""" @@ -221,8 +235,7 @@ def __init__(self, path, allowance): if path == '' and not allowance: # an empty value means allow all allowance = True - path = urllib.parse.urlunparse(urllib.parse.urlparse(path)) - self.path = urllib.parse.quote(path) + self.path = normalize_path(path) self.allowance = allowance def applies_to(self, filename): @@ -268,7 +281,7 @@ def applies_to(self, useragent): def allowance(self, filename): """Preconditions: - our agent applies to this entry - - filename is URL decoded""" + - filename is URL encoded""" for line in self.rulelines: if line.applies_to(filename): return line.allowance diff --git a/Lib/uuid.py b/Lib/uuid.py index 036ffebf67a0be..313f2fc46cb346 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -633,39 +633,43 @@ def _netstat_getnode(): try: import _uuid _generate_time_safe = getattr(_uuid, "generate_time_safe", None) + _has_stable_extractable_node = _uuid.has_stable_extractable_node _UuidCreate = getattr(_uuid, "UuidCreate", None) except ImportError: _uuid = None _generate_time_safe = None + _has_stable_extractable_node = False _UuidCreate = None def _unix_getnode(): """Get the hardware address on Unix using the _uuid extension module.""" - if _generate_time_safe: + if _generate_time_safe and _has_stable_extractable_node: uuid_time, _ = _generate_time_safe() return UUID(bytes=uuid_time).node def _windll_getnode(): """Get the hardware address on Windows using the _uuid extension module.""" - if _UuidCreate: + if _UuidCreate and _has_stable_extractable_node: uuid_bytes = _UuidCreate() return UUID(bytes_le=uuid_bytes).node def _random_getnode(): """Get a random node ID.""" - # RFC 4122, $4.1.6 says "For systems with no IEEE address, a randomly or - # pseudo-randomly generated value may be used; see Section 4.5. The - # multicast bit must be set in such addresses, in order that they will - # never conflict with addresses obtained from network cards." + # RFC 9562, §6.10-3 says that + # + # Implementations MAY elect to obtain a 48-bit cryptographic-quality + # random number as per Section 6.9 to use as the Node ID. [...] [and] + # implementations MUST set the least significant bit of the first octet + # of the Node ID to 1. This bit is the unicast or multicast bit, which + # will never be set in IEEE 802 addresses obtained from network cards. # # The "multicast bit" of a MAC address is defined to be "the least # significant bit of the first octet". This works out to be the 41st bit # counting from 1 being the least significant bit, or 1<<40. # # See https://en.wikipedia.org/w/index.php?title=MAC_address&oldid=1128764812#Universal_vs._local_(U/L_bit) - import random - return random.getrandbits(48) | (1 << 40) + return int.from_bytes(os.urandom(6)) | (1 << 40) # _OS_GETTERS, when known, are targeted for a specific OS or platform. diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index 15e15b7a5184c2..17ee28e826d5cf 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -315,7 +315,7 @@ def setup_python(self, context): os.chmod(path, 0o755) suffixes = ['python', 'python3', f'python3.{sys.version_info[1]}'] - if sys.version_info[:2] == (3, 14): + if sys.version_info[:2] == (3, 14) and sys.getfilesystemencoding() == 'utf-8': suffixes.append('𝜋thon') for suffix in suffixes: path = os.path.join(binpath, suffix) diff --git a/Lib/wave.py b/Lib/wave.py index a34af244c3e224..b8476e264868fc 100644 --- a/Lib/wave.py +++ b/Lib/wave.py @@ -441,6 +441,8 @@ class Wave_write: _datawritten -- the size of the audio samples actually written """ + _file = None + def __init__(self, f): self._i_opened_the_file = None if isinstance(f, str): diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index db51f350ea0153..0a2ccc00f1857d 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -292,13 +292,6 @@ def _append_child(self, node): childNodes.append(node) node.parentNode = self -def _in_document(node): - # return True iff node is part of a document tree - while node is not None: - if node.nodeType == Node.DOCUMENT_NODE: - return True - node = node.parentNode - return False def _write_data(writer, text, attr): "Writes datachars to writer." @@ -1555,7 +1548,7 @@ def _clear_id_cache(node): if node.nodeType == Node.DOCUMENT_NODE: node._id_cache.clear() node._id_search_stack = None - elif _in_document(node): + elif node.ownerDocument: node.ownerDocument._id_cache.clear() node.ownerDocument._id_search_stack= None diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 44ab5d18624e73..dafe5b1b8a0c3f 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -527,7 +527,9 @@ class ElementTree: """ def __init__(self, element=None, file=None): - # assert element is None or iselement(element) + if element is not None and not iselement(element): + raise TypeError('expected an Element, not %s' % + type(element).__name__) self._root = element # first node if file: self.parse(file) @@ -543,7 +545,9 @@ def _setroot(self, element): with the given element. Use with care! """ - # assert iselement(element) + if not iselement(element): + raise TypeError('expected an Element, not %s' + % type(element).__name__) self._root = element def parse(self, source, parser=None): @@ -709,6 +713,8 @@ def write(self, file_or_filename, of start/end tags """ + if self._root is None: + raise TypeError('ElementTree not initialized') if not method: method = "xml" elif method not in _serialize: diff --git a/Lib/xmlrpc/server.py b/Lib/xmlrpc/server.py index 90a356fbb8eae4..3e6871157d0929 100644 --- a/Lib/xmlrpc/server.py +++ b/Lib/xmlrpc/server.py @@ -239,7 +239,7 @@ def register_multicall_functions(self): see http://www.xmlrpc.com/discuss/msgReader$1208""" - self.funcs.update({'system.multicall' : self.system_multicall}) + self.funcs['system.multicall'] = self.system_multicall def _marshaled_dispatch(self, data, dispatch_method = None, path = None): """Dispatches an XML-RPC method from marshalled (XML) data. @@ -578,7 +578,7 @@ class SimpleXMLRPCServer(socketserver.TCPServer, """ allow_reuse_address = True - allow_reuse_port = True + allow_reuse_port = False # Warning: this is for debugging purposes only! Never set this to True in # production code, as will be sending out sensitive information (exception diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 88356abe8cbaeb..ac2332e58468a2 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -38,8 +38,8 @@ __all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", - "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", - "Path"] + "ZIP_ZSTANDARD", "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", + "LargeZipFile", "Path"] class BadZipFile(Exception): pass @@ -234,8 +234,19 @@ def strip(cls, data, xids): def _check_zipfile(fp): try: - if _EndRecData(fp): - return True # file has correct magic number + endrec = _EndRecData(fp) + if endrec: + if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0: + return True # Empty zipfiles are still zipfiles + elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]: + # Central directory is on the same disk + fp.seek(sum(_handle_prepended_data(endrec))) + if endrec[_ECD_SIZE] >= sizeCentralDir: + data = fp.read(sizeCentralDir) # CD is where we expect it to be + if len(data) == sizeCentralDir: + centdir = struct.unpack(structCentralDir, data) # CD is the right size + if centdir[_CD_SIGNATURE] == stringCentralDir: + return True # First central directory entry has correct magic number except OSError: pass return False @@ -254,24 +265,36 @@ def is_zipfile(filename): else: with open(filename, "rb") as fp: result = _check_zipfile(fp) - except OSError: + except (OSError, BadZipFile): pass return result +def _handle_prepended_data(endrec, debug=0): + size_cd = endrec[_ECD_SIZE] # bytes in central directory + offset_cd = endrec[_ECD_OFFSET] # offset of central directory + + # "concat" is zero, unless zip was concatenated to another file + concat = endrec[_ECD_LOCATION] - size_cd - offset_cd + + if debug > 2: + inferred = concat + offset_cd + print("given, inferred, offset", offset_cd, inferred, concat) + + return offset_cd, concat + def _EndRecData64(fpin, offset, endrec): """ Read the ZIP64 end-of-archive records and use that to update endrec """ - try: - fpin.seek(offset - sizeEndCentDir64Locator, 2) - except OSError: - # If the seek fails, the file is not large enough to contain a ZIP64 + offset -= sizeEndCentDir64Locator + if offset < 0: + # The file is not large enough to contain a ZIP64 # end-of-archive record, so just return the end record we were given. return endrec - + fpin.seek(offset) data = fpin.read(sizeEndCentDir64Locator) if len(data) != sizeEndCentDir64Locator: - return endrec + raise OSError("Unknown I/O error") sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) if sig != stringEndArchive64Locator: return endrec @@ -279,16 +302,33 @@ def _EndRecData64(fpin, offset, endrec): if diskno != 0 or disks > 1: raise BadZipFile("zipfiles that span multiple disks are not supported") - # Assume no 'zip64 extensible data' - fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) + offset -= sizeEndCentDir64 + if reloff > offset: + raise BadZipFile("Corrupt zip64 end of central directory locator") + # First, check the assumption that there is no prepended data. + fpin.seek(reloff) + extrasz = offset - reloff data = fpin.read(sizeEndCentDir64) if len(data) != sizeEndCentDir64: - return endrec + raise OSError("Unknown I/O error") + if not data.startswith(stringEndArchive64) and reloff != offset: + # Since we already have seen the Zip64 EOCD Locator, it's + # possible we got here because there is prepended data. + # Assume no 'zip64 extensible data' + fpin.seek(offset) + extrasz = 0 + data = fpin.read(sizeEndCentDir64) + if len(data) != sizeEndCentDir64: + raise OSError("Unknown I/O error") + if not data.startswith(stringEndArchive64): + raise BadZipFile("Zip64 end of central directory record not found") + sig, sz, create_version, read_version, disk_num, disk_dir, \ dircount, dircount2, dirsize, diroffset = \ struct.unpack(structEndArchive64, data) - if sig != stringEndArchive64: - return endrec + if (diroffset + dirsize != reloff or + sz + 12 != sizeEndCentDir64 + extrasz): + raise BadZipFile("Corrupt zip64 end of central directory record") # Update the original endrec using data from the ZIP64 record endrec[_ECD_SIGNATURE] = sig @@ -298,6 +338,7 @@ def _EndRecData64(fpin, offset, endrec): endrec[_ECD_ENTRIES_TOTAL] = dircount2 endrec[_ECD_SIZE] = dirsize endrec[_ECD_OFFSET] = diroffset + endrec[_ECD_LOCATION] = offset - extrasz return endrec @@ -331,7 +372,7 @@ def _EndRecData(fpin): endrec.append(filesize - sizeEndCentDir) # Try to read the "Zip64 end of central directory" structure - return _EndRecData64(fpin, -sizeEndCentDir, endrec) + return _EndRecData64(fpin, filesize - sizeEndCentDir, endrec) # Either this is not a ZIP file, or it is a ZIP file with an archive # comment. Search the end of the file for the "end of central directory" @@ -355,8 +396,7 @@ def _EndRecData(fpin): endrec.append(maxCommentStart + start) # Try to read the "Zip64 end of central directory" structure - return _EndRecData64(fpin, maxCommentStart + start - filesize, - endrec) + return _EndRecData64(fpin, maxCommentStart + start, endrec) # Unable to find a valid end of central directory structure return None @@ -812,11 +852,11 @@ def _get_compressor(compress_type, compresslevel=None): if compresslevel is not None: return bz2.BZ2Compressor(compresslevel) return bz2.BZ2Compressor() - # compresslevel is ignored for ZIP_LZMA and ZIP_ZSTANDARD + # compresslevel is ignored for ZIP_LZMA elif compress_type == ZIP_LZMA: return LZMACompressor() elif compress_type == ZIP_ZSTANDARD: - return zstd.ZstdCompressor() + return zstd.ZstdCompressor(level=compresslevel) else: return None @@ -1352,7 +1392,8 @@ class ZipFile: mode: The mode can be either read 'r', write 'w', exclusive create 'x', or append 'a'. compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), - ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). + ZIP_BZIP2 (requires bz2), ZIP_LZMA (requires lzma), or + ZIP_ZSTANDARD (requires compression.zstd). allowZip64: if True ZipFile will create files with ZIP64 extensions when needed, otherwise it will raise an exception when this would be necessary. @@ -1361,6 +1402,9 @@ class ZipFile: When using ZIP_STORED or ZIP_LZMA this keyword has no effect. When using ZIP_DEFLATED integers 0 through 9 are accepted. When using ZIP_BZIP2 integers 1 through 9 are accepted. + When using ZIP_ZSTANDARD integers -7 though 22 are common, + see the CompressionParameter enum in compression.zstd for + details. """ @@ -1421,7 +1465,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, self._lock = threading.RLock() self._seekable = True self._writing = False - self._data_offset = None try: if mode == 'r': @@ -1432,7 +1475,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, self._didModify = True try: self.start_dir = self.fp.tell() - self._data_offset = self.start_dir except (AttributeError, OSError): self.fp = _Tellable(self.fp) self.start_dir = 0 @@ -1457,7 +1499,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, # even if no files are added to the archive self._didModify = True self.start_dir = self.fp.tell() - self._data_offset = self.start_dir else: raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") except: @@ -1497,28 +1538,17 @@ def _RealGetContents(self): raise BadZipFile("File is not a zip file") if self.debug > 1: print(endrec) - size_cd = endrec[_ECD_SIZE] # bytes in central directory - offset_cd = endrec[_ECD_OFFSET] # offset of central directory self._comment = endrec[_ECD_COMMENT] # archive comment - # "concat" is zero, unless zip was concatenated to another file - concat = endrec[_ECD_LOCATION] - size_cd - offset_cd - if endrec[_ECD_SIGNATURE] == stringEndArchive64: - # If Zip64 extension structures are present, account for them - concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + offset_cd, concat = _handle_prepended_data(endrec, self.debug) - # store the offset to the beginning of data for the - # .data_offset property - self._data_offset = concat - - if self.debug > 2: - inferred = concat + offset_cd - print("given, inferred, offset", offset_cd, inferred, concat) # self.start_dir: Position of start of central directory self.start_dir = offset_cd + concat + if self.start_dir < 0: raise BadZipFile("Bad offset for central directory") fp.seek(self.start_dir, 0) + size_cd = endrec[_ECD_SIZE] data = fp.read(size_cd) fp = io.BytesIO(data) total = 0 @@ -1575,12 +1605,6 @@ def _RealGetContents(self): zinfo._end_offset = end_offset end_offset = zinfo.header_offset - @property - def data_offset(self): - """The offset to the start of zip data in the file or None if - unavailable.""" - return self._data_offset - def namelist(self): """Return a list of file names in the archive.""" return [data.filename for data in self.filelist] @@ -2093,6 +2117,8 @@ def _write_end_record(self): min_version = max(BZIP2_VERSION, min_version) elif zinfo.compress_type == ZIP_LZMA: min_version = max(LZMA_VERSION, min_version) + elif zinfo.compress_type == ZIP_ZSTANDARD: + min_version = max(ZSTANDARD_VERSION, min_version) extract_version = max(min_version, zinfo.extract_version) create_version = max(min_version, zinfo.create_version) @@ -2129,7 +2155,7 @@ def _write_end_record(self): " would require ZIP64 extensions") zip64endrec = struct.pack( structEndArchive64, stringEndArchive64, - 44, 45, 45, 0, 0, centDirCount, centDirCount, + sizeEndCentDir64 - 12, 45, 45, 0, 0, centDirCount, centDirCount, centDirSize, centDirOffset) self.fp.write(zip64endrec) diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index 5ae16ec970dda4..80f5d607731fd7 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -7,19 +7,18 @@ for more detail. """ +import contextlib import io -import posixpath -import zipfile import itertools -import contextlib import pathlib +import posixpath import re import stat import sys +import zipfile from .glob import Translator - __all__ = ['Path'] @@ -192,11 +191,13 @@ def _name_set(self): self.__lookup = super()._name_set() return self.__lookup - def _extract_text_encoding(encoding=None, *args, **kwargs): # compute stack level so that the caller of the caller sees any warning. is_pypy = sys.implementation.name == 'pypy' - stack_level = 3 + is_pypy + # PyPy no longer special cased after 7.3.19 (or maybe 7.3.18) + # See jaraco/zipp#143 + is_old_pypi = is_pypy and sys.pypy_version_info < (7, 3, 19) + stack_level = 3 + is_old_pypi return io.text_encoding(encoding, stack_level), args, kwargs @@ -351,7 +352,7 @@ def open(self, mode='r', *args, pwd=None, **kwargs): return io.TextIOWrapper(stream, encoding, *args, **kwargs) def _base(self): - return pathlib.PurePosixPath(self.at or self.root.filename) + return pathlib.PurePosixPath(self.at) if self.at else self.filename @property def name(self): diff --git a/Lib/zipfile/_path/glob.py b/Lib/zipfile/_path/glob.py index d7fe45a494717a..bd2839304b7db2 100644 --- a/Lib/zipfile/_path/glob.py +++ b/Lib/zipfile/_path/glob.py @@ -1,7 +1,6 @@ import os import re - _default_seps = os.sep + str(os.altsep) * bool(os.altsep) diff --git a/Lib/zoneinfo/__init__.py b/Lib/zoneinfo/__init__.py index f5510ee0497513..df2ae909f53cf2 100644 --- a/Lib/zoneinfo/__init__.py +++ b/Lib/zoneinfo/__init__.py @@ -12,7 +12,10 @@ try: from _zoneinfo import ZoneInfo -except ImportError: # pragma: nocover +except (ImportError, AttributeError): # pragma: nocover + # AttributeError: module 'datetime' has no attribute 'datetime_CAPI'. + # This happens when the '_datetime' module is not available and the + # pure Python implementation is used instead. from ._zoneinfo import ZoneInfo reset_tzpath = _tzpath.reset_tzpath diff --git a/Lib/zoneinfo/_common.py b/Lib/zoneinfo/_common.py index 6e05abc32394bc..03cc42149f9b74 100644 --- a/Lib/zoneinfo/_common.py +++ b/Lib/zoneinfo/_common.py @@ -9,9 +9,13 @@ def load_tzdata(key): resource_name = components[-1] try: - return resources.files(package_name).joinpath(resource_name).open("rb") + path = resources.files(package_name).joinpath(resource_name) + # gh-85702: Prevent PermissionError on Windows + if path.is_dir(): + raise IsADirectoryError + return path.open("rb") except (ImportError, FileNotFoundError, UnicodeEncodeError, IsADirectoryError): - # There are three types of exception that can be raised that all amount + # There are four types of exception that can be raised that all amount # to "we cannot find this key": # # ImportError: If package_name doesn't exist (e.g. if tzdata is not diff --git a/Lib/zoneinfo/_tzpath.py b/Lib/zoneinfo/_tzpath.py index 5db17bea045d8c..177d32c35eff29 100644 --- a/Lib/zoneinfo/_tzpath.py +++ b/Lib/zoneinfo/_tzpath.py @@ -13,6 +13,13 @@ def _reset_tzpath(to=None, stacklevel=4): + f"not {type(tzpaths)}: {tzpaths!r}" ) + tzpaths = [os.fspath(p) for p in tzpaths] + if not all(isinstance(p, str) for p in tzpaths): + raise TypeError( + "All elements of a tzpath sequence must be strings or " + "os.PathLike objects which convert to strings." + ) + if not all(map(os.path.isabs, tzpaths)): raise ValueError(_get_invalid_paths_message(tzpaths)) base_tzpath = tzpaths @@ -124,7 +131,8 @@ def available_timezones(): # Start with loading from the tzdata package if it exists: this has a # pre-assembled list of zones that only requires opening one file. try: - with resources.files("tzdata").joinpath("zones").open("r") as f: + zones_file = resources.files("tzdata").joinpath("zones") + with zones_file.open("r", encoding="utf-8") as f: for zone in f: zone = zone.strip() if zone: diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py index b77dc0ed391bb3..3ffdb4c837192b 100644 --- a/Lib/zoneinfo/_zoneinfo.py +++ b/Lib/zoneinfo/_zoneinfo.py @@ -75,12 +75,12 @@ def _new_instance(cls, key): return obj @classmethod - def from_file(cls, fobj, /, key=None): + def from_file(cls, file_obj, /, key=None): obj = super().__new__(cls) obj._key = key obj._file_path = None - obj._load_file(fobj) - obj._file_repr = repr(fobj) + obj._load_file(file_obj) + obj._file_repr = repr(file_obj) # Disable pickling for objects created from files obj.__reduce__ = obj._file_reduce diff --git a/Mac/BuildScript/build-installer.py b/Mac/BuildScript/build-installer.py index b31cb766a468f4..4fd8d55f35ad57 100755 --- a/Mac/BuildScript/build-installer.py +++ b/Mac/BuildScript/build-installer.py @@ -246,9 +246,9 @@ def library_recipes(): result.extend([ dict( - name="OpenSSL 3.0.16", - url="https://github.com/openssl/openssl/releases/download/openssl-3.0.16/openssl-3.0.16.tar.gz", - checksum='57e03c50feab5d31b152af2b764f10379aecd8ee92f16c985983ce4a99f7ef86', + name="OpenSSL 3.0.18", + url="https://github.com/openssl/openssl/releases/download/openssl-3.0.18/openssl-3.0.18.tar.gz", + checksum='d80c34f5cf902dccf1f1b5df5ebb86d0392e37049e5d73df1b3abae72e4ffe8b', buildrecipe=build_universal_openssl, configure=None, install=None, @@ -264,10 +264,10 @@ def library_recipes(): tk_patches = ['backport_gh71383_fix.patch', 'tk868_on_10_8_10_9.patch', 'backport_gh110950_fix.patch'] else: - tcl_tk_ver='8.6.16' - tcl_checksum='91cb8fa61771c63c262efb553059b7c7ad6757afa5857af6265e4b0bdc2a14a5' + tcl_tk_ver='8.6.17' + tcl_checksum='a3903371efcce8a405c5c245d029e9f6850258a60fa3761c4d58995610949b31' - tk_checksum='be9f94d3575d4b3099d84bc3c10de8994df2d7aa405208173c709cc404a7e5fe' + tk_checksum='e4982df6f969c08bf9dd858a6891059b4a3f50dc6c87c10abadbbe2fc4838946' tk_patches = [] @@ -359,9 +359,9 @@ def library_recipes(): ), ), dict( - name="SQLite 3.49.1", - url="https://sqlite.org/2025/sqlite-autoconf-3490100.tar.gz", - checksum="106642d8ccb36c5f7323b64e4152e9b719f7c0215acf5bfeac3d5e7f97b59254", + name="SQLite 3.50.4", + url="https://www.sqlite.org/2025/sqlite-autoconf-3500400.tar.gz", + checksum="a3db587a1b92ee5ddac2f66b3edb41b26f9c867275782d46c3a088977d6a5b18", extra_cflags=('-Os ' '-DSQLITE_ENABLE_FTS5 ' '-DSQLITE_ENABLE_FTS4 ' @@ -1747,7 +1747,7 @@ def main(): fn = os.path.join(folder, "ReadMe.rtf") patchFile("resources/ReadMe.rtf", fn) fn = os.path.join(folder, "Update Shell Profile.command") - patchScript("scripts/postflight.patch-profile", fn) + patchScript("resources/update_shell_profile.command", fn) fn = os.path.join(folder, "Install Certificates.command") patchScript("resources/install_certificates.command", fn) os.chmod(folder, STAT_0o755) diff --git a/Mac/BuildScript/resources/update_shell_profile.command b/Mac/BuildScript/resources/update_shell_profile.command new file mode 100755 index 00000000000000..3cf4d74de9f09a --- /dev/null +++ b/Mac/BuildScript/resources/update_shell_profile.command @@ -0,0 +1,116 @@ +#!/bin/sh + +echo "This script will update your shell profile when the 'bin' directory" +echo "of python is not early enough of the PATH of your shell." +echo "These changes will be effective only in shell windows that you open" +echo "after running this script." + +PYVER=@PYVER@ +PYTHON_ROOT="/Library/Frameworks/Python.framework/Versions/@PYVER@" + +if [ `id -ur` = 0 ]; then + # Run from the installer, do some trickery to fetch the information + # we need. + theShell="`finger $USER | grep Shell: | head -1 | awk '{ print $NF }'`" + +else + theShell="${SHELL}" +fi + +# Make sure the directory ${PYTHON_ROOT}/bin is on the users PATH. +BSH="`basename "${theShell}"`" +case "${BSH}" in +bash|ksh|sh|*csh|zsh|fish) + if [ `id -ur` = 0 ]; then + P=`su - ${USER} -c 'echo A-X-4-X@@$PATH@@X-4-X-A' | grep 'A-X-4-X@@.*@@X-4-X-A' | sed -e 's/^A-X-4-X@@//g' -e 's/@@X-4-X-A$//g'` + else + P="`(exec -l ${theShell} -c 'echo $PATH')`" + fi + ;; +*) + echo "Sorry, I don't know how to patch $BSH shells" + exit 0 + ;; +esac + +# Now ensure that our bin directory is on $P and before /usr/bin at that +for elem in `echo $P | tr ':' ' '` +do + if [ "${elem}" = "${PYTHON_ROOT}/bin" ]; then + echo "All right, you're a python lover already" + exit 0 + elif [ "${elem}" = "/usr/bin" ]; then + break + fi +done + +echo "${PYTHON_ROOT}/bin is not on your PATH or at least not early enough" +case "${BSH}" in +*csh) + if [ -f "${HOME}/.tcshrc" ]; then + RC="${HOME}/.tcshrc" + else + RC="${HOME}/.cshrc" + fi + # Create backup copy before patching + if [ -f "${RC}" ]; then + cp -fp "${RC}" "${RC}.pysave" + fi + echo "" >> "${RC}" + echo "# Setting PATH for Python ${PYVER}" >> "${RC}" + echo "# The original version is saved in .cshrc.pysave" >> "${RC}" + echo "set path=(${PYTHON_ROOT}/bin "'$path'")" >> "${RC}" + if [ `id -ur` = 0 ]; then + chown -h "${USER}" "${RC}" + fi + exit 0 + ;; +bash) + if [ -e "${HOME}/.bash_profile" ]; then + PR="${HOME}/.bash_profile" + elif [ -e "${HOME}/.bash_login" ]; then + PR="${HOME}/.bash_login" + elif [ -e "${HOME}/.profile" ]; then + PR="${HOME}/.profile" + else + PR="${HOME}/.bash_profile" + fi + ;; +fish) + CONFIG_DIR="${HOME}/.config/fish/conf.d/" + RC="${CONFIG_DIR}/python-${PYVER}.fish" + mkdir -p "$CONFIG_DIR" + if [ -f "${RC}" ]; then + cp -fp "${RC}" "${RC}.pysave" + fi + echo "# Setting PATH for Python ${PYVER}" > "${RC}" + if [ -f "${RC}.pysave" ]; then + echo "# The original version is saved in ${RC}.pysave" >> "${RC}" + fi + echo "fish_add_path -g \"${PYTHON_ROOT}/bin\"" >> "${RC}" + if [ `id -ur` = 0 ]; then + chown -h "${USER}" "${RC}" + fi + exit 0 + ;; +zsh) + PR="${HOME}/.zprofile" + ;; +*sh) + PR="${HOME}/.profile" + ;; +esac + +# Create backup copy before patching +if [ -f "${PR}" ]; then + cp -fp "${PR}" "${PR}.pysave" +fi +echo "" >> "${PR}" +echo "# Setting PATH for Python ${PYVER}" >> "${PR}" +echo "# The original version is saved in `basename ${PR}`.pysave" >> "${PR}" +echo 'PATH="'"${PYTHON_ROOT}/bin"':${PATH}"' >> "${PR}" +echo 'export PATH' >> "${PR}" +if [ `id -ur` = 0 ]; then + chown -h "${USER}" "${PR}" +fi +exit 0 diff --git a/Mac/BuildScript/scripts/postflight.patch-profile b/Mac/BuildScript/scripts/postflight.patch-profile index 9caf62211ddd16..ce8720f895d1b5 100755 --- a/Mac/BuildScript/scripts/postflight.patch-profile +++ b/Mac/BuildScript/scripts/postflight.patch-profile @@ -1,116 +1,104 @@ #!/bin/sh -echo "This script will update your shell profile when the 'bin' directory" -echo "of python is not early enough of the PATH of your shell." -echo "These changes will be effective only in shell windows that you open" -echo "after running this script." - PYVER=@PYVER@ PYTHON_ROOT="/Library/Frameworks/Python.framework/Versions/@PYVER@" -if [ `id -ur` = 0 ]; then - # Run from the installer, do some trickery to fetch the information - # we need. - theShell="`finger $USER | grep Shell: | head -1 | awk '{ print $NF }'`" -else - theShell="${SHELL}" -fi +# Run from the installer, do some trickery to fetch the information +# we need. +theShell="`finger $USER | grep Shell: | head -1 | awk '{ print $NF }'`" # Make sure the directory ${PYTHON_ROOT}/bin is on the users PATH. BSH="`basename "${theShell}"`" case "${BSH}" in bash|ksh|sh|*csh|zsh|fish) - if [ `id -ur` = 0 ]; then - P=`su - ${USER} -c 'echo A-X-4-X@@$PATH@@X-4-X-A' | grep 'A-X-4-X@@.*@@X-4-X-A' | sed -e 's/^A-X-4-X@@//g' -e 's/@@X-4-X-A$//g'` - else - P="`(exec -l ${theShell} -c 'echo $PATH')`" - fi - ;; + true + ;; *) - echo "Sorry, I don't know how to patch $BSH shells" - exit 0 - ;; + exit 0 + ;; esac -# Now ensure that our bin directory is on $P and before /usr/bin at that -for elem in `echo $P | tr ':' ' '` -do - if [ "${elem}" = "${PYTHON_ROOT}/bin" ]; then - echo "All right, you're a python lover already" - exit 0 - elif [ "${elem}" = "/usr/bin" ]; then - break - fi -done - -echo "${PYTHON_ROOT}/bin is not on your PATH or at least not early enough" case "${BSH}" in *csh) - if [ -f "${HOME}/.tcshrc" ]; then - RC="${HOME}/.tcshrc" - else - RC="${HOME}/.cshrc" - fi - # Create backup copy before patching - if [ -f "${RC}" ]; then - cp -fp "${RC}" "${RC}.pysave" - fi - echo "" >> "${RC}" - echo "# Setting PATH for Python ${PYVER}" >> "${RC}" - echo "# The original version is saved in .cshrc.pysave" >> "${RC}" - echo "set path=(${PYTHON_ROOT}/bin "'$path'")" >> "${RC}" - if [ `id -ur` = 0 ]; then - chown "${USER}" "${RC}" - fi - exit 0 - ;; + if [ -f "${HOME}/.tcshrc" ]; then + RC="${HOME}/.tcshrc" + else + RC="${HOME}/.cshrc" + fi + + # Drop privileges while writing files. + su -m ${USER} <<EOFC + # Create backup copy before patching + if [ -f "${RC}" ]; then + cp -fp "${RC}" "${RC}.pysave" + fi + echo "" >> "${RC}" + echo "# Setting PATH for Python ${PYVER}" >> "${RC}" + echo "# The original version is saved in .cshrc.pysave" >> "${RC}" + echo "set path=(${PYTHON_ROOT}/bin "'\$path'")" >> "${RC}" +EOFC + + if [ `id -ur` = 0 ]; then + chown -h "${USER}" "${RC}" + fi + exit 0 + ;; bash) - if [ -e "${HOME}/.bash_profile" ]; then - PR="${HOME}/.bash_profile" - elif [ -e "${HOME}/.bash_login" ]; then - PR="${HOME}/.bash_login" - elif [ -e "${HOME}/.profile" ]; then - PR="${HOME}/.profile" - else - PR="${HOME}/.bash_profile" - fi - ;; + if [ -e "${HOME}/.bash_profile" ]; then + PR="${HOME}/.bash_profile" + elif [ -e "${HOME}/.bash_login" ]; then + PR="${HOME}/.bash_login" + elif [ -e "${HOME}/.profile" ]; then + PR="${HOME}/.profile" + else + PR="${HOME}/.bash_profile" + fi + ;; fish) - CONFIG_DIR="${HOME}/.config/fish/conf.d/" - RC="${CONFIG_DIR}/python-${PYVER}.fish" - mkdir -p "$CONFIG_DIR" - if [ -f "${RC}" ]; then - cp -fp "${RC}" "${RC}.pysave" - fi - echo "# Setting PATH for Python ${PYVER}" > "${RC}" - if [ -f "${RC}.pysave" ]; then - echo "# The original version is saved in ${RC}.pysave" >> "${RC}" - fi - echo "fish_add_path -g \"${PYTHON_ROOT}/bin\"" >> "${RC}" - if [ `id -ur` = 0 ]; then - chown "${USER}" "${RC}" - fi - exit 0 - ;; + CONFIG_DIR="${HOME}/.config/fish/conf.d/" + RC="${CONFIG_DIR}/python-${PYVER}.fish" + + # Drop privileges while writing files. + su -m ${USER} <<EOFF + mkdir -p "$CONFIG_DIR" + if [ -f "${RC}" ]; then + cp -fp "${RC}" "${RC}.pysave" + fi + echo "# Setting PATH for Python ${PYVER}" > "${RC}" + if [ -f "${RC}.pysave" ]; then + echo "# The original version is saved in ${RC}.pysave" >> "${RC}" + fi + echo "fish_add_path -g \"${PYTHON_ROOT}/bin\"" >> "${RC}" +EOFF + + if [ `id -ur` = 0 ]; then + chown -h "${USER}" "${RC}" + fi + exit 0 + ;; zsh) - PR="${HOME}/.zprofile" - ;; + PR="${HOME}/.zprofile" + ;; *sh) - PR="${HOME}/.profile" - ;; + PR="${HOME}/.profile" + ;; esac +# Drop privileges while writing files. +su -m ${USER} <<EOFS # Create backup copy before patching if [ -f "${PR}" ]; then - cp -fp "${PR}" "${PR}.pysave" + cp -fp "${PR}" "${PR}.pysave" fi echo "" >> "${PR}" echo "# Setting PATH for Python ${PYVER}" >> "${PR}" echo "# The original version is saved in `basename ${PR}`.pysave" >> "${PR}" -echo 'PATH="'"${PYTHON_ROOT}/bin"':${PATH}"' >> "${PR}" +echo 'PATH="'"${PYTHON_ROOT}/bin"':\${PATH}"' >> "${PR}" echo 'export PATH' >> "${PR}" +EOFS + if [ `id -ur` = 0 ]; then - chown "${USER}" "${PR}" + chown -h "${USER}" "${PR}" fi exit 0 diff --git a/Makefile.pre.in b/Makefile.pre.in index 17e0c9904cc3aa..08ad5f4921dd5f 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -801,10 +801,10 @@ build_all: check-clean-src check-app-store-compliance $(BUILDPYTHON) platform sh .PHONY: build_wasm build_wasm: check-clean-src $(BUILDPYTHON) platform sharedmods \ - python-config checksharedmods + python-config checksharedmods build-details.json .PHONY: build_emscripten -build_emscripten: build_wasm web_example +build_emscripten: build_wasm web_example web_example_pyrepl_jspi # Check that the source is clean when building out of source. .PHONY: check-clean-src @@ -1012,7 +1012,12 @@ $(LIBRARY): $(LIBRARY_OBJS) $(AR) $(ARFLAGS) $@ $(LIBRARY_OBJS) libpython$(LDVERSION).so: $(LIBRARY_OBJS) $(DTRACE_OBJS) - $(BLDSHARED) -Wl,-h$(INSTSONAME) -o $(INSTSONAME) $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM) + # AIX Linker don't support "-h" option + if test "$(MACHDEP)" != "aix"; then \ + $(BLDSHARED) -Wl,-h$(INSTSONAME) -o $(INSTSONAME) $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM); \ + else \ + $(BLDSHARED) -o $@ $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM); \ + fi if test $(INSTSONAME) != $@; then \ $(LN) -f $(INSTSONAME) $@; \ fi @@ -1090,8 +1095,17 @@ $(DLLLIBRARY) libpython$(LDVERSION).dll.a: $(LIBRARY_OBJS) # wasm32-emscripten browser web example -WEBEX_DIR=$(srcdir)/Tools/wasm/emscripten/web_example/ -web_example/python.html: $(WEBEX_DIR)/python.html +EMSCRIPTEN_DIR=$(srcdir)/Tools/wasm/emscripten +WEBEX_DIR=$(EMSCRIPTEN_DIR)/web_example/ + +ZIP_STDLIB=python$(VERSION)$(ABI_THREAD).zip +$(ZIP_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \ + $(EMSCRIPTEN_DIR)/wasm_assets.py \ + Makefile pybuilddir.txt Modules/Setup.local + $(PYTHON_FOR_BUILD) $(EMSCRIPTEN_DIR)/wasm_assets.py \ + --buildroot . --prefix $(prefix) -o $@ + +web_example/index.html: $(WEBEX_DIR)/index.html @mkdir -p web_example @cp $< $@ @@ -1103,12 +1117,9 @@ web_example/server.py: $(WEBEX_DIR)/server.py @mkdir -p web_example @cp $< $@ -WEB_STDLIB=web_example/python$(VERSION)$(ABI_THREAD).zip -$(WEB_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \ - $(WEBEX_DIR)/wasm_assets.py \ - Makefile pybuilddir.txt Modules/Setup.local - $(PYTHON_FOR_BUILD) $(WEBEX_DIR)/wasm_assets.py \ - --buildroot . --prefix $(prefix) -o $@ +web_example/$(ZIP_STDLIB): $(ZIP_STDLIB) + @mkdir -p web_example + @cp $< $@ web_example/python.mjs web_example/python.wasm: $(BUILDPYTHON) @if test $(HOST_GNU_TYPE) != 'wasm32-unknown-emscripten' ; then \ @@ -1119,7 +1130,35 @@ web_example/python.mjs web_example/python.wasm: $(BUILDPYTHON) cp python.wasm web_example/python.wasm .PHONY: web_example -web_example: web_example/python.mjs web_example/python.worker.mjs web_example/python.html web_example/server.py $(WEB_STDLIB) +web_example: web_example/python.mjs web_example/python.worker.mjs web_example/index.html web_example/server.py web_example/$(ZIP_STDLIB) + +WEBEX2=web_example_pyrepl_jspi +WEBEX2_DIR=$(EMSCRIPTEN_DIR)/$(WEBEX2)/ + +$(WEBEX2)/python.mjs $(WEBEX2)/python.wasm: $(BUILDPYTHON) + @if test $(HOST_GNU_TYPE) != 'wasm32-unknown-emscripten' ; then \ + echo "Can only build web_example when target is Emscripten" ;\ + exit 1 ;\ + fi + @mkdir -p $(WEBEX2) + @cp python.mjs $(WEBEX2)/python.mjs + @cp python.wasm $(WEBEX2)/python.wasm + +$(WEBEX2)/index.html: $(WEBEX2_DIR)/index.html + @mkdir -p $(WEBEX2) + @cp $< $@ + +$(WEBEX2)/src.mjs: $(WEBEX2_DIR)/src.mjs + @mkdir -p $(WEBEX2) + @cp $< $@ + +$(WEBEX2)/$(ZIP_STDLIB): $(ZIP_STDLIB) + @mkdir -p $(WEBEX2) + @cp $< $@ + +.PHONY: web_example_pyrepl_jspi +web_example_pyrepl_jspi: $(WEBEX2)/python.mjs $(WEBEX2)/index.html $(WEBEX2)/src.mjs $(WEBEX2)/$(ZIP_STDLIB) + ############################################################################ # Header files @@ -1201,6 +1240,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/unicodeobject.h \ $(srcdir)/Include/warnings.h \ $(srcdir)/Include/weakrefobject.h \ + $(srcdir)/Python/remote_debug.h \ \ pyconfig.h \ $(PARSER_HEADERS) \ @@ -2190,7 +2230,7 @@ Python/frozen.o: $(FROZEN_FILES_OUT) # an include guard, so we can't use a pipeline to transform its output. Include/pydtrace_probes.h: $(srcdir)/Include/pydtrace.d $(MKDIR_P) Include - CC="$(CC)" CFLAGS="$(CFLAGS)" $(DTRACE) $(DFLAGS) -o $@ -h -s $< + CC="$(CC)" CFLAGS="$(CFLAGS)" $(DTRACE) $(DFLAGS) -o $@ -h -s $(srcdir)/Include/pydtrace.d : sed in-place edit with POSIX-only tools sed 's/PYTHON_/PyDTrace_/' $@ > $@.tmp mv $@.tmp $@ @@ -2200,7 +2240,7 @@ Python/gc.o: $(srcdir)/Include/pydtrace.h Python/import.o: $(srcdir)/Include/pydtrace.h Python/pydtrace.o: $(srcdir)/Include/pydtrace.d $(DTRACE_DEPS) - CC="$(CC)" CFLAGS="$(CFLAGS)" $(DTRACE) $(DFLAGS) -o $@ -G -s $< $(DTRACE_DEPS) + CC="$(CC)" CFLAGS="$(CFLAGS)" $(DTRACE) $(DFLAGS) -o $@ -G -s $(srcdir)/Include/pydtrace.d $(DTRACE_DEPS) Objects/typeobject.o: Objects/typeslots.inc @@ -2279,7 +2319,7 @@ testios: fi # Clone the testbed project into the XCFOLDER - $(PYTHON_FOR_BUILD) $(srcdir)/iOS/testbed clone --framework $(PYTHONFRAMEWORKPREFIX) "$(XCFOLDER)" + $(PYTHON_FOR_BUILD) $(srcdir)/Apple/testbed clone --framework $(PYTHONFRAMEWORKPREFIX) "$(XCFOLDER)" # Run the testbed project $(PYTHON_FOR_BUILD) "$(XCFOLDER)" run --verbose -- test -uall --single-process --rerun -W @@ -2507,9 +2547,8 @@ maninstall: altmaninstall XMLLIBSUBDIRS= xml xml/dom xml/etree xml/parsers xml/sax LIBSUBDIRS= asyncio \ collections \ - compression compression/bz2 compression/gzip compression/zstd \ - compression/lzma compression/zlib compression/_common \ - concurrent concurrent/futures \ + compression compression/_common compression/zstd \ + concurrent concurrent/futures concurrent/interpreters \ csv \ ctypes ctypes/macholib \ curses \ @@ -2568,7 +2607,6 @@ TESTSUBDIRS= idlelib/idle_test \ test/subprocessdata \ test/support \ test/support/_hypothesis_stubs \ - test/support/interpreters \ test/test_asyncio \ test/test_capi \ test/test_cext \ @@ -2995,6 +3033,9 @@ frameworkinstallunversionedstructure: $(LDLIBRARY) $(INSTALL) -d -m $(DIRMODE) $(DESTDIR)$(PYTHONFRAMEWORKINSTALLDIR) sed 's/%VERSION%/'"`$(RUNSHARED) $(PYTHON_FOR_BUILD) -c 'import platform; print(platform.python_version())'`"'/g' < $(RESSRCDIR)/Info.plist > $(DESTDIR)$(PYTHONFRAMEWORKINSTALLDIR)/Info.plist $(INSTALL_SHARED) $(LDLIBRARY) $(DESTDIR)$(PYTHONFRAMEWORKPREFIX)/$(LDLIBRARY) + $(INSTALL) -d -m $(DIRMODE) $(DESTDIR)$(LIBDIR) + $(LN) -fs "../$(LDLIBRARY)" "$(DESTDIR)$(prefix)/lib/libpython$(LDVERSION).dylib" + $(LN) -fs "../$(LDLIBRARY)" "$(DESTDIR)$(prefix)/lib/libpython$(VERSION).dylib" $(INSTALL) -d -m $(DIRMODE) $(DESTDIR)$(BINDIR) for file in $(srcdir)/$(RESSRCDIR)/bin/* ; do \ $(INSTALL) -m $(EXEMODE) $$file $(DESTDIR)$(BINDIR); \ @@ -3067,6 +3108,12 @@ config.status: $(srcdir)/configure Python/asm_trampoline.o: $(srcdir)/Python/asm_trampoline.S $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< +Python/emscripten_trampoline_inner.wasm: $(srcdir)/Python/emscripten_trampoline_inner.c + # emcc has a path that ends with emsdk/upstream/emscripten/emcc, we're looking for emsdk/upstream/bin/clang. + $$(dirname $$(dirname $(CC)))/bin/clang -o $@ $< -mgc -O2 -Wl,--no-entry -Wl,--import-table -Wl,--import-memory -target wasm32-unknown-unknown -nostdlib + +Python/emscripten_trampoline_wasm.c: Python/emscripten_trampoline_inner.wasm + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/wasm/emscripten/prepare_external_wasm.py $< $@ getWasmTrampolineModule JIT_DEPS = \ $(srcdir)/Tools/jit/*.c \ @@ -3179,10 +3226,10 @@ clean-retain-profile: pycremoval -find build -type f -a ! -name '*.gc??' -exec rm -f {} ';' -rm -f Include/pydtrace_probes.h -rm -f profile-gen-stamp - -rm -rf iOS/testbed/Python.xcframework/ios-*/bin - -rm -rf iOS/testbed/Python.xcframework/ios-*/lib - -rm -rf iOS/testbed/Python.xcframework/ios-*/include - -rm -rf iOS/testbed/Python.xcframework/ios-*/Python.framework + -rm -rf Apple/iOS/testbed/Python.xcframework/ios-*/bin + -rm -rf Apple/iOS/testbed/Python.xcframework/ios-*/lib + -rm -rf Apple/iOS/testbed/Python.xcframework/ios-*/include + -rm -rf Apple/iOS/testbed/Python.xcframework/ios-*/Python.framework .PHONY: profile-removal profile-removal: @@ -3208,7 +3255,7 @@ clobber: clean config.cache config.log pyconfig.h Modules/config.c -rm -rf build platform -rm -rf $(PYTHONFRAMEWORKDIR) - -rm -rf iOS/Frameworks + -rm -rf Apple/iOS/Frameworks -rm -rf iOSTestbed.* -rm -f python-config.py python-config -rm -rf cross-build @@ -3248,6 +3295,11 @@ check-c-globals: --format summary \ --traceback +# Check for undocumented C APIs. +.PHONY: check-c-api-docs +check-c-api-docs: + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/check-c-api-docs/main.py + # Find files with funny names .PHONY: funny funny: @@ -3341,7 +3393,7 @@ MODULE__TESTCAPI_DEPS=$(srcdir)/Modules/_testcapi/parts.h $(srcdir)/Modules/_tes MODULE__TESTLIMITEDCAPI_DEPS=$(srcdir)/Modules/_testlimitedcapi/testcapi_long.h $(srcdir)/Modules/_testlimitedcapi/parts.h $(srcdir)/Modules/_testlimitedcapi/util.h MODULE__TESTINTERNALCAPI_DEPS=$(srcdir)/Modules/_testinternalcapi/parts.h MODULE__SQLITE3_DEPS=$(srcdir)/Modules/_sqlite/connection.h $(srcdir)/Modules/_sqlite/cursor.h $(srcdir)/Modules/_sqlite/microprotocols.h $(srcdir)/Modules/_sqlite/module.h $(srcdir)/Modules/_sqlite/prepare_protocol.h $(srcdir)/Modules/_sqlite/row.h $(srcdir)/Modules/_sqlite/util.h -MODULE__ZSTD_DEPS=$(srcdir)/Modules/_zstd/_zstdmodule.h $(srcdir)/Modules/_zstd/buffer.h +MODULE__ZSTD_DEPS=$(srcdir)/Modules/_zstd/_zstdmodule.h $(srcdir)/Modules/_zstd/buffer.h $(srcdir)/Modules/_zstd/zstddict.h CODECS_COMMON_HEADERS=$(srcdir)/Modules/cjkcodecs/multibytecodec.h $(srcdir)/Modules/cjkcodecs/cjkcodecs.h MODULE__CODECS_CN_DEPS=$(srcdir)/Modules/cjkcodecs/mappings_cn.h $(CODECS_COMMON_HEADERS) diff --git a/Misc/ACKS b/Misc/ACKS index 610dcf9f4238de..15957d68a574d1 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -478,9 +478,11 @@ Dean Draayer Fred L. Drake, Jr. Mehdi Drissi Derk Drukker +Weilin Du John DuBois Paul Dubois Jacques Ducasse +Jadon Duff Andrei Dorian Duma Graham Dumpleton Quinn Dunkan @@ -658,6 +660,7 @@ Michael Goderbauer Karan Goel Jeroen Van Goey Christoph Gohlke +Daniel Golding Tim Golden Yonatan Goldschmidt Mark Gollahon @@ -763,6 +766,7 @@ Chris Herborth Ivan Herman Jürgen Hermann Joshua Jay Herman +Kevin Hernandez Gary Herron Ernie Hershey Thomas Herve @@ -900,6 +904,7 @@ Jim Jewett Pedro Diaz Jimenez Orjan Johansen Fredrik Johansson +Benjamin K. Johnson Gregory K. Johnson Kent Johnson Michael Johnson @@ -940,6 +945,7 @@ Anton Kasyanov Lou Kates Makoto Kato Irit Katriel +Kattni Hiroaki Kawai Dmitry Kazakov Brian Kearns @@ -1050,6 +1056,7 @@ Alexander Lakeev David Lam Thomas Lamb Valerie Lambert +Kliment Lamonov Peter Lamut Jean-Baptiste "Jiba" Lamy Ronan Lamy @@ -2096,6 +2103,7 @@ Xiang Zhang Robert Xiao Florent Xicluna Yanbo, Xie +Kaisheng Xu Xinhang Xu Arnon Yaari Alakshendra Yadav diff --git a/Misc/NEWS.d/3.10.0a1.rst b/Misc/NEWS.d/3.10.0a1.rst index f09842f1e77dea..473e7c7ac0f574 100644 --- a/Misc/NEWS.d/3.10.0a1.rst +++ b/Misc/NEWS.d/3.10.0a1.rst @@ -3275,8 +3275,8 @@ Types created with :c:func:`PyType_FromSpec` now make any signature in their .. nonce: u6Xfr2 .. section: C API -Fix bug in PyOS_mystrnicmp and PyOS_mystricmp that incremented pointers -beyond the end of a string. +Fix bug in :c:func:`PyOS_mystrnicmp` and :c:func:`PyOS_mystricmp` that +incremented pointers beyond the end of a string. .. diff --git a/Misc/NEWS.d/3.10.0b1.rst b/Misc/NEWS.d/3.10.0b1.rst index 406a5d7853edc0..5bc78b9007afa8 100644 --- a/Misc/NEWS.d/3.10.0b1.rst +++ b/Misc/NEWS.d/3.10.0b1.rst @@ -402,8 +402,8 @@ the heap. Should speed up dispatch in the interpreter. .. nonce: eUn4p5 .. section: Core and Builtins -Static methods (:func:`@staticmethod <staticmethod>`) and class methods -(:func:`@classmethod <classmethod>`) now inherit the method attributes +Static methods (:deco:`staticmethod`) and class methods +(:deco:`classmethod`) now inherit the method attributes (``__module__``, ``__name__``, ``__qualname__``, ``__doc__``, ``__annotations__``) and have a new ``__wrapped__`` attribute. Patch by Victor Stinner. @@ -454,7 +454,7 @@ file locations. .. nonce: VSF3vg .. section: Core and Builtins -Static methods (:func:`@staticmethod <staticmethod>`) are now callable as +Static methods (:deco:`staticmethod`) are now callable as regular functions. Patch by Victor Stinner. .. diff --git a/Misc/NEWS.d/3.11.0a1.rst b/Misc/NEWS.d/3.11.0a1.rst index 0b49c2a78771d2..ab998ca73abf40 100644 --- a/Misc/NEWS.d/3.11.0a1.rst +++ b/Misc/NEWS.d/3.11.0a1.rst @@ -2953,7 +2953,7 @@ support for Metadata 2.2. .. nonce: xTUyyX .. section: Library -Remove the :func:`@asyncio.coroutine <asyncio.coroutine>` :term:`decorator` +Remove the :deco:`asyncio.coroutine` :term:`decorator` enabling legacy generator-based coroutines to be compatible with async/await code; remove :class:`asyncio.coroutines.CoroWrapper` used for wrapping legacy coroutine objects in the debug mode. The decorator has been diff --git a/Misc/NEWS.d/3.11.0b1.rst b/Misc/NEWS.d/3.11.0b1.rst index c3a1942b881ad4..7b8b983ebf951e 100644 --- a/Misc/NEWS.d/3.11.0b1.rst +++ b/Misc/NEWS.d/3.11.0b1.rst @@ -664,7 +664,7 @@ for :func:`os.fcopyfile` available in macOs. .. nonce: l1p7CJ .. section: Library -For :func:`@dataclass <dataclasses.dataclass>`, add *weakref_slot*. +For :deco:`~dataclasses.dataclass`, add *weakref_slot*. The new parameter defaults to ``False``. If true, and if ``slots=True``, add a slot named ``"__weakref__"``, which will allow instances to be weakref'd. Contributed by Eric V. Smith diff --git a/Misc/NEWS.d/3.12.0a1.rst b/Misc/NEWS.d/3.12.0a1.rst index f2668e99a6299b..0da7cdde1b2535 100644 --- a/Misc/NEWS.d/3.12.0a1.rst +++ b/Misc/NEWS.d/3.12.0a1.rst @@ -4330,7 +4330,7 @@ and ``sendfile`` inside ``IocpProactor``. .. nonce: GsBL9- .. section: Library -Fixed :meth:`collections.UserDict.get` to not call :meth:`__missing__` when +Fixed :meth:`collections.UserDict.get` to not call :meth:`~object.__missing__` when a value is not found. This matches the behavior of :class:`dict`. Patch by Bar Harel. diff --git a/Misc/NEWS.d/3.12.0a2.rst b/Misc/NEWS.d/3.12.0a2.rst index bc028f30636bf7..20e27c0d92f05f 100644 --- a/Misc/NEWS.d/3.12.0a2.rst +++ b/Misc/NEWS.d/3.12.0a2.rst @@ -35,11 +35,11 @@ Update bundled libexpat to 2.5.0 .. nonce: ik4iOv .. section: Core and Builtins -The docs clearly say that ``PyImport_Inittab``, +The docs clearly say that :c:data:`PyImport_Inittab`, :c:func:`PyImport_AppendInittab`, and :c:func:`PyImport_ExtendInittab` should not be used after :c:func:`Py_Initialize` has been called. We now enforce this for the two functions. Additionally, the runtime now uses an -internal copy of ``PyImport_Inittab``, to guard against modification. +internal copy of :c:data:`PyImport_Inittab`, to guard against modification. .. diff --git a/Misc/NEWS.d/3.13.0a5.rst b/Misc/NEWS.d/3.13.0a5.rst index d56b1542b01823..19ba16bc8c83d5 100644 --- a/Misc/NEWS.d/3.13.0a5.rst +++ b/Misc/NEWS.d/3.13.0a5.rst @@ -742,8 +742,8 @@ Add ``windows_31j`` to aliases for ``cp932`` codec .. nonce: fv35wU .. section: Library -:func:`functools.partial`s of :func:`repr` has been improved to include the -:term:`module` name. Patched by Furkan Onder and Anilyka Barry. +Always include the :term:`module` name in the :func:`repr` of +:func:`functools.partial` objects. Patch by Furkan Onder and Anilyka Barry. .. diff --git a/Misc/NEWS.d/3.13.0a6.rst b/Misc/NEWS.d/3.13.0a6.rst index 2740b4f0d967ba..ad6622d23bf36b 100644 --- a/Misc/NEWS.d/3.13.0a6.rst +++ b/Misc/NEWS.d/3.13.0a6.rst @@ -264,7 +264,8 @@ Improve performance of :func:`os.path.join` and :func:`os.path.expanduser`. .. nonce: hqk9Hn .. section: Library -Raise :exc:`TypeError` for non-paths in :func:`posixpath.relpath`. +Raise :exc:`TypeError` for non-paths in :func:`posixpath.relpath +<os.path.relpath>`. .. @@ -273,7 +274,8 @@ Raise :exc:`TypeError` for non-paths in :func:`posixpath.relpath`. .. nonce: l6rWlj .. section: Library -Preserve mailbox ownership when rewriting in :func:`mailbox.mbox.flush`. +Preserve mailbox ownership when rewriting in :func:`mailbox.mbox.flush +<mailbox.Mailbox.flush>`. Patch by Tony Mountifield. .. diff --git a/Misc/NEWS.d/3.14.0.rst b/Misc/NEWS.d/3.14.0.rst new file mode 100644 index 00000000000000..438905c8fe9a82 --- /dev/null +++ b/Misc/NEWS.d/3.14.0.rst @@ -0,0 +1,78 @@ +.. date: 2025-10-06-23-56-36 +.. gh-issue: 124111 +.. nonce: KOlBvs +.. release date: 2025-10-07 +.. section: macOS + +Update macOS installer to use Tcl/Tk 8.6.17. + +.. + +.. date: 2025-10-04-12-29-31 +.. gh-issue: 139573 +.. nonce: vVpHaP +.. section: macOS + +Updated bundled version of OpenSSL to 3.0.18. + +.. + +.. date: 2025-10-04-12-18-45 +.. gh-issue: 139573 +.. nonce: EO9kVB +.. section: Windows + +Updated bundled version of OpenSSL to 3.0.18. + +.. + +.. date: 2025-09-25-10-31-02 +.. gh-issue: 139330 +.. nonce: 5WWkY0 +.. section: Tools/Demos + +SBOM generation tool didn't cross-check the version and checksum values +against the ``Modules/expat/refresh.sh`` script, leading to the values +becoming out-of-date during routine updates. + +.. + +.. date: 2025-08-28-06-22-26 +.. gh-issue: 132006 +.. nonce: eZQmc6 +.. section: Tools/Demos + +XCframeworks now include privacy manifests to satisfy Apple App Store +submission requirements. + +.. + +.. date: 2025-08-27-11-14-53 +.. gh-issue: 138171 +.. nonce: Suz8ob +.. section: Tools/Demos + +A script for building an iOS XCframework was added. As part of this change, +the top level ``iOS`` folder has been moved to be a subdirectory of the +``Apple`` folder. + +.. + +.. date: 2025-09-29-00-01-28 +.. gh-issue: 139400 +.. nonce: X2T-jO +.. section: Security + +:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only +garbage-collected once they are no longer referenced by subparsers created +by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`. Patch by +Sebastian Pipping. + +.. + +.. date: 2025-09-25-07-33-43 +.. gh-issue: 139312 +.. nonce: ygE8AC +.. section: Library + +Upgrade bundled libexpat to 2.7.3 diff --git a/Misc/NEWS.d/3.14.0a1.rst b/Misc/NEWS.d/3.14.0a1.rst index 98639f0d3505d5..1938976fa4226a 100644 --- a/Misc/NEWS.d/3.14.0a1.rst +++ b/Misc/NEWS.d/3.14.0a1.rst @@ -1033,7 +1033,7 @@ retrieve the spec information. .. nonce: s3vKql .. section: Library -:mod:`argparse` vim supports abbreviated single-dash long options separated +:mod:`argparse` supports abbreviated single-dash long options separated by ``=`` from its value. .. @@ -1999,7 +1999,7 @@ with an escape character. .. nonce: vi2bP- .. section: Library -:func:`@warnings.deprecated <warnings.deprecated>` now copies the coroutine +:deco:`warnings.deprecated` now copies the coroutine status of functions and methods so that :func:`inspect.iscoroutinefunction` returns the correct result. @@ -4913,11 +4913,11 @@ Allow tuples of length 20 in the freelist to be reused. .. nonce: lYKYYP .. section: Core and Builtins -:exc:`ValueError` messages for :meth:`!list.index`, :meth:`!range.index`, +:exc:`ValueError` messages for :meth:`list.index`, :meth:`range.index`, :meth:`!deque.index`, :meth:`!deque.remove` and :meth:`!ShareableList.index` no longer contain the repr of the searched value (which can be arbitrary -large) and are consistent with error messages for other :meth:`!index` and -:meth:`!remove` methods. +large) and are consistent with error messages for other :meth:`~sequence.index` and +:meth:`~sequence.remove` methods. .. @@ -5604,7 +5604,7 @@ Using :data:`NotImplemented` in a boolean context now raises .. nonce: wNMKVd .. section: Core and Builtins -Fix race condition in free-threaded build where :meth:`!list.extend` could +Fix race condition in free-threaded build where :meth:`list.extend` could expose uninitialised memory to concurrent readers. .. @@ -6092,7 +6092,7 @@ Patch by Victor Stinner. .. nonce: qOr9GF .. section: C API -Soft deprecate the :c:macro:`!Py_MEMCPY` macro: use directly ``memcpy()`` +Soft deprecate the :c:macro:`Py_MEMCPY` macro: use directly ``memcpy()`` instead. Patch by Victor Stinner. .. diff --git a/Misc/NEWS.d/3.14.0a6.rst b/Misc/NEWS.d/3.14.0a6.rst index bafd8845de6973..d8840b6f283e76 100644 --- a/Misc/NEWS.d/3.14.0a6.rst +++ b/Misc/NEWS.d/3.14.0a6.rst @@ -1325,7 +1325,7 @@ variable. .. nonce: d75n8U .. section: Core and Builtins -Adapt :func:`reversed` for use in the free-theading build. The +Adapt :func:`reversed` for use in the free-threading build. The :func:`reversed` is still not thread-safe in the sense that concurrent iterations may see the same object, but they will not corrupt the interpreter state. diff --git a/Misc/NEWS.d/3.14.0a7.rst b/Misc/NEWS.d/3.14.0a7.rst index 35b96d33da4175..946ca27046a745 100644 --- a/Misc/NEWS.d/3.14.0a7.rst +++ b/Misc/NEWS.d/3.14.0a7.rst @@ -192,7 +192,7 @@ The :class:`ctypes.py_object` type now supports subscription, making it a .. nonce: cX4yTn .. section: Library -Add the :attr:`zipfile.ZipFile.data_offset` attribute, which stores the +Add the :attr:`!zipfile.ZipFile.data_offset` attribute, which stores the offset to the beginning of ZIP data in a file when available. When the :class:`zipfile.ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the underlying file does not support ``tell()``, the value will be ``None`` @@ -671,7 +671,7 @@ Allow the JIT to remove an extra ``_TO_BOOL_BOOL`` instruction after .. nonce: dNh64H .. section: Core and Builtins -Fix crash when calling :meth:`!list.append` as an unbound method. +Fix crash when calling :meth:`list.append` as an unbound method. .. diff --git a/Misc/NEWS.d/3.14.0b1.rst b/Misc/NEWS.d/3.14.0b1.rst index 5847dea7d5e8ee..f2dd631358550b 100644 --- a/Misc/NEWS.d/3.14.0b1.rst +++ b/Misc/NEWS.d/3.14.0b1.rst @@ -1756,7 +1756,7 @@ Add support for macOS multi-arch builds with the JIT enabled .. nonce: q9fvyM .. section: Core and Builtins -PyREPL now supports syntax highlighing. Contributed by Łukasz Langa. +PyREPL now supports syntax highlighting. Contributed by Łukasz Langa. .. @@ -1797,7 +1797,7 @@ non-``None`` ``closure``. Patch by Bartosz Sławecki. .. nonce: Uj7lyY .. section: Core and Builtins -Fix a bug that was allowing newlines inconsitently in format specifiers for +Fix a bug that was allowing newlines inconsistently in format specifiers for single-quoted f-strings. Patch by Pablo Galindo. .. @@ -2012,7 +2012,7 @@ interpreter. .. nonce: ofI5Fl .. section: C API -Add support of nullable arguments in :c:func:`PyArg_Parse` and similar +[Reverted in :gh:`136991`] Add support of nullable arguments in :c:func:`PyArg_Parse` and similar functions. Adding ``?`` after any format unit makes ``None`` be accepted as a value. diff --git a/Misc/NEWS.d/3.14.0b2.rst b/Misc/NEWS.d/3.14.0b2.rst new file mode 100644 index 00000000000000..214f7430a58395 --- /dev/null +++ b/Misc/NEWS.d/3.14.0b2.rst @@ -0,0 +1,766 @@ +.. date: 2025-05-20-21-43-20 +.. gh-issue: 130727 +.. nonce: -69t4D +.. release date: 2025-05-26 +.. section: Windows + +Fix a race in internal calls into WMI that can result in an "invalid handle" +exception under high load. Patch by Chris Eibl. + +.. + +.. date: 2025-05-19-03-02-04 +.. gh-issue: 76023 +.. nonce: vHOf6M +.. section: Windows + +Make :func:`os.path.realpath` ignore Windows error 1005 when in non-strict +mode. + +.. + +.. date: 2025-05-13-13-25-27 +.. gh-issue: 133779 +.. nonce: -YcTBz +.. section: Windows + +Reverts the change to generate different :file:`pyconfig.h` files based on +compiler settings, as it was frequently causing extension builds to break. +In particular, the ``Py_GIL_DISABLED`` preprocessor variable must now always +be defined explicitly when compiling for the experimental free-threaded +runtime. The :func:`sysconfig.get_config_var` function can be used to +determine whether the current runtime was compiled with that flag or not. + +.. + +.. date: 2025-05-08-19-07-26 +.. gh-issue: 133626 +.. nonce: yFTKYK +.. section: Windows + +Ensures packages are not accidentally bundled into the traditional +installer. + +.. + +.. date: 2025-05-19-14-57-46 +.. gh-issue: 134215 +.. nonce: sbdDK6 +.. section: Tools/Demos + +:term:`REPL` import autocomplete only suggests private modules when +explicitly specified. + +.. + +.. date: 2025-05-09-14-54-48 +.. gh-issue: 133744 +.. nonce: LCquu0 +.. section: Tests + +Fix multiprocessing interrupt test. Add an event to synchronize the parent +process with the child process: wait until the child process starts +sleeping. Patch by Victor Stinner. + +.. + +.. date: 2025-05-09-04-11-06 +.. gh-issue: 133682 +.. nonce: -_lwo3 +.. section: Tests + +Fixed test case ``test.test_annotationlib.TestStringFormat.test_displays`` +which ensures proper handling of complex data structures (lists, sets, +dictionaries, and tuples) in string annotations. + +.. + +.. date: 2025-05-08-15-06-01 +.. gh-issue: 133639 +.. nonce: 50-kbV +.. section: Tests + +Fix ``TestPyReplAutoindent.test_auto_indent_default()`` doesn't run +``input_code``. + +.. + +.. date: 2025-05-09-20-22-54 +.. gh-issue: 133767 +.. nonce: kN2i3Q +.. section: Security + +Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error +handler. + +.. + +.. date: 2025-01-14-11-19-07 +.. gh-issue: 128840 +.. nonce: M1doZW +.. section: Security + +Short-circuit the processing of long IPv6 addresses early in +:mod:`ipaddress` to prevent excessive memory consumption and a minor +denial-of-service. + +.. + +.. date: 2025-05-26-12-31-08 +.. gh-issue: 132710 +.. nonce: ApU3TZ +.. section: Library + +If possible, ensure that :func:`uuid.getnode` returns the same result even +across different processes. Previously, the result was constant only within +the same process. Patch by Bénédikt Tran. + +.. + +.. date: 2025-05-24-03-10-36 +.. gh-issue: 80334 +.. nonce: z21cMa +.. section: Library + +:func:`multiprocessing.freeze_support` now checks for work on any "spawn" +start method platform rather than only on Windows. + +.. + +.. date: 2025-05-23-23-43-39 +.. gh-issue: 134582 +.. nonce: 9POq3l +.. section: Library + +Fix tokenize.untokenize() round-trip errors related to t-strings braces +escaping + +.. + +.. date: 2025-05-22-18-14-13 +.. gh-issue: 134546 +.. nonce: fjLVzK +.. section: Library + +Ensure :mod:`pdb` remote debugging script is readable by remote Python +process. + +.. + +.. date: 2025-05-22-14-12-53 +.. gh-issue: 134451 +.. nonce: M1rD-j +.. section: Library + +Converted ``asyncio.tools.CycleFoundException`` from dataclass to a regular +exception type. + +.. + +.. date: 2025-05-22-13-10-32 +.. gh-issue: 114177 +.. nonce: 3TYUJ3 +.. section: Library + +Fix :mod:`asyncio` to not close subprocess pipes which would otherwise error +out when the event loop is already closed. + +.. + +.. date: 2025-05-20-21-45-58 +.. gh-issue: 90871 +.. nonce: Gkvtp6 +.. section: Library + +Fixed an off by one error concerning the backlog parameter in +:meth:`~asyncio.loop.create_unix_server`. Contributed by Christian Harries. + +.. + +.. date: 2025-05-20-19-16-30 +.. gh-issue: 134323 +.. nonce: ZQZGvw +.. section: Library + +Fix the :meth:`threading.RLock.locked` method. + +.. + +.. date: 2025-05-20-15-13-43 +.. gh-issue: 86802 +.. nonce: trF7TM +.. section: Library + +Fixed asyncio memory leak in cancelled shield tasks. For shielded tasks +where the shield was cancelled, log potential exceptions through the +exception handler. Contributed by Christian Harries. + +.. + +.. date: 2025-05-19-20-59-06 +.. gh-issue: 134209 +.. nonce: anhTcF +.. section: Library + +:mod:`curses`: The :meth:`curses.window.instr` and +:meth:`curses.window.getstr` methods now allocate their internal buffer on +the heap instead of the stack; in addition, the max buffer size is increased +from 1023 to 2047. + +.. + +.. date: 2025-05-19-15-05-24 +.. gh-issue: 134235 +.. nonce: pz9PwV +.. section: Library + +Updated tab completion on REPL to include builtin modules. Contributed by +Tom Wang, Hunter Young + +.. + +.. date: 2025-05-19-10-32-11 +.. gh-issue: 134152 +.. nonce: INJC2j +.. section: Library + +Fixed :exc:`UnboundLocalError` that could occur during :mod:`email` header +parsing if an expected trailing delimiter is missing in some contexts. + +.. + +.. date: 2025-05-18-13-23-29 +.. gh-issue: 134168 +.. nonce: hgx3Xg +.. section: Library + +:mod:`http.server`: Fix IPv6 address binding and :option:`--directory +<http.server --directory>` handling when using HTTPS. + +.. + +.. date: 2025-05-18-12-48-39 +.. gh-issue: 62184 +.. nonce: y11l10 +.. section: Library + +Remove import of C implementation of :class:`io.FileIO` from Python +implementation which has its own implementation + +.. + +.. date: 2025-05-17-20-23-57 +.. gh-issue: 133982 +.. nonce: smS7au +.. section: Library + +Emit :exc:`RuntimeWarning` in the Python implementation of :mod:`io` when +the :term:`file-like object <file object>` is not closed explicitly in the +presence of multiple I/O layers. + +.. + +.. date: 2025-05-17-18-08-35 +.. gh-issue: 133890 +.. nonce: onn9_X +.. section: Library + +The :mod:`tarfile` module now handles :exc:`UnicodeEncodeError` in the same +way as :exc:`OSError` when cannot extract a member. + +.. + +.. date: 2025-05-17-13-46-20 +.. gh-issue: 134097 +.. nonce: fgkjE1 +.. section: Library + +Fix interaction of the new :term:`REPL` and :option:`-X showrefcount <-X>` +command line option. + +.. + +.. date: 2025-05-17-12-40-12 +.. gh-issue: 133889 +.. nonce: Eh-zO4 +.. section: Library + +The generated directory listing page in +:class:`http.server.SimpleHTTPRequestHandler` now only shows the decoded +path component of the requested URL, and not the query and fragment. + +.. + +.. date: 2025-05-16-20-10-25 +.. gh-issue: 134098 +.. nonce: YyTkKr +.. section: Library + +Fix handling paths that end with a percent-encoded slash (``%2f`` or +``%2F``) in :class:`http.server.SimpleHTTPRequestHandler`. + +.. + +.. date: 2025-05-16-12-40-37 +.. gh-issue: 132124 +.. nonce: T_5Odx +.. section: Library + +On POSIX-compliant systems, :func:`!multiprocessing.util.get_temp_dir` now +ignores :envvar:`TMPDIR` (and similar environment variables) if the path +length of ``AF_UNIX`` socket files exceeds the platform-specific maximum +length when using the :ref:`forkserver +<multiprocessing-start-method-forkserver>` start method. Patch by Bénédikt +Tran. + +.. + +.. date: 2025-05-15-14-27-01 +.. gh-issue: 134062 +.. nonce: fRbJet +.. section: Library + +:mod:`ipaddress`: fix collisions in :meth:`~object.__hash__` for +:class:`~ipaddress.IPv4Network` and :class:`~ipaddress.IPv6Network` objects. + +.. + +.. date: 2025-05-13-18-54-56 +.. gh-issue: 133970 +.. nonce: 6G-Oi6 +.. section: Library + +Make :class:`!string.templatelib.Template` and +:class:`!string.templatelib.Interpolation` generic. + +.. + +.. date: 2025-05-13-18-21-59 +.. gh-issue: 71253 +.. nonce: -3Sf_K +.. section: Library + +Raise :exc:`ValueError` in :func:`open` if *opener* returns a negative +file-descriptor in the Python implementation of :mod:`io` to match the C +implementation. + +.. + +.. date: 2025-05-12-20-38-57 +.. gh-issue: 133960 +.. nonce: Aee79f +.. section: Library + +Simplify and improve :func:`typing.evaluate_forward_ref`. It now no longer +raises errors on certain invalid types. In several situations, it is now +able to evaluate forward references that were previously unsupported. + +.. + +.. date: 2025-05-12-06-52-10 +.. gh-issue: 133925 +.. nonce: elInBY +.. section: Library + +Make the private class ``typing._UnionGenericAlias`` hashable. + +.. + +.. date: 2025-05-10-12-06-55 +.. gh-issue: 133653 +.. nonce: Gb2aG4 +.. section: Library + +Fix :class:`argparse.ArgumentParser` with the *formatter_class* argument. +Fix TypeError when *formatter_class* is a custom subclass of +:class:`!HelpFormatter`. Fix TypeError when *formatter_class* is not a +subclass of :class:`!HelpFormatter` and non-standard *prefix_char* is used. +Fix support of colorizing when *formatter_class* is not a subclass of +:class:`!HelpFormatter`. + +.. + +.. date: 2025-05-09-20-59-24 +.. gh-issue: 132641 +.. nonce: 3qTw44 +.. section: Library + +Fixed a race in :func:`functools.lru_cache` under free-threading. + +.. + +.. date: 2025-05-09-19-05-24 +.. gh-issue: 133783 +.. nonce: 1voCnR +.. section: Library + +Fix bug with applying :func:`copy.replace` to :mod:`ast` objects. Attributes +that default to ``None`` were incorrectly treated as required for manually +created AST nodes. + +.. + +.. date: 2025-05-09-18-29-25 +.. gh-issue: 133684 +.. nonce: Y1DFSt +.. section: Library + +Fix bug where :func:`annotationlib.get_annotations` would return the wrong +result for certain classes that are part of a class hierarchy where ``from +__future__ import annotations`` is used. + +.. + +.. date: 2025-05-09-15-50-00 +.. gh-issue: 77057 +.. nonce: fV8SU- +.. section: Library + +Fix handling of invalid markup declarations in +:class:`html.parser.HTMLParser`. + +.. + +.. date: 2025-05-09-09-10-34 +.. gh-issue: 130328 +.. nonce: s9h4By +.. section: Library + +Speedup pasting in ``PyREPL`` on Windows in a legacy console. Patch by Chris +Eibl. + +.. + +.. date: 2025-05-09-08-49-03 +.. gh-issue: 133701 +.. nonce: KI8tGz +.. section: Library + +Fix bug where :class:`typing.TypedDict` classes defined under ``from +__future__ import annotations`` and inheriting from another ``TypedDict`` +had an incorrect ``__annotations__`` attribute. + +.. + +.. date: 2025-05-07-19-16-41 +.. gh-issue: 133581 +.. nonce: kERUCJ +.. section: Library + +Improve unparsing of t-strings in :func:`ast.unparse` and ``from __future__ +import annotations``. Empty t-strings now round-trip correctly and +formatting in interpolations is preserved. Patch by Jelle Zijlstra. + +.. + +.. date: 2025-05-06-22-54-37 +.. gh-issue: 133551 +.. nonce: rfy1tJ +.. section: Library + +Support t-strings (:pep:`750`) in :mod:`annotationlib`. Patch by Jelle +Zijlstra. + +.. + +.. date: 2025-05-05-22-11-24 +.. gh-issue: 133439 +.. nonce: LpmyFz +.. section: Library + +Fix dot commands with trailing spaces are mistaken for multi-line SQL +statements in the sqlite3 command-line interface. + +.. + +.. date: 2025-05-04-17-04-55 +.. gh-issue: 132493 +.. nonce: huirKi +.. section: Library + +Avoid accessing ``__annotations__`` unnecessarily in +:func:`inspect.signature`. + +.. + +.. date: 2025-04-29-11-48-46 +.. gh-issue: 132876 +.. nonce: lyTQGZ +.. section: Library + +``ldexp()`` on Windows doesn't round subnormal results before Windows 11, +but should. Python's :func:`math.ldexp` wrapper now does round them, so +results may change slightly, in rare cases of very small results, on Windows +versions before 11. + +.. + +.. date: 2025-04-26-15-50-12 +.. gh-issue: 133009 +.. nonce: etBuz5 +.. section: Library + +:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.__deepcopy__ +<object.__deepcopy__>` when the element is concurrently mutated. Patch by +Bénédikt Tran. + +.. + +.. date: 2025-03-30-16-42-38 +.. gh-issue: 91555 +.. nonce: ShVtwW +.. section: Library + +Ignore log messages generated during handling of log messages, to avoid +deadlock or infinite recursion. [NOTE: This change has since been reverted.] + +.. + +.. date: 2024-10-28-06-54-22 +.. gh-issue: 125028 +.. nonce: GEY8Ws +.. section: Library + +:data:`functools.Placeholder` cannot be passed to :func:`functools.partial` +as a keyword argument. + +.. + +.. date: 2023-02-13-21-56-38 +.. gh-issue: 62824 +.. nonce: CBZzX3 +.. section: Library + +Fix aliases for ``iso8859_8`` encoding. Patch by Dave Goncalves. + +.. + +.. date: 2023-02-13-21-41-34 +.. gh-issue: 86155 +.. nonce: ppIGSC +.. section: Library + +:meth:`html.parser.HTMLParser.close` no longer loses data when the +``<script>`` tag is not closed. Patch by Waylan Limberg. + +.. + +.. date: 2022-07-24-20-56-32 +.. gh-issue: 69426 +.. nonce: unccw7 +.. section: Library + +Fix :class:`html.parser.HTMLParser` to not unescape character entities in +attribute values if they are followed by an ASCII alphanumeric or an equals +sign. + +.. + +.. bpo: 28494 +.. date: 2017-12-30-18-21-00 +.. nonce: Dt_Wks +.. section: Library + +Improve Zip file validation false positive rate in +:func:`zipfile.is_zipfile`. + +.. + +.. date: 2025-05-22-14-48-19 +.. gh-issue: 134381 +.. nonce: 2BXhth +.. section: Core and Builtins + +Fix :exc:`RuntimeError` when using a not-started :class:`threading.Thread` +after calling :func:`os.fork` + +.. + +.. date: 2025-05-21-18-02-56 +.. gh-issue: 127960 +.. nonce: W3J_2X +.. section: Core and Builtins + +PyREPL interactive shell no longer starts with ``__package__`` and +``__file__`` global names set to ``_pyrepl`` package internals. Contributed +by Yuichiro Tachibana. + +.. + +.. date: 2025-05-21-15-14-32 +.. gh-issue: 130397 +.. nonce: aG6EON +.. section: Core and Builtins + +Remove special-casing for C stack depth limits for WASI. Due to +WebAssembly's built-in stack protection this does not pose a security +concern. + +.. + +.. date: 2025-05-20-14-41-50 +.. gh-issue: 128066 +.. nonce: qzzGfv +.. section: Core and Builtins + +Fixes an edge case where PyREPL improperly threw an error when Python is +invoked on a read only filesystem while trying to write history file +entries. + +.. + +.. date: 2025-05-18-14-33-23 +.. gh-issue: 69605 +.. nonce: ZMO49F +.. section: Core and Builtins + +When auto-completing an import in the :term:`REPL`, finding no candidates +now issues no suggestion, rather than suggestions from the current +namespace. + +.. + +.. date: 2025-05-17-20-44-51 +.. gh-issue: 134158 +.. nonce: ewLNLp +.. section: Core and Builtins + +Fix coloring of double braces in f-strings and t-strings in the +:term:`REPL`. + +.. + +.. date: 2025-05-16-20-59-12 +.. gh-issue: 134119 +.. nonce: w8expI +.. section: Core and Builtins + +Fix crash when calling :func:`next` on an exhausted template string +iterator. Patch by Jelle Zijlstra. + +.. + +.. date: 2025-05-16-17-25-52 +.. gh-issue: 134100 +.. nonce: 5-FbLK +.. section: Core and Builtins + +Fix a use-after-free bug that occurs when an imported module isn't in +:data:`sys.modules` after its initial import. Patch by Nico-Posada. + +.. + +.. date: 2025-05-15-11-38-16 +.. gh-issue: 133999 +.. nonce: uBZ8uS +.. section: Core and Builtins + +Fix :exc:`SyntaxError` regression in :keyword:`except` parsing after +:gh:`123440`. + +.. + +.. date: 2025-05-11-13-40-42 +.. gh-issue: 133886 +.. nonce: ryBAyo +.. section: Core and Builtins + +Fix :func:`sys.remote_exec` for non-ASCII paths in non-UTF-8 locales and +non-UTF-8 paths in UTF-8 locales. + +.. + +.. date: 2025-05-10-17-12-27 +.. gh-issue: 133703 +.. nonce: bVM-re +.. section: Core and Builtins + +Fix hashtable in dict can be bigger than intended in some situations. + +.. + +.. date: 2025-05-09-18-11-21 +.. gh-issue: 133778 +.. nonce: pWEV3t +.. section: Core and Builtins + +Fix bug where assigning to the :attr:`~type.__annotations__` attributes of +classes defined under ``from __future__ import annotations`` had no effect. + +.. + +.. date: 2025-05-08-13-48-02 +.. gh-issue: 132762 +.. nonce: tKbygC +.. section: Core and Builtins + +:meth:`~dict.fromkeys` no longer loops forever when adding a small set of +keys to a large base dict. Patch by Angela Liss. + +.. + +.. date: 2025-05-07-23-26-53 +.. gh-issue: 133541 +.. nonce: bHIC55 +.. section: Core and Builtins + +Inconsistent indentation in user input crashed the new REPL when syntax +highlighting was active. This is now fixed. + +.. + +.. date: 2025-05-06-15-01-41 +.. gh-issue: 133516 +.. nonce: RqWVf2 +.. section: Core and Builtins + +Raise :exc:`ValueError` when constants ``True``, ``False`` or ``None`` are +used as an identifier after NFKC normalization. + +.. + +.. date: 2025-04-19-17-16-46 +.. gh-issue: 132542 +.. nonce: 7T_TY_ +.. section: Core and Builtins + +Update :attr:`Thread.native_id <threading.Thread.native_id>` after +:manpage:`fork(2)` to ensure accuracy. Patch by Noam Cohen. + +.. + +.. date: 2025-05-17-14-41-21 +.. gh-issue: 134144 +.. nonce: xVpZik +.. section: C API + +Fix crash when calling :c:func:`Py_EndInterpreter` with a :term:`thread +state` that isn't the initial thread for the interpreter. + +.. + +.. date: 2025-05-21-19-46-28 +.. gh-issue: 134455 +.. nonce: vdwlrq +.. section: Build + +Fixed ``build-details.json`` generation to use the correct ``c_api.headers`` +as defined in :pep:`739`, instead of ``c_api.include``. + +.. + +.. date: 2025-04-30-10-22-08 +.. gh-issue: 131769 +.. nonce: H0oy5x +.. section: Build + +Fix detecting when the build Python in a cross-build is a pydebug build. + +.. + +.. date: 2025-04-16-09-38-48 +.. gh-issue: 117088 +.. nonce: EFt_5c +.. section: Build + +AIX linker don't support -h option, so avoid it through platform check diff --git a/Misc/NEWS.d/3.14.0b3.rst b/Misc/NEWS.d/3.14.0b3.rst new file mode 100644 index 00000000000000..6b8c5ff8f95822 --- /dev/null +++ b/Misc/NEWS.d/3.14.0b3.rst @@ -0,0 +1,571 @@ +.. date: 2025-06-03-18-26-54 +.. gh-issue: 135099 +.. nonce: Q9usKm +.. release date: 2025-06-17 +.. section: Windows + +Fix a crash that could occur on Windows when a background thread waits on a +:c:type:`PyMutex` while the main thread is shutting down the interpreter. + +.. + +.. date: 2025-06-17-08-48-08 +.. gh-issue: 132815 +.. nonce: CY1Esu +.. section: Tests + +Fix test__opcode: add ``JUMP_BACKWARD`` to specialization stats. + +.. + +.. date: 2025-06-14-13-20-17 +.. gh-issue: 135489 +.. nonce: Uh0yVO +.. section: Tests + +Show verbose output for failing tests during PGO profiling step with +--enable-optimizations. + +.. + +.. date: 2025-06-04-13-07-44 +.. gh-issue: 135120 +.. nonce: NapnZT +.. section: Tests + +Add :func:`!test.support.subTests`. + +.. + +.. date: 2025-06-13-15-55-22 +.. gh-issue: 135462 +.. nonce: KBeJpc +.. section: Security + +Fix quadratic complexity in processing specially crafted input in +:class:`html.parser.HTMLParser`. End-of-file errors are now handled +according to the HTML5 specs -- comments and declarations are automatically +closed, tags are ignored. + +.. + +.. date: 2025-06-02-11-32-23 +.. gh-issue: 135034 +.. nonce: RLGjbp +.. section: Security + +Fixes multiple issues that allowed ``tarfile`` extraction filters +(``filter="data"`` and ``filter="tar"``) to be bypassed using crafted +symlinks and hard links. + +Addresses :cve:`2024-12718`, :cve:`2025-4138`, :cve:`2025-4330`, and +:cve:`2025-4517`. + +.. + +.. date: 2025-06-15-03-03-22 +.. gh-issue: 65697 +.. nonce: COdwZd +.. section: Library + +:class:`configparser`'s error message when attempting to write an invalid +key is now more helpful. + +.. + +.. date: 2025-06-14-14-19-13 +.. gh-issue: 135497 +.. nonce: 1pzwdA +.. section: Library + +Fix :func:`os.getlogin` failing for longer usernames on BSD-based platforms. + +.. + +.. date: 2025-06-12-18-15-31 +.. gh-issue: 135429 +.. nonce: mch75_ +.. section: Library + +Fix the argument mismatch in ``_lsprof`` for ``PY_THROW`` event. + +.. + +.. date: 2025-06-12-10-45-02 +.. gh-issue: 135368 +.. nonce: OjWVHL +.. section: Library + +Fix :class:`unittest.mock.Mock` generation on :func:`dataclasses.dataclass` +objects. Now all special attributes are set as it was before :gh:`124429`. + +.. + +.. date: 2025-06-10-16-11-00 +.. gh-issue: 133967 +.. nonce: P0c24q +.. section: Library + +Do not normalize :mod:`locale` name 'C.UTF-8' to 'en_US.UTF-8'. + +.. + +.. date: 2025-06-10-00-42-30 +.. gh-issue: 135321 +.. nonce: UHh9jT +.. section: Library + +Raise a correct exception for values greater than 0x7fffffff for the +``BINSTRING`` opcode in the C implementation of :mod:`pickle`. + +.. + +.. date: 2025-06-08-14-50-34 +.. gh-issue: 135276 +.. nonce: ZLUhV1 +.. section: Library + +Backported bugfixes in zipfile.Path from zipp 3.23. Fixed ``.name``, +``.stem`` and other basename-based properties on Windows when working with a +zipfile on disk. + +.. + +.. date: 2025-06-08-10-22-22 +.. gh-issue: 135244 +.. nonce: Y2SOTJ +.. section: Library + +:mod:`uuid`: when the MAC address cannot be determined, the 48-bit node ID +is now generated with a cryptographically-secure pseudo-random number +generator (CSPRNG) as per :rfc:`RFC 9562, §6.10.3 <9562#section-6.10-3>`. +This affects :func:`~uuid.uuid1` and :func:`~uuid.uuid6`. + +.. + +.. date: 2025-05-31-12-08-12 +.. gh-issue: 134970 +.. nonce: lgSaxq +.. section: Library + +Fix the "unknown action" exception in +:meth:`argparse.ArgumentParser.add_argument_group` to correctly replace the +action class. + +.. + +.. date: 2025-05-30-13-07-29 +.. gh-issue: 134718 +.. nonce: 9Qvhxn +.. section: Library + +:func:`ast.dump` now only omits ``None`` and ``[]`` values if they are +default values. + +.. + +.. date: 2025-05-30-09-46-21 +.. gh-issue: 134939 +.. nonce: Pu3nnm +.. section: Library + +Add the :mod:`concurrent.interpreters` module. See :pep:`734`. + +.. + +.. date: 2025-05-29-06-53-40 +.. gh-issue: 134885 +.. nonce: -_L22o +.. section: Library + +Fix possible crash in the :mod:`compression.zstd` module related to setting +parameter types. Patch by Jelle Zijlstra. + +.. + +.. date: 2025-05-28-20-49-29 +.. gh-issue: 134857 +.. nonce: dVYXVO +.. section: Library + +Improve error report for :mod:`doctest`\ s run with :mod:`unittest`. Remove +:mod:`!doctest` module frames from tracebacks and redundant newline +character from a failure message. + +.. + +.. date: 2025-05-28-15-53-27 +.. gh-issue: 128840 +.. nonce: Nur2pB +.. section: Library + +Fix parsing long IPv6 addresses with embedded IPv4 address. + +.. + +.. date: 2025-05-26-17-06-40 +.. gh-issue: 134637 +.. nonce: 9-3zRL +.. section: Library + +Fix performance regression in calling a :mod:`ctypes` function pointer in +:term:`free threading`. + +.. + +.. date: 2025-05-26-14-04-39 +.. gh-issue: 134696 +.. nonce: P04xUa +.. section: Library + +Built-in HACL* and OpenSSL implementations of hash function constructors now +correctly accept the same *documented* named arguments. For instance, +:func:`~hashlib.md5` could be previously invoked as ``md5(data=data)`` or +``md5(string=string)`` depending on the underlying implementation but these +calls were not compatible. Patch by Bénédikt Tran. + +.. + +.. date: 2025-05-25-23-23-05 +.. gh-issue: 134151 +.. nonce: 13Wwsb +.. section: Library + +:mod:`email`: Fix :exc:`TypeError` in :func:`email.utils.decode_params` when +sorting :rfc:`2231` continuations that contain an unnumbered section. + +.. + +.. date: 2025-05-24-13-10-35 +.. gh-issue: 134210 +.. nonce: 0IuMY2 +.. section: Library + +:func:`curses.window.getch` now correctly handles signals. Patch by Bénédikt +Tran. + +.. + +.. date: 2025-05-18-23-46-21 +.. gh-issue: 134152 +.. nonce: 30HwbX +.. section: Library + +:mod:`email`: Fix parsing of email message ID with invalid domain. + +.. + +.. date: 2025-05-08-13-43-19 +.. gh-issue: 133489 +.. nonce: 9eGS1Z +.. section: Library + +:func:`random.getrandbits` can now generate more that 2\ :sup:`31` bits. +:func:`random.randbytes` can now generate more that 256 MiB. + +.. + +.. date: 2025-05-01-10-56-44 +.. gh-issue: 132813 +.. nonce: rKurvp +.. section: Library + +Improve error messages for incorrect types and values of +:class:`csv.Dialect` attributes. + +.. + +.. date: 2025-04-30-19-32-18 +.. gh-issue: 132969 +.. nonce: EagQ3G +.. section: Library + +Prevent the :class:`~concurrent.futures.ProcessPoolExecutor` executor +thread, which remains running when :meth:`shutdown(wait=False) +<concurrent.futures.Executor.shutdown>`, from attempting to adjust the +pool's worker processes after the object state has already been reset during +shutdown. A combination of conditions, including a worker process having +terminated abormally, resulted in an exception and a potential hang when the +still-running executor thread attempted to replace dead workers within the +pool. + +.. + +.. date: 2025-04-21-01-03-15 +.. gh-issue: 127081 +.. nonce: WXRliX +.. section: Library + +Fix libc thread safety issues with :mod:`os` by replacing ``getlogin`` with +``getlogin_r`` re-entrant version. + +.. + +.. date: 2025-04-07-06-41-54 +.. gh-issue: 131884 +.. nonce: ym9BJN +.. section: Library + +Fix formatting issues in :func:`json.dump` when both *indent* and *skipkeys* +are used. + +.. + +.. date: 2025-03-09-03-13-41 +.. gh-issue: 130999 +.. nonce: tBRBVB +.. section: Library + +Avoid exiting the new REPL and offer suggestions even if there are +non-string candidates when errors occur. + +.. + +.. date: 2025-06-10-17-02-06 +.. gh-issue: 135171 +.. nonce: quHvts +.. section: Documentation + +Document that the :term:`iterator` for the leftmost :keyword:`!for` clause +in the generator expression is created immediately. + +.. + +.. bpo: 45210 +.. date: 2021-09-15-13-07-25 +.. nonce: RtGk7i +.. section: Documentation + +Document that error indicator may be set in tp_dealloc, and how to avoid +clobbering it. + +.. + +.. date: 2025-06-14-01-01-14 +.. gh-issue: 135496 +.. nonce: ER0Me3 +.. section: Core and Builtins + +Fix typo in the f-string conversion type error ("exclamanation" -> +"exclamation"). + +.. + +.. date: 2025-06-12-18-12-42 +.. gh-issue: 135371 +.. nonce: R_YUtR +.. section: Core and Builtins + +Fixed :mod:`asyncio` debugging tools to properly display internal coroutine +call stacks alongside external task dependencies. The ``python -m asyncio +ps`` and ``python -m asyncio pstree`` commands now show complete execution +context. Patch by Pablo Galindo. + +.. + +.. date: 2025-06-11-15-08-10 +.. gh-issue: 127319 +.. nonce: OVGFSZ +.. section: Core and Builtins + +Set the ``allow_reuse_port`` class variable to ``False`` on the XMLRPC, +logging, and HTTP servers. This matches the behavior in prior Python +releases, which is to not allow port reuse. + +.. + +.. date: 2025-06-10-17-37-11 +.. gh-issue: 135171 +.. nonce: P9UDfS +.. section: Core and Builtins + +Reverts the behavior of async generator expressions when created with object +w/o __aiter__ method to the pre-3.13 behavior of raising a TypeError. + +.. + +.. date: 2025-06-09-23-57-37 +.. gh-issue: 130077 +.. nonce: MHknDB +.. section: Core and Builtins + +Properly raise custom syntax errors when incorrect syntax containing names +that are prefixes of soft keywords is encountered. Patch by Pablo Galindo. + +.. + +.. date: 2025-06-06-18-57-30 +.. gh-issue: 135171 +.. nonce: 0YtLq6 +.. section: Core and Builtins + +Reverts the behavior of generator expressions when created with a +non-iterable to the pre-3.13 behavior of raising a TypeError. It is no +longer possible to cause a crash in the debugger by altering the generator +expression's local variables. This is achieved by moving the ``GET_ITER`` +instruction back to the creation of the generator expression and adding an +additional check to ``FOR_ITER``. + +.. + +.. date: 2025-06-02-13-57-40 +.. gh-issue: 116738 +.. nonce: ycJsL8 +.. section: Core and Builtins + +Make methods in :mod:`heapq` thread-safe on the :term:`free threaded <free +threading>` build. + +.. + +.. date: 2025-05-31-10-26-46 +.. gh-issue: 134876 +.. nonce: 8mBGJI +.. section: Core and Builtins + +Add support to :pep:`768` remote debugging for Linux kernels which don't +have CONFIG_CROSS_MEMORY_ATTACH configured. + +.. + +.. date: 2025-05-30-18-09-54 +.. gh-issue: 134889 +.. nonce: Ic9UM- +.. section: Core and Builtins + +Fix handling of a few opcodes that leave operands on the stack when +optimizing ``LOAD_FAST``. + +.. + +.. date: 2025-05-30-15-56-19 +.. gh-issue: 134908 +.. nonce: 3a7PxM +.. section: Core and Builtins + +Fix crash when iterating over lines in a text file on the :term:`free +threaded <free threading>` build. + +.. + +.. date: 2025-05-27-20-29-00 +.. gh-issue: 132617 +.. nonce: EmUfQQ +.. section: Core and Builtins + +Fix :meth:`dict.update` modification check that could incorrectly raise a +"dict mutated during update" error when a different dictionary was modified +that happens to share the same underlying keys object. + +.. + +.. date: 2025-05-27-18-59-54 +.. gh-issue: 134679 +.. nonce: FWPBu6 +.. section: Core and Builtins + +Fix crash in the :term:`free threading` build's QSBR code that could occur +when changing an object's ``__dict__`` attribute. + +.. + +.. date: 2025-05-27-09-19-21 +.. gh-issue: 127682 +.. nonce: 9WwFrM +.. section: Core and Builtins + +No longer call ``__iter__`` twice in list comprehensions. This brings the +behavior of list comprehensions in line with other forms of iteration + +.. + +.. date: 2025-05-26-15-55-50 +.. gh-issue: 133912 +.. nonce: -xAguL +.. section: Core and Builtins + +Fix the C API function ``PyObject_GenericSetDict`` to handle extension +classes with inline values. + +.. + +.. date: 2025-06-05-11-06-07 +.. gh-issue: 134989 +.. nonce: 74p4ud +.. section: C API + +Fix ``Py_RETURN_NONE``, ``Py_RETURN_TRUE`` and ``Py_RETURN_FALSE`` macros in +the limited C API 3.11 and older: don't treat ``Py_None``, ``Py_True`` and +``Py_False`` as immortal. Patch by Victor Stinner. + +.. + +.. date: 2025-06-02-13-19-22 +.. gh-issue: 134989 +.. nonce: sDDyBN +.. section: C API + +Implement :c:func:`PyObject_DelAttr` and :c:func:`PyObject_DelAttrString` as +macros in the limited C API 3.12 and older. Patch by Victor Stinner. + +.. + +.. date: 2025-05-13-16-06-46 +.. gh-issue: 133968 +.. nonce: 6alWst +.. section: C API + +Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string +into a :c:type:`PyUnicodeWriter`. The function is faster than +:c:func:`PyUnicodeWriter_WriteUTF8`, but has an undefined behavior if the +input string contains non-ASCII characters. Patch by Victor Stinner. + +.. + +.. date: 2025-06-16-07-20-28 +.. gh-issue: 119132 +.. nonce: fcI8s7 +.. section: Build + +Remove "experimental" tag from the CPython free-threading build. + +.. + +.. date: 2025-06-14-10-32-11 +.. gh-issue: 135497 +.. nonce: ajlV4F +.. section: Build + +Fix the detection of ``MAXLOGNAME`` in the ``configure.ac`` script. + +.. + +.. date: 2025-05-30-11-02-30 +.. gh-issue: 134923 +.. nonce: gBkRg4 +.. section: Build + +Windows builds with profile-guided optimization enabled now use +``/GENPROFILE`` and ``/USEPROFILE`` instead of deprecated ``/LTCG:`` +options. + +.. + +.. date: 2025-05-27-17-04-20 +.. gh-issue: 134774 +.. nonce: CusyjW +.. section: Build + +Fix :c:macro:`Py_DEBUG` macro redefinition warnings on Windows debug builds. +Patch by Chris Eibl. + +.. + +.. date: 2025-05-24-16-59-20 +.. gh-issue: 134632 +.. nonce: i0W2hc +.. section: Build + +Fixed ``build-details.json`` generation to use ``INCLUDEPY``, in order to +reference the ``pythonX.Y`` subdirectory of the include directory, as +required in :pep:`739`, instead of the top-level include directory. diff --git a/Misc/NEWS.d/3.14.0b4.rst b/Misc/NEWS.d/3.14.0b4.rst new file mode 100644 index 00000000000000..349023ec75865d --- /dev/null +++ b/Misc/NEWS.d/3.14.0b4.rst @@ -0,0 +1,484 @@ +.. date: 2025-06-26-15-58-13 +.. gh-issue: 135968 +.. nonce: C4v_-W +.. release date: 2025-07-08 +.. section: Tools/Demos + +Stubs for ``strip`` are now provided as part of an iOS install. + +.. + +.. date: 2025-06-25-10-36-22 +.. gh-issue: 133600 +.. nonce: bkdgHC +.. section: Tools/Demos + +Backport file reorganization for Tools/wasm/wasi. + +This should make backporting future code changes easier. It also simplifies +instructions around how to do WASI builds in the devguide. + +.. + +.. date: 2025-06-26-15-15-35 +.. gh-issue: 135966 +.. nonce: EBpF8Y +.. section: Tests + +The iOS testbed now handles the ``app_packages`` folder as a site directory. + +.. + +.. date: 2025-06-19-15-29-38 +.. gh-issue: 135494 +.. nonce: FVl9a0 +.. section: Tests + +Fix regrtest to support excluding tests from ``--pgo`` tests. Patch by +Victor Stinner. + +.. + +.. date: 2025-06-27-21-23-19 +.. gh-issue: 136053 +.. nonce: QZxcee +.. section: Security + +:mod:`marshal`: fix a possible crash when deserializing :class:`slice` +objects. + +.. + +.. date: 2025-06-25-14-13-39 +.. gh-issue: 135661 +.. nonce: idjQ0B +.. section: Security + +Fix parsing start and end tags in :class:`html.parser.HTMLParser` according +to the HTML5 standard. + +* Whitespaces no longer accepted between ``</`` and the tag name. + E.g. ``</ script>`` does not end the script section. + +* Vertical tabulation (``\v``) and non-ASCII whitespaces no longer recognized + as whitespaces. The only whitespaces are ``\t\n\r\f`` and space. + +* Null character (U+0000) no longer ends the tag name. + +* Attributes and slashes after the tag name in end tags are now ignored, + instead of terminating after the first ``>`` in quoted attribute value. + E.g. ``</script/foo=">"/>``. + +* Multiple slashes and whitespaces between the last attribute and closing ``>`` + are now ignored in both start and end tags. E.g. ``<a foo=bar/ //>``. + +* Multiple ``=`` between attribute name and value are no longer collapsed. + E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar". + +* [Reverted in :gh:`136927`] Whitespaces between the ``=`` separator and attribute name or value are no + longer ignored. E.g. ``<a foo =bar>`` produces two attributes "foo" and + "=bar", both with value None; ``<a foo= bar>`` produces two attributes: + "foo" with value "" and "bar" with value None. + +.. + +.. date: 2025-06-18-13-28-08 +.. gh-issue: 102555 +.. nonce: nADrzJ +.. section: Security + +Fix comment parsing in :class:`html.parser.HTMLParser` according to the +HTML5 standard. ``--!>`` now ends the comment. ``-- >`` no longer ends the +comment. Support abnormally ended empty comments ``<-->`` and ``<--->``. + +.. + +.. date: 2025-07-05-09-45-04 +.. gh-issue: 136286 +.. nonce: N67Amr +.. section: Library + +Fix pickling failures for protocols 0 and 1 for many objects realted to +subinterpreters. + +.. + +.. date: 2025-07-05-06-56-16 +.. gh-issue: 136316 +.. nonce: 3zj_Do +.. section: Library + +Improve support for evaluating nested forward references in +:func:`typing.evaluate_forward_ref`. + +.. + +.. date: 2025-06-30-11-12-24 +.. gh-issue: 85702 +.. nonce: 0Lrbwu +.. section: Library + +If ``zoneinfo._common.load_tzdata`` is given a package without a resource a +:exc:`zoneinfo.ZoneInfoNotFoundError` is raised rather than a +:exc:`PermissionError`. Patch by Victor Stinner. + +.. + +.. date: 2025-06-27-13-34-28 +.. gh-issue: 136028 +.. nonce: RY727g +.. section: Library + +Fix parsing month names containing "İ" (U+0130, LATIN CAPITAL LETTER I WITH +DOT ABOVE) in :func:`time.strptime`. This affects locales az_AZ, ber_DZ, +ber_MA and crh_UA. + +.. + +.. date: 2025-06-26-17-28-49 +.. gh-issue: 135995 +.. nonce: pPrDCt +.. section: Library + +In the palmos encoding, make byte ``0x9b`` decode to ``›`` (U+203A - SINGLE +RIGHT-POINTING ANGLE QUOTATION MARK). + +.. + +.. date: 2025-06-26-11-52-40 +.. gh-issue: 53203 +.. nonce: TMigBr +.. section: Library + +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats on locales byn_ER, +wal_ET and lzh_TW, and for ``%X`` format on locales ar_SA, bg_BG and lzh_TW. + +.. + +.. date: 2025-06-25-17-25-53 +.. gh-issue: 91555 +.. nonce: xUpTLD +.. section: Library + +An earlier change, which was introduced in 3.14.0b2, has been reverted. It +disabled logging for a logger during handling of log messages for that +logger. Since the reversion, the behaviour should be as it was before +3.14.0b2. + +.. + +.. date: 2025-06-24-14-43-24 +.. gh-issue: 135878 +.. nonce: Db4roX +.. section: Library + +Fixes a crash of :class:`types.SimpleNamespace` on :term:`free threading` +builds, when several threads were calling its :meth:`~object.__repr__` +method at the same time. + +.. + +.. date: 2025-06-24-10-52-35 +.. gh-issue: 135836 +.. nonce: s37351 +.. section: Library + +Fix :exc:`IndexError` in :meth:`asyncio.loop.create_connection` that could +occur when non-\ :exc:`OSError` exception is raised during connection and +socket's ``close()`` raises :exc:`!OSError`. + +.. + +.. date: 2025-06-23-11-04-25 +.. gh-issue: 135836 +.. nonce: -C-c4v +.. section: Library + +Fix :exc:`IndexError` in :meth:`asyncio.loop.create_connection` that could +occur when the Happy Eyeballs algorithm resulted in an empty exceptions list +during connection attempts. + +.. + +.. date: 2025-06-23-10-19-11 +.. gh-issue: 135855 +.. nonce: -J0AGF +.. section: Library + +Raise :exc:`TypeError` instead of :exc:`SystemError` when +:func:`!_interpreters.set___main___attrs` is passed a non-dict object. Patch +by Brian Schubert. + +.. + +.. date: 2025-06-22-16-23-44 +.. gh-issue: 135815 +.. nonce: 0DandH +.. section: Library + +:mod:`netrc`: skip security checks if :func:`os.getuid` is missing. Patch by +Bénédikt Tran. + +.. + +.. date: 2025-06-22-02-16-17 +.. gh-issue: 135640 +.. nonce: FXyFL6 +.. section: Library + +Address bug where it was possible to call +:func:`xml.etree.ElementTree.ElementTree.write` on an ElementTree object +with an invalid root element. This behavior blanked the file passed to +``write`` if it already existed. + +.. + +.. date: 2025-06-18-13-58-13 +.. gh-issue: 135645 +.. nonce: 109nff +.. section: Library + +Added ``supports_isolated_interpreters`` field to +:data:`sys.implementation`. + +.. + +.. date: 2025-06-18-11-43-17 +.. gh-issue: 135646 +.. nonce: r7ekEn +.. section: Library + +Raise consistent :exc:`NameError` exceptions in +:func:`annotationlib.ForwardRef.evaluate` + +.. + +.. date: 2025-06-17-23-13-56 +.. gh-issue: 135557 +.. nonce: Bfcy4v +.. section: Library + +Fix races on :mod:`heapq` updates and :class:`list` reads on the :term:`free +threaded <free threading>` build. + +.. + +.. date: 2025-06-17-22-44-19 +.. gh-issue: 119180 +.. nonce: Ogv8Nj +.. section: Library + +Only fetch globals and locals if necessary in +:func:`annotationlib.get_annotations` + +.. + +.. date: 2025-06-16-15-03-03 +.. gh-issue: 135561 +.. nonce: mJCN8D +.. section: Library + +Fix a crash on DEBUG builds when an HACL* HMAC routine fails. Patch by +Bénédikt Tran. + +.. + +.. date: 2025-06-14-12-06-55 +.. gh-issue: 135487 +.. nonce: KdVFff +.. section: Library + +Fix :meth:`!reprlib.Repr.repr_int` when given integers with more than +:func:`sys.get_int_max_str_digits` digits. Patch by Bénédikt Tran. + +.. + +.. date: 2025-06-10-21-42-04 +.. gh-issue: 135335 +.. nonce: WnUqb_ +.. section: Library + +:mod:`multiprocessing`: Flush ``stdout`` and ``stderr`` after preloading +modules in the ``forkserver``. + +.. + +.. date: 2025-06-03-12-59-17 +.. gh-issue: 135069 +.. nonce: xop30V +.. section: Library + +Fix the "Invalid error handling" exception in +:class:`!encodings.idna.IncrementalDecoder` to correctly replace the +'errors' parameter. + +.. + +.. date: 2025-06-02-14-36-28 +.. gh-issue: 130662 +.. nonce: Gpr2GB +.. section: Library + ++Accept leading zeros in precision and width fields for ++:class:`~decimal.Decimal` formatting, for example ``format(Decimal(1.25), +'.016f')``. + +.. + +.. date: 2025-06-02-14-28-30 +.. gh-issue: 130662 +.. nonce: EIgIR8 +.. section: Library + +Accept leading zeros in precision and width fields for +:class:`~fractions.Fraction` formatting, for example ``format(Fraction(1, +3), '.016f')``. + +.. + +.. date: 2025-04-07-10-20-16 +.. gh-issue: 87790 +.. nonce: X2SjJe +.. section: Library + +Support underscore and comma as thousands separators in the fractional part +for :class:`~fractions.Fraction`'s formatting. Patch by Sergey B Kirpichev. + +.. + +.. date: 2025-04-07-09-53-54 +.. gh-issue: 87790 +.. nonce: 6nj3zQ +.. section: Library + +Support underscore and comma as thousands separators in the fractional part +for :class:`~decimal.Decimal`'s formatting. Patch by Sergey B Kirpichev. + +.. + +.. date: 2025-03-11-05-24-14 +.. gh-issue: 130664 +.. nonce: g0yNMm +.. section: Library + +Handle corner-case for :class:`~fractions.Fraction`'s formatting: treat +zero-padding (preceding the width field by a zero (``'0'``) character) as an +equivalent to a fill character of ``'0'`` with an alignment type of ``'='``, +just as in case of :class:`float`'s. + +.. + +.. date: 2025-07-01-21-04-47 +.. gh-issue: 136155 +.. nonce: ufmH4Q +.. section: Documentation + +EPUB builds are fixed by excluding non-XHTML-compatible tags. + +.. + +.. date: 2025-07-06-14-53-19 +.. gh-issue: 109700 +.. nonce: KVNQQi +.. section: Core and Builtins + +Fix memory error handling in :c:func:`PyDict_SetDefault`. + +.. + +.. date: 2025-06-26-15-25-51 +.. gh-issue: 78465 +.. nonce: MbDN8X +.. section: Core and Builtins + +Fix error message for ``cls.__new__(cls, ...)`` where ``cls`` is not +instantiable builtin or extension type (with ``tp_new`` set to ``NULL``). + +.. + +.. date: 2025-06-24-06-41-47 +.. gh-issue: 129958 +.. nonce: EaJuS0 +.. section: Core and Builtins + +Differentiate between t-strings and f-strings in syntax error for newlines +in format specifiers of single-quoted interpolated strings. + +.. + +.. date: 2025-06-23-18-08-32 +.. gh-issue: 135871 +.. nonce: 50C528 +.. section: Core and Builtins + +Non-blocking mutex lock attempts now return immediately when the lock is +busy instead of briefly spinning in the :term:`free threading` build. + +.. + +.. date: 2025-06-18-16-45-36 +.. gh-issue: 135106 +.. nonce: cpl6Aq +.. section: Core and Builtins + +Restrict the trashcan mechanism to GC'ed objects and untrack them while in +the trashcan to prevent the GC and trashcan mechanisms conflicting. + +.. + +.. date: 2025-06-17-22-34-58 +.. gh-issue: 135607 +.. nonce: ucsLVu +.. section: Core and Builtins + +Fix potential :mod:`weakref` races in an object's destructor on the +:term:`free threaded <free threading>` build. + +.. + +.. date: 2025-06-17-12-50-48 +.. gh-issue: 135608 +.. nonce: PnHckD +.. section: Core and Builtins + +Fix a crash in the JIT involving attributes of modules. + +.. + +.. date: 2025-06-16-02-31-42 +.. gh-issue: 135543 +.. nonce: 6b0HOF +.. section: Core and Builtins + +Emit ``sys.remote_exec`` audit event when :func:`sys.remote_exec` is called +and migrate ``remote_debugger_script`` to +``cpython.remote_debugger_script``. + +.. + +.. date: 2025-05-31-19-24-54 +.. gh-issue: 134280 +.. nonce: NDVbzY +.. section: Core and Builtins + +Disable constant folding for ``~`` with a boolean argument. This moves the +deprecation warning from compile time to runtime. + +.. + +.. date: 2025-06-25-01-03-10 +.. gh-issue: 135906 +.. nonce: UBrCWq +.. section: C API + +Fix compilation errors when compiling the internal headers with a C++ +compiler. + +.. + +.. date: 2025-05-19-18-09-20 +.. gh-issue: 134273 +.. nonce: ZAliyy +.. section: Build + +Add support for configuring compiler flags for the JIT with ``CFLAGS_JIT`` diff --git a/Misc/NEWS.d/3.14.0rc1.rst b/Misc/NEWS.d/3.14.0rc1.rst new file mode 100644 index 00000000000000..b7e9a2a7b122fc --- /dev/null +++ b/Misc/NEWS.d/3.14.0rc1.rst @@ -0,0 +1,316 @@ +.. date: 2025-07-05-15-10-42 +.. gh-issue: 136251 +.. nonce: GRM6o8 +.. release date: 2025-07-22 +.. section: Tools/Demos + +Fixes and usability improvements for ``Tools/wasm/emscripten/web_example`` + +.. + +.. date: 2025-07-21-14-15-25 +.. gh-issue: 135661 +.. nonce: nAxXw5 +.. section: Security + +Fix parsing attributes with whitespaces around the ``=`` separator in +:class:`html.parser.HTMLParser` according to the HTML5 standard. + +.. + +.. date: 2025-06-09-20-38-25 +.. gh-issue: 118350 +.. nonce: KgWCcP +.. section: Security + +Fix support of escapable raw text mode (elements "textarea" and "title") in +:class:`html.parser.HTMLParser`. + +.. + +.. date: 2025-07-21-22-35-50 +.. gh-issue: 136170 +.. nonce: QUlc78 +.. section: Library + +Removed the unreleased ``zipfile.ZipFile.data_offset`` property added in +3.14.0a7 as it wasn't fully clear which behavior it should have in some +situations so the result was not always what a user might expect. + +.. + +.. date: 2025-07-21-16-10-24 +.. gh-issue: 124621 +.. nonce: wyoWc1 +.. section: Library + +pyrepl now works in Emscripten. + +.. + +.. date: 2025-07-20-16-02-00 +.. gh-issue: 136874 +.. nonce: cLC3o1 +.. section: Library + +Discard URL query and fragment in :func:`urllib.request.url2pathname`. + +.. + +.. date: 2025-07-19-16-20-54 +.. gh-issue: 130645 +.. nonce: O-dYcN +.. section: Library + +Enable color help by default in :mod:`argparse`. + +.. + +.. date: 2025-07-11-23-04-39 +.. gh-issue: 136549 +.. nonce: oAi8u4 +.. section: Library + +Fix signature of :func:`threading.excepthook`. + +.. + +.. date: 2025-07-11-03-39-15 +.. gh-issue: 136523 +.. nonce: s7caKL +.. section: Library + +Fix :class:`wave.Wave_write` emitting an unraisable when open raises. + +.. + +.. date: 2025-07-10-10-18-19 +.. gh-issue: 52876 +.. nonce: 9Vjrd8 +.. section: Library + +Add missing ``keepends`` (default ``True``) parameter to +:meth:`!codecs.StreamReaderWriter.readline` and +:meth:`!codecs.StreamReaderWriter.readlines`. + +.. + +.. date: 2025-07-10-00-47-37 +.. gh-issue: 136470 +.. nonce: KlUEUG +.. section: Library + +Correct :class:`concurrent.futures.InterpreterPoolExecutor`'s default thread +name. + +.. + +.. date: 2025-07-09-20-29-30 +.. gh-issue: 136476 +.. nonce: HyLLzh +.. section: Library + +Fix a bug that was causing the ``get_async_stack_trace`` function to miss +some frames in the stack trace. + +.. + +.. date: 2025-07-08-20-58-01 +.. gh-issue: 136434 +.. nonce: uuJsjS +.. section: Library + +Fix docs generation of ``UnboundItem`` in :mod:`concurrent.interpreters` +when running with :option:`-OO`. + +.. + +.. date: 2025-07-07-22-12-32 +.. gh-issue: 136380 +.. nonce: 1b_nXl +.. section: Library + +Raises :exc:`AttributeError` when accessing +:class:`concurrent.futures.InterpreterPoolExecutor` and subinterpreters are +not available. + +.. + +.. date: 2025-06-28-11-32-57 +.. gh-issue: 134759 +.. nonce: AjjKcG +.. section: Library + +Fix :exc:`UnboundLocalError` in :func:`email.message.Message.get_payload` +when the payload to decode is a :class:`bytes` object. Patch by Kliment +Lamonov. + +.. + +.. date: 2025-05-25-11-02-05 +.. gh-issue: 134657 +.. nonce: 3YFhR9 +.. section: Library + +:mod:`asyncio`: Remove some private names from ``asyncio.__all__``. + +.. + +.. date: 2025-07-19-12-37-05 +.. gh-issue: 136801 +.. nonce: XU_tF2 +.. section: Core and Builtins + +Fix PyREPL syntax highlighting on match cases after multi-line case. +Contributed by Olga Matoula. + +.. + +.. date: 2025-07-12-09-59-14 +.. gh-issue: 136421 +.. nonce: ZD1rNj +.. section: Core and Builtins + +Fix crash when initializing :mod:`datetime` concurrently. + +.. + +.. date: 2025-07-11-13-45-48 +.. gh-issue: 136541 +.. nonce: uZ_-Ju +.. section: Core and Builtins + +Fix some issues with the perf trampolines on x86-64 and aarch64. The +trampolines were not being generated correctly for some cases, which could +lead to the perf integration not working correctly. Patch by Pablo Galindo. + +.. + +.. date: 2025-07-10-23-23-50 +.. gh-issue: 136517 +.. nonce: _NHJyv +.. section: Core and Builtins + +Fixed a typo that prevented printing of uncollectable objects when the +:const:`gc.DEBUG_UNCOLLECTABLE` mode was set. + +.. + +.. date: 2025-07-10-15-53-16 +.. gh-issue: 136525 +.. nonce: xAko0e +.. section: Core and Builtins + +Fix issue where per-thread bytecode was not instrumented for newly created +threads. + +.. + +.. date: 2025-07-08-23-53-51 +.. gh-issue: 132661 +.. nonce: B84iYt +.. section: Core and Builtins + +``Interpolation.expression`` now has a default, the empty string. + +.. + +.. date: 2025-07-08-23-22-08 +.. gh-issue: 132661 +.. nonce: 34ftJl +.. section: Core and Builtins + +Reflect recent :pep:`750` change. + +Disallow concatenation of ``string.templatelib.Template`` and :class:`str`. +Also, disallow implicit concatenation of t-string literals with string or +f-string literals. + +.. + +.. date: 2025-06-12-00-03-34 +.. gh-issue: 116738 +.. nonce: iBBAdo +.. section: Core and Builtins + +Make functions in :mod:`grp` thread-safe on the :term:`free threaded <free +threading>` build. + +.. + +.. date: 2025-06-06-02-24-42 +.. gh-issue: 135148 +.. nonce: r-t2sC +.. section: Core and Builtins + +Fixed a bug where f-string debug expressions (using =) would incorrectly +strip out parts of strings containing escaped quotes and # characters. Patch +by Pablo Galindo. + +.. + +.. date: 2025-06-03-21-06-22 +.. gh-issue: 133136 +.. nonce: Usnvri +.. section: Core and Builtins + +Limit excess memory usage in the :term:`free threading` build when a large +dictionary or list is resized and accessed by multiple threads. + +.. + +.. date: 2025-05-17-20-56-05 +.. gh-issue: 91153 +.. nonce: afgtG2 +.. section: Core and Builtins + +Fix a crash when a :class:`bytearray` is concurrently mutated during item +assignment. + +.. + +.. date: 2025-04-16-12-01-13 +.. gh-issue: 127971 +.. nonce: pMDOQ0 +.. section: Core and Builtins + +Fix off-by-one read beyond the end of a string in string search. + +.. + +.. date: 2025-07-22-15-18-08 +.. gh-issue: 112068 +.. nonce: 4WvT-8 +.. section: C API + +Revert support of nullable arguments in :c:func:`PyArg_Parse`. + +.. + +.. date: 2025-06-24-11-10-01 +.. gh-issue: 133296 +.. nonce: lIEuVJ +.. section: C API + +New variants for the critical section API that accept one or two +:c:type:`PyMutex` pointers rather than :c:type:`PyObject` instances are now +public in the non-limited C API. + +.. + +.. date: 2025-05-20-17-13-51 +.. gh-issue: 134009 +.. nonce: CpCmry +.. section: C API + +Expose :c:func:`PyMutex_IsLocked` as part of the public C API. + +.. + +.. date: 2025-07-18-17-15-00 +.. gh-issue: 135621 +.. nonce: 9cyCNb +.. section: Build + +PyREPL no longer depends on the :mod:`curses` standard library. Contributed +by Łukasz Langa. diff --git a/Misc/NEWS.d/3.14.0rc2.rst b/Misc/NEWS.d/3.14.0rc2.rst new file mode 100644 index 00000000000000..ffa043eebbd7a0 --- /dev/null +++ b/Misc/NEWS.d/3.14.0rc2.rst @@ -0,0 +1,216 @@ +.. date: 2025-08-06-06-29-12 +.. gh-issue: 137450 +.. nonce: JZypb7 +.. release date: 2025-08-14 +.. section: macOS + +macOS installer shell path management improvements: separate the installer +``Shell profile updater`` postinstall script from the ``Update Shell +Profile.command`` to enable more robust error handling. + +.. + +.. date: 2025-07-27-02-17-40 +.. gh-issue: 137134 +.. nonce: pjgITs +.. section: macOS + +Update macOS installer to ship with SQLite version 3.50.4. + +.. + +.. date: 2025-07-27-02-16-53 +.. gh-issue: 137134 +.. nonce: W0WpDF +.. section: Windows + +Update Windows installer to ship with SQLite 3.50.4. + +.. + +.. date: 2025-08-08-15-00-38 +.. gh-issue: 137426 +.. nonce: lW-Rk2 +.. section: Library + +Remove the code deprecation of ``importlib.abc.ResourceLoader``. It is +documented as deprecated, but left for backwards compatibility with other +classes in ``importlib.abc``. + +.. + +.. date: 2025-07-31-10-31-56 +.. gh-issue: 137282 +.. nonce: GOCwIC +.. section: Library + +Fix tab completion and :func:`dir` on :mod:`concurrent.futures`. + +.. + +.. date: 2025-07-30-18-07-33 +.. gh-issue: 137257 +.. nonce: XBtzf2 +.. section: Library + +Bump the version of pip bundled in ensurepip to version 25.2 + +.. + +.. date: 2025-07-29-21-18-31 +.. gh-issue: 137226 +.. nonce: B_4lpu +.. section: Library + +Fix behavior of :meth:`annotationlib.ForwardRef.evaluate` when the +*type_params* parameter is passed and the name of a type param is also +present in an enclosing scope. + +.. + +.. date: 2025-07-25-09-21-56 +.. gh-issue: 130522 +.. nonce: Crwq68 +.. section: Library + +Fix unraisable :exc:`TypeError` raised during :term:`interpreter shutdown` +in the :mod:`threading` module. + +.. + +.. date: 2025-07-24-00-38-07 +.. gh-issue: 137059 +.. nonce: fr64oW +.. section: Library + +Fix handling of file URLs with a Windows drive letter in the URL authority +by :func:`urllib.request.url2pathname`. This fixes a regression in earlier +pre-releases of Python 3.14. + +.. + +.. date: 2025-07-23-00-35-29 +.. gh-issue: 130577 +.. nonce: c7EITy +.. section: Library + +:mod:`tarfile` now validates archives to ensure member offsets are +non-negative. (Contributed by Alexander Enrique Urieles Nieto in +:gh:`130577`.) + +.. + +.. date: 2025-07-20-16-56-55 +.. gh-issue: 135228 +.. nonce: n_XIao +.. section: Library + +When :mod:`dataclasses` replaces a class with a slotted dataclass, the +original class can now be garbage collected again. Earlier changes in Python +3.14 caused this class to always remain in existence together with the +replacement class synthesized by :mod:`dataclasses`. + +.. + +.. date: 2025-07-01-23-00-58 +.. gh-issue: 136155 +.. nonce: 4siQQO +.. section: Documentation + +We are now checking for fatal errors in EPUB builds in CI. + +.. + +.. date: 2025-08-06-15-39-54 +.. gh-issue: 137400 +.. nonce: xIw0zs +.. section: Core and Builtins + +Fix a crash in the :term:`free threading` build when disabling profiling or +tracing across all threads with :c:func:`PyEval_SetProfileAllThreads` or +:c:func:`PyEval_SetTraceAllThreads` or their Python equivalents +:func:`threading.settrace_all_threads` and +:func:`threading.setprofile_all_threads`. + +.. + +.. date: 2025-08-02-23-04-57 +.. gh-issue: 137314 +.. nonce: wjEdzD +.. section: Core and Builtins + +Fixed a regression where raw f-strings incorrectly interpreted escape +sequences in format specifications. Raw f-strings now properly preserve +literal backslashes in format specs, matching the behavior from Python 3.11. +For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead +of ``'ÿ'``. Patch by Pablo Galindo. + +.. + +.. date: 2025-08-02-10-27-53 +.. gh-issue: 137308 +.. nonce: at05p_ +.. section: Core and Builtins + +A standalone docstring in a node body is optimized as a :keyword:`pass` +statement to ensure that the node's body is never empty. There was a +:exc:`ValueError` in :func:`compile` otherwise. + +.. + +.. date: 2025-08-01-18-54-31 +.. gh-issue: 137288 +.. nonce: FhE7ku +.. section: Core and Builtins + +Fix bug where some bytecode instructions of a boolean expression are not +associated with the correct exception handler. + +.. + +.. date: 2025-07-28-19-11-34 +.. gh-issue: 134291 +.. nonce: IiB9Id +.. section: Core and Builtins + +Remove some newer macOS API usage from the JIT compiler in order to restore +compatibility with older OSX 10.15 deployment targets. + +.. + +.. date: 2025-07-25-22-31-52 +.. gh-issue: 131338 +.. nonce: zJDCMp +.. section: Core and Builtins + +Disable computed stack limit checks on non-glibc linux platforms to fix +crashes on deep recursion. + +.. + +.. date: 2025-07-24-17-30-58 +.. gh-issue: 136870 +.. nonce: ncx82J +.. section: Core and Builtins + +Fix data races while de-instrumenting bytecode of code objects running +concurrently in threads. + +.. + +.. date: 2025-08-13-13-41-04 +.. gh-issue: 137573 +.. nonce: r6uwRf +.. section: C API + +Mark ``_PyOptimizer_Optimize`` as :c:macro:`Py_NO_INLINE` to prevent stack +overflow crashes on macOS. + +.. + +.. date: 2025-08-13-12-10-12 +.. gh-issue: 132339 +.. nonce: 3Czz5y +.. section: Build + +Add support for OpenSSL 3.5. diff --git a/Misc/NEWS.d/3.14.0rc3.rst b/Misc/NEWS.d/3.14.0rc3.rst new file mode 100644 index 00000000000000..339f0ab3b1c7c9 --- /dev/null +++ b/Misc/NEWS.d/3.14.0rc3.rst @@ -0,0 +1,295 @@ +.. date: 2025-09-15-15-34-29 +.. gh-issue: 138896 +.. nonce: lkiF_7 +.. release date: 2025-09-18 +.. section: Windows + +Fix error installing C runtime on non-updated Windows machines + +.. + +.. date: 2025-08-21-14-04-50 +.. gh-issue: 137873 +.. nonce: qxffLt +.. section: Tools/Demos + +The iOS test runner has been simplified, resolving some issues that have +been observed using the runner in GitHub Actions and Azure Pipelines test +environments. + +.. + +.. date: 2025-06-18-13-34-55 +.. gh-issue: 135661 +.. nonce: NZlpWf +.. section: Security + +Fix CDATA section parsing in :class:`html.parser.HTMLParser` according to +the HTML5 standard: ``] ]>`` and ``]] >`` no longer end the CDATA section. +Add private method ``_set_support_cdata()`` which can be used to specify how +to parse ``<[CDATA[`` --- as a CDATA section in foreign content (SVG or +MathML) or as a bogus comment in the HTML namespace. + +.. + +.. date: 2025-09-16-19-05-29 +.. gh-issue: 138998 +.. nonce: URl0Y_ +.. section: Library + +Update bundled libexpat to 2.7.2 + +.. + +.. date: 2025-09-16-15-56-29 +.. gh-issue: 118803 +.. nonce: aOPtmL +.. section: Library + +Add back :class:`collections.abc.ByteString` and :class:`typing.ByteString`. +Both had been removed in prior alpha, beta and release candidates for Python +3.14, but their removal has now been postponed to Python 3.17. + +.. + +.. date: 2025-09-15-13-09-19 +.. gh-issue: 137226 +.. nonce: HH3_ik +.. section: Library + +Fix :func:`typing.get_type_hints` calls on generic :class:`typing.TypedDict` +classes defined with string annotations. + +.. + +.. date: 2025-09-12-01-01-05 +.. gh-issue: 138804 +.. nonce: 46ZukT +.. section: Library + +Raise :exc:`TypeError` instead of :exc:`AttributeError` when an argument of +incorrect type is passed to :func:`shlex.quote`. This restores the behavior +of the function prior to 3.14. + +.. + +.. date: 2025-09-10-10-02-59 +.. gh-issue: 128636 +.. nonce: ldRKGZ +.. section: Library + +Fix crash in PyREPL when os.environ is overwritten with an invalid value for +mac + +.. + +.. date: 2025-09-06-11-26-21 +.. gh-issue: 138514 +.. nonce: 66ltOb +.. section: Library + +Raise :exc:`ValueError` when a multi-character string is passed to the +*echo_char* parameter of :func:`getpass.getpass`. Patch by Benjamin Johnson. + +.. + +.. date: 2025-09-05-07-50-18 +.. gh-issue: 138515 +.. nonce: E3M-pu +.. section: Library + +:mod:`email` is added to Emscripten build. + +.. + +.. date: 2025-09-05-05-53-43 +.. gh-issue: 99948 +.. nonce: KMSlG6 +.. section: Library + +:func:`ctypes.util.find_library` now works in Emscripten build. + +.. + +.. date: 2025-08-30-10-58-15 +.. gh-issue: 138253 +.. nonce: 9Ehj-N +.. section: Library + +Add the *block* parameter in the :meth:`!put` and :meth:`!get` methods of +the :mod:`concurrent.interpreters` queues for compatibility with the +:class:`queue.Queue` interface. + +.. + +.. date: 2025-08-25-18-06-04 +.. gh-issue: 138133 +.. nonce: Zh9rGo +.. section: Library + +Prevent infinite traceback loop when sending CTRL^C to Python through +``strace``. + +.. + +.. date: 2025-08-18-16-02-51 +.. gh-issue: 134869 +.. nonce: GnAjnU +.. section: Library + +Fix an issue where pressing Ctrl+C during tab completion in the REPL would +leave the autocompletion menu in a corrupted state. + +.. + +.. date: 2025-08-16-18-11-41 +.. gh-issue: 90548 +.. nonce: q3aJUK +.. section: Library + +Fix ``musl`` detection for :func:`platform.libc_ver` on Alpine Linux if +compiled with --strip-all. + +.. + +.. date: 2025-07-13-13-31-22 +.. gh-issue: 136134 +.. nonce: mh6VjS +.. section: Library + +:meth:`!SMTP.auth_cram_md5` now raises an :exc:`~smtplib.SMTPException` +instead of a :exc:`ValueError` if Python has been built without MD5 support. +In particular, :class:`~smtplib.SMTP` clients will not attempt to use this +method even if the remote server is assumed to support it. Patch by Bénédikt +Tran. + +.. + +.. date: 2025-07-13-11-20-05 +.. gh-issue: 136134 +.. nonce: xhh0Kq +.. section: Library + +:meth:`IMAP4.login_cram_md5 <imaplib.IMAP4.login_cram_md5>` now raises an +:exc:`IMAP4.error <imaplib.IMAP4.error>` if CRAM-MD5 authentication is not +supported. Patch by Bénédikt Tran. + +.. + +.. date: 2025-06-01-11-14-00 +.. gh-issue: 134953 +.. nonce: ashdfs +.. section: Library + +Expand ``_colorize`` theme with ``keyword_constant`` and implement in +:term:`repl`. + +.. + +.. date: 2025-09-10-14-53-59 +.. gh-issue: 71810 +.. nonce: ppf0J- +.. section: Core and Builtins + +Raise :exc:`OverflowError` for ``(-1).to_bytes()`` for signed conversions +when bytes count is zero. Patch by Sergey B Kirpichev. + +.. + +.. date: 2025-09-05-01-19-04 +.. gh-issue: 138192 +.. nonce: erluq5 +.. section: Core and Builtins + +Fix :mod:`contextvars` initialization so that all subinterpreters are +assigned the :attr:`~contextvars.Token.MISSING` value. + +.. + +.. date: 2025-09-03-17-00-30 +.. gh-issue: 138479 +.. nonce: qUxgWs +.. section: Core and Builtins + +Fix a crash when a generic object's ``__typing_subst__`` returns an object +that isn't a :class:`tuple`. + +.. + +.. date: 2025-09-02-09-10-06 +.. gh-issue: 138372 +.. nonce: h1Xk4- +.. section: Core and Builtins + +Fix :exc:`SyntaxWarning` emitted for erroneous subscript expressions +involving :ref:`template string literals <t-strings>`. Patch by Brian +Schubert. + +.. + +.. date: 2025-09-01-16-09-02 +.. gh-issue: 138318 +.. nonce: t-WEN5 +.. section: Core and Builtins + +The default REPL now avoids highlighting built-in names (for instance +:class:`set` or :func:`format`) when they are used as attribute names (for +instance in ``value.set`` or ``text.format``). + +.. + +.. date: 2025-09-01-13-54-43 +.. gh-issue: 138349 +.. nonce: 0fGmAi +.. section: Core and Builtins + +Fix crash in certain cases where a module contains both a module-level +annotation and a comprehension. + +.. + +.. date: 2025-08-22-11-39-40 +.. gh-issue: 137384 +.. nonce: j4b_in +.. section: Core and Builtins + +Fix a crash when using the :mod:`warnings` module in a finalizer at +shutdown. Patch by Kumar Aditya. + +.. + +.. date: 2025-08-17-13-36-53 +.. gh-issue: 137883 +.. nonce: 55VDCN +.. section: Core and Builtins + +Fix runaway recursion when calling a function with keyword arguments. + +.. + +.. date: 2025-08-15-15-45-26 +.. gh-issue: 137079 +.. nonce: YEow69 +.. section: Core and Builtins + +Fix keyword typo recognition when parsing files. Patch by Pablo Galindo. + +.. + +.. date: 2025-08-14-14-18-29 +.. gh-issue: 137728 +.. nonce: HdYS9R +.. section: Core and Builtins + +Fix the JIT's handling of many local variables. This previously caused a +segfault. + +.. + +.. date: 2025-08-10-21-34-12 +.. gh-issue: 137576 +.. nonce: 0ZicS- +.. section: Core and Builtins + +Fix for incorrect source code being shown in tracebacks from the Basic REPL +when :envvar:`PYTHONSTARTUP` is given. Patch by Adam Hartz. diff --git a/Misc/NEWS.d/3.14.1.rst b/Misc/NEWS.d/3.14.1.rst new file mode 100644 index 00000000000000..c00eb371b11fd1 --- /dev/null +++ b/Misc/NEWS.d/3.14.1.rst @@ -0,0 +1,2105 @@ +.. date: 2025-10-08-22-54-38 +.. gh-issue: 139810 +.. nonce: LAaemi +.. release date: 2025-12-02 +.. section: Windows + +Installing with ``py install 3[.x]-dev`` will now select final versions as +well as prereleases. + +.. + +.. date: 2025-11-18-13-55-47 +.. gh-issue: 141692 +.. nonce: tud9if +.. section: Tools/Demos + +Each slice of an iOS XCframework now contains a ``lib`` folder that contains +a symlink to the libpython dylib. This allows binary modules to be compiled +for iOS using dynamic libreary linking, rather than Framework linking. + +.. + +.. date: 2025-11-12-12-54-28 +.. gh-issue: 141442 +.. nonce: 50dS3P +.. section: Tools/Demos + +The iOS testbed now correctly handles test arguments that contain spaces. + +.. + +.. date: 2025-10-29-15-20-19 +.. gh-issue: 140702 +.. nonce: ZXtW8h +.. section: Tools/Demos + +The iOS testbed app will now expose the ``GITHUB_ACTIONS`` environment +variable to iOS apps being tested. + +.. + +.. date: 2025-08-06-11-54-55 +.. gh-issue: 137484 +.. nonce: 8iFAQs +.. section: Tools/Demos + +Have ``Tools/wasm/wasi`` put the build Python into a directory named after +the build triple instead of "build". + +.. + +.. date: 2025-07-30-11-15-47 +.. gh-issue: 137248 +.. nonce: 8IxwY3 +.. section: Tools/Demos + +Add a ``--logdir`` option to ``Tools/wasm/wasi`` for specifying where to +write log files. + +.. + +.. date: 2025-07-30-10-28-35 +.. gh-issue: 137243 +.. nonce: NkdUqH +.. section: Tools/Demos + +Have Tools/wasm/wasi detect a WASI SDK install in /opt when it was directly +extracted from a release tarball. + +.. + +.. date: 2025-10-23-16-39-49 +.. gh-issue: 140482 +.. nonce: ZMtyeD +.. section: Tests + +Preserve and restore the state of ``stty echo`` as part of the test +environment. + +.. + +.. date: 2025-10-15-00-52-12 +.. gh-issue: 140082 +.. nonce: fpET50 +.. section: Tests + +Update ``python -m test`` to set ``FORCE_COLOR=1`` when being run with color +enabled so that :mod:`unittest` which is run by it with redirected output +will output in color. + +.. + +.. date: 2025-09-22-15-40-09 +.. gh-issue: 139208 +.. nonce: Tc13dl +.. section: Tests + +Fix regrtest ``--fast-ci --verbose``: don't ignore the ``--verbose`` option +anymore. Patch by Victor Stinner. + +.. + +.. date: 2025-07-09-21-45-51 +.. gh-issue: 136442 +.. nonce: jlbklP +.. section: Tests + +Use exitcode ``1`` instead of ``5`` if :func:`unittest.TestCase.setUpClass` +raises an exception + +.. + +.. date: 2025-10-07-19-31-34 +.. gh-issue: 139700 +.. nonce: vNHU1O +.. section: Security + +Check consistency of the zip64 end of central directory record. Support +records with "zip64 extensible data" if there are no bytes prepended to the +ZIP file. + +.. + +.. date: 2025-09-24-13-39-56 +.. gh-issue: 139283 +.. nonce: jODz_q +.. section: Security + +:mod:`sqlite3`: correctly handle maximum number of rows to fetch in +:meth:`Cursor.fetchmany <sqlite3.Cursor.fetchmany>` and reject negative +values for :attr:`Cursor.arraysize <sqlite3.Cursor.arraysize>`. Patch by +Bénédikt Tran. + +.. + +.. date: 2025-08-15-23-08-44 +.. gh-issue: 137836 +.. nonce: b55rhh +.. section: Security + +Add support of the "plaintext" element, RAWTEXT elements "xmp", "iframe", +"noembed" and "noframes", and optionally RAWTEXT element "noscript" in +:class:`html.parser.HTMLParser`. + +.. + +.. date: 2025-06-28-13-23-53 +.. gh-issue: 136063 +.. nonce: aGk0Jv +.. section: Security + +:mod:`email.message`: ensure linear complexity for legacy HTTP parameters +parsing. Patch by Bénédikt Tran. + +.. + +.. date: 2025-05-30-22-33-27 +.. gh-issue: 136065 +.. nonce: bu337o +.. section: Security + +Fix quadratic complexity in :func:`os.path.expandvars`. + +.. + +.. date: 2024-05-23-11-47-48 +.. gh-issue: 119451 +.. nonce: qkJe9- +.. section: Security + +Fix a potential memory denial of service in the :mod:`http.client` module. +When connecting to a malicious server, it could cause an arbitrary amount of +memory to be allocated. This could have led to symptoms including a +:exc:`MemoryError`, swapping, out of memory (OOM) killed processes or +containers, or even system crashes. + +.. + +.. date: 2024-05-21-22-11-31 +.. gh-issue: 119342 +.. nonce: BTFj4Z +.. section: Security + +Fix a potential memory denial of service in the :mod:`plistlib` module. When +reading a Plist file received from untrusted source, it could cause an +arbitrary amount of memory to be allocated. This could have led to symptoms +including a :exc:`MemoryError`, swapping, out of memory (OOM) killed +processes or containers, or even system crashes. + +.. + +.. date: 2025-11-29-04-20-44 +.. gh-issue: 74389 +.. nonce: pW3URj +.. section: Library + +When the stdin being used by a :class:`subprocess.Popen` instance is closed, +this is now ignored in :meth:`subprocess.Popen.communicate` instead of +leaving the class in an inconsistent state. + +.. + +.. date: 2025-11-29-03-02-45 +.. gh-issue: 87512 +.. nonce: bn4xbm +.. section: Library + +Fix :func:`subprocess.Popen.communicate` timeout handling on Windows when +writing large input. Previously, the timeout was ignored during stdin +writing, causing the method to block indefinitely if the child process did +not consume input quickly. The stdin write is now performed in a background +thread, allowing the timeout to be properly enforced. + +.. + +.. date: 2025-11-27-20-16-38 +.. gh-issue: 141473 +.. nonce: Wq4xVN +.. section: Library + +When :meth:`subprocess.Popen.communicate` was called with *input* and a +*timeout* and is called for a second time after a +:exc:`~subprocess.TimeoutExpired` exception before the process has died, it +should no longer hang. + +.. + +.. date: 2025-11-25-16-00-29 +.. gh-issue: 59000 +.. nonce: YtOyJy +.. section: Library + +Fix :mod:`pdb` breakpoint resolution for class methods when the module +defining the class is not imported. + +.. + +.. date: 2025-11-18-14-39-31 +.. gh-issue: 141570 +.. nonce: q3n984 +.. section: Library + +Support :term:`file-like object` raising :exc:`OSError` from +:meth:`~io.IOBase.fileno` in color detection (``_colorize.can_colorize()``). +This can occur when ``sys.stdout`` is redirected. + +.. + +.. date: 2025-11-17-08-16-30 +.. gh-issue: 141659 +.. nonce: QNi9Aj +.. section: Library + +Fix bad file descriptor errors from ``_posixsubprocess`` on AIX. + +.. + +.. date: 2025-11-15-14-58-12 +.. gh-issue: 141600 +.. nonce: XY2BXg +.. section: Library + +Fix musl version detection on Void Linux. + +.. + +.. date: 2025-11-14-16-24-20 +.. gh-issue: 141497 +.. nonce: L_CxDJ +.. section: Library + +:mod:`ipaddress`: ensure that the methods :meth:`IPv4Network.hosts() +<ipaddress.IPv4Network.hosts>` and :meth:`IPv6Network.hosts() +<ipaddress.IPv6Network.hosts>` always return an iterator. + +.. + +.. date: 2025-11-13-14-51-30 +.. gh-issue: 140938 +.. nonce: kXsHHv +.. section: Library + +The :func:`statistics.stdev` and :func:`statistics.pstdev` functions now +raise a :exc:`ValueError` when the input contains an infinity or a NaN. + +.. + +.. date: 2025-11-12-15-42-47 +.. gh-issue: 124111 +.. nonce: hTw4OE +.. section: Library + +Updated Tcl threading configuration in :mod:`_tkinter` to assume that +threads are always available in Tcl 9 and later. + +.. + +.. date: 2025-11-12-01-49-03 +.. gh-issue: 137109 +.. nonce: D6sq2B +.. section: Library + +The :mod:`os.fork` and related forking APIs will no longer warn in the +common case where Linux or macOS platform APIs return the number of threads +in a process and find the answer to be 1 even when a +:func:`os.register_at_fork` ``after_in_parent=`` callback (re)starts a +thread. + +.. + +.. date: 2025-11-10-01-47-18 +.. gh-issue: 141314 +.. nonce: baaa28 +.. section: Library + +Fix assertion failure in :meth:`io.TextIOWrapper.tell` when reading files +with standalone carriage return (``\r``) line endings. + +.. + +.. date: 2025-11-09-18-55-13 +.. gh-issue: 141311 +.. nonce: qZ3swc +.. section: Library + +Fix assertion failure in :func:`!io.BytesIO.readinto` and undefined behavior +arising when read position is above capcity in :class:`io.BytesIO`. + +.. + +.. date: 2025-11-06-15-11-50 +.. gh-issue: 141141 +.. nonce: tgIfgH +.. section: Library + +Fix a thread safety issue with :func:`base64.b85decode`. Contributed by +Benel Tayar. + +.. + +.. date: 2025-11-04-15-40-35 +.. gh-issue: 137969 +.. nonce: 9VZQVt +.. section: Library + +Fix :meth:`annotationlib.ForwardRef.evaluate` returning +:class:`~annotationlib.ForwardRef` objects which don't update with new +globals. + +.. + +.. date: 2025-11-03-17-13-00 +.. gh-issue: 140911 +.. nonce: 7KFvSQ +.. section: Library + +:mod:`collections`: Ensure that the methods ``UserString.rindex()`` and +``UserString.index()`` accept :class:`collections.UserString` instances as +the sub argument. + +.. + +.. date: 2025-11-03-16-23-54 +.. gh-issue: 140797 +.. nonce: DuFEeR +.. section: Library + +The undocumented :class:`!re.Scanner` class now forbids regular expressions +containing capturing groups in its lexicon patterns. Patterns using +capturing groups could previously lead to crashes with segmentation fault. +Use non-capturing groups (?:...) instead. + +.. + +.. date: 2025-11-03-05-38-31 +.. gh-issue: 125115 +.. nonce: jGS8MN +.. section: Library + +Refactor the :mod:`pdb` parsing issue so positional arguments can pass +through intuitively. + +.. + +.. date: 2025-11-02-19-23-32 +.. gh-issue: 140815 +.. nonce: McEG-T +.. section: Library + +:mod:`faulthandler` now detects if a frame or a code object is invalid or +freed. Patch by Victor Stinner. + +.. + +.. date: 2025-11-02-11-46-00 +.. gh-issue: 100218 +.. nonce: 9Ezfdq +.. section: Library + +Correctly set :attr:`~OSError.errno` when :func:`socket.if_nametoindex` or +:func:`socket.if_indextoname` raise an :exc:`OSError`. Patch by Bénédikt +Tran. + +.. + +.. date: 2025-11-02-10-44-23 +.. gh-issue: 140875 +.. nonce: wt6B37 +.. section: Library + +Fix handling of unclosed character references (named and numerical) followed +by the end of file in :class:`html.parser.HTMLParser` with +``convert_charrefs=False``. + +.. + +.. date: 2025-11-02-09-37-22 +.. gh-issue: 140734 +.. nonce: f8gST9 +.. section: Library + +:mod:`multiprocessing`: fix off-by-one error when checking the length of a +temporary socket file path. Patch by Bénédikt Tran. + +.. + +.. date: 2025-11-01-00-36-14 +.. gh-issue: 140874 +.. nonce: eAWt3K +.. section: Library + +Bump the version of pip bundled in ensurepip to version 25.3 + +.. + +.. date: 2025-10-31-15-06-26 +.. gh-issue: 140691 +.. nonce: JzHGtg +.. section: Library + +In :mod:`urllib.request`, when opening a FTP URL fails because a data +connection cannot be made, the control connection's socket is now closed to +avoid a :exc:`ResourceWarning`. + +.. + +.. date: 2025-10-31-13-57-55 +.. gh-issue: 103847 +.. nonce: VM7TnW +.. section: Library + +Fix hang when cancelling process created by +:func:`asyncio.create_subprocess_exec` or +:func:`asyncio.create_subprocess_shell`. Patch by Kumar Aditya. + +.. + +.. date: 2025-10-29-16-12-41 +.. gh-issue: 120057 +.. nonce: qGj5Dl +.. section: Library + +Add :func:`os.reload_environ` to ``os.__all__``. + +.. + +.. date: 2025-10-28-17-43-51 +.. gh-issue: 140228 +.. nonce: 8kfHhO +.. section: Library + +Avoid making unnecessary filesystem calls for frozen modules in +:mod:`linecache` when the global module cache is not present. + +.. + +.. date: 2025-10-27-18-29-42 +.. gh-issue: 140590 +.. nonce: LT9HHn +.. section: Library + +Fix arguments checking for the :meth:`!functools.partial.__setstate__` that +may lead to internal state corruption and crash. Patch by Sergey Miryanov. + +.. + +.. date: 2025-10-27-16-01-41 +.. gh-issue: 125434 +.. nonce: qy0uRA +.. section: Library + +Display thread name in :mod:`faulthandler` on Windows. Patch by Victor +Stinner. + +.. + +.. date: 2025-10-27-13-49-31 +.. gh-issue: 140634 +.. nonce: ULng9G +.. section: Library + +Fix a reference counting bug in :meth:`!os.sched_param.__reduce__`. + +.. + +.. date: 2025-10-26-16-24-12 +.. gh-issue: 140633 +.. nonce: ioayC1 +.. section: Library + +Ignore :exc:`AttributeError` when setting a module's ``__file__`` attribute +when loading an extension module packaged as Apple Framework. + +.. + +.. date: 2025-10-25-21-26-16 +.. gh-issue: 140593 +.. nonce: OxlLc9 +.. section: Library + +:mod:`xml.parsers.expat`: Fix a memory leak that could affect users with +:meth:`~xml.parsers.expat.xmlparser.ElementDeclHandler` set to a custom +element declaration handler. Patch by Sebastian Pipping. + +.. + +.. date: 2025-10-25-21-04-00 +.. gh-issue: 140607 +.. nonce: oOZGxS +.. section: Library + +Inside :meth:`io.RawIOBase.read`, validate that the count of bytes returned +by :meth:`io.RawIOBase.readinto` is valid (inside the provided buffer). + +.. + +.. date: 2025-10-23-19-39-16 +.. gh-issue: 138162 +.. nonce: Znw5DN +.. section: Library + +Fix :class:`logging.LoggerAdapter` with ``merge_extra=True`` and without the +*extra* argument. + +.. + +.. date: 2025-10-23-12-12-22 +.. gh-issue: 138774 +.. nonce: mnh2gU +.. section: Library + +:func:`ast.unparse` now generates full source code when handling +:class:`ast.Interpolation` nodes that do not have a specified source. + +.. + +.. date: 2025-10-22-20-52-13 +.. gh-issue: 140474 +.. nonce: xIWlip +.. section: Library + +Fix memory leak in :class:`array.array` when creating arrays from an empty +:class:`str` and the ``u`` type code. + +.. + +.. date: 2025-10-21-15-54-13 +.. gh-issue: 137530 +.. nonce: ZyIVUH +.. section: Library + +:mod:`dataclasses` Fix annotations for generated ``__init__`` methods by +replacing the annotations that were in-line in the generated source code +with ``__annotate__`` functions attached to the methods. + +.. + +.. date: 2025-10-20-12-33-49 +.. gh-issue: 140348 +.. nonce: SAKnQZ +.. section: Library + +Fix regression in Python 3.14.0 where using the ``|`` operator on a +:class:`typing.Union` object combined with an object that is not a type +would raise an error. + +.. + +.. date: 2025-10-17-23-58-11 +.. gh-issue: 140272 +.. nonce: lhY8uS +.. section: Library + +Fix memory leak in the :meth:`!clear` method of the :mod:`dbm.gnu` database. + +.. + +.. date: 2025-10-15-21-42-13 +.. gh-issue: 140041 +.. nonce: _Fka2j +.. section: Library + +Fix import of :mod:`ctypes` on Android and Cygwin when ABI flags are +present. + +.. + +.. date: 2025-10-15-20-47-04 +.. gh-issue: 140120 +.. nonce: 3gffZq +.. section: Library + +Fixed a memory leak in :mod:`hmac` when it was using the hacl-star backend. +Discovered by ``@ashm-dev`` using AddressSanitizer. + +.. + +.. date: 2025-10-11-10-02-56 +.. gh-issue: 139905 +.. nonce: UyJIR_ +.. section: Library + +Add suggestion to error message for :class:`typing.Generic` subclasses when +``cls.__parameters__`` is missing due to a parent class failing to call +:meth:`super().__init_subclass__() <object.__init_subclass__>` in its +``__init_subclass__``. + +.. + +.. date: 2025-10-10-11-22-50 +.. gh-issue: 139894 +.. nonce: ECAXqj +.. section: Library + +Fix incorrect sharing of current task with the child process while forking +in :mod:`asyncio`. Patch by Kumar Aditya. + +.. + +.. date: 2025-10-09-21-37-20 +.. gh-issue: 139845 +.. nonce: dzx5UP +.. section: Library + +Fix to not print KeyboardInterrupt twice in default asyncio REPL. + +.. + +.. date: 2025-10-09-13-48-28 +.. gh-issue: 139783 +.. nonce: __NUgo +.. section: Library + +Fix :func:`inspect.getsourcelines` for the case when a decorator is followed +by a comment or an empty line. + +.. + +.. date: 2025-10-09-03-06-19 +.. gh-issue: 139809 +.. nonce: lzHJNu +.. section: Library + +Prevent premature colorization of subparser ``prog`` in +:meth:`argparse.ArgumentParser.add_subparsers` to respect color environment +variable changes after parser creation. + +.. + +.. date: 2025-10-08-00-06-30 +.. gh-issue: 139736 +.. nonce: baPeBd +.. section: Library + +Fix excessive indentation in the default :mod:`argparse` +:class:`!HelpFormatter`. Patch by Alexander Edland. + +.. + +.. date: 2025-10-02-17-40-10 +.. gh-issue: 70765 +.. nonce: zVlLZn +.. section: Library + +:mod:`http.server`: fix default handling of HTTP/0.9 requests in +:class:`~http.server.BaseHTTPRequestHandler`. Previously, +:meth:`!BaseHTTPRequestHandler.parse_request` incorrectly waited for headers +in the request although those are not supported in HTTP/0.9. Patch by +Bénédikt Tran. + +.. + +.. date: 2025-09-30-12-52-54 +.. gh-issue: 63161 +.. nonce: mECM1A +.. section: Library + +Fix :func:`tokenize.detect_encoding`. Support non-UTF-8 shebang and comments +if non-UTF-8 encoding is specified. Detect decoding error for non-UTF-8 +encoding. Detect null bytes in source code. + +.. + +.. date: 2025-09-28-16-34-11 +.. gh-issue: 139391 +.. nonce: nRFnmx +.. section: Library + +Fix an issue when, on non-Windows platforms, it was not possible to +gracefully exit a ``python -m asyncio`` process suspended by Ctrl+Z and +later resumed by :manpage:`fg` other than with :manpage:`kill`. + +.. + +.. date: 2025-09-25-20-16-10 +.. gh-issue: 101828 +.. nonce: yTxJlJ +.. section: Library + +Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and +``'euc_jis_2004'`` codecs truncating null chars as they were treated as part +of multi-character sequences. + +.. + +.. date: 2025-09-24-14-17-34 +.. gh-issue: 139289 +.. nonce: Vmk25k +.. section: Library + +Do a real lazy-import on :mod:`rlcompleter` in :mod:`pdb` and restore the +existing completer after importing :mod:`rlcompleter`. + +.. + +.. date: 2025-09-23-09-46-46 +.. gh-issue: 139246 +.. nonce: pzfM-w +.. section: Library + +fix: paste zero-width in default repl width is wrong. + +.. + +.. date: 2025-09-22-14-40-11 +.. gh-issue: 90949 +.. nonce: UM35nb +.. section: Library + +Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold` +and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification` +to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of +disproportional amounts of dynamic memory from within an Expat parser. Patch +by Bénédikt Tran. + +.. + +.. date: 2025-09-21-15-58-57 +.. gh-issue: 139210 +.. nonce: HGbMvz +.. section: Library + +Fix use-after-free when reporting unknown event in +:func:`xml.etree.ElementTree.iterparse`. Patch by Ken Jin. + +.. + +.. date: 2025-09-20-17-50-31 +.. gh-issue: 138860 +.. nonce: Y9JXap +.. section: Library + +Lazy import :mod:`rlcompleter` in :mod:`pdb` to avoid deadlock in +subprocess. + +.. + +.. date: 2025-09-19-09-36-42 +.. gh-issue: 112729 +.. nonce: mmty0_ +.. section: Library + +Fix crash when calling :func:`concurrent.interpreters.create` when the +process is out of memory. + +.. + +.. date: 2025-09-18-05-32-18 +.. gh-issue: 135729 +.. nonce: 8AmMza +.. section: Library + +Fix unraisable exception during finalization when using +:mod:`concurrent.interpreters` in the REPL. + +.. + +.. date: 2025-09-17-21-54-53 +.. gh-issue: 139076 +.. nonce: 2eX9lG +.. section: Library + +Fix a bug in the :mod:`pydoc` module that was hiding functions in a Python +module if they were implemented in an extension module and the module did +not have ``__all__``. + +.. + +.. date: 2025-09-17-19-08-34 +.. gh-issue: 139065 +.. nonce: Hu8fM5 +.. section: Library + +Fix trailing space before a wrapped long word if the line length is exactly +*width* in :mod:`textwrap`. + +.. + +.. date: 2025-09-17-12-07-21 +.. gh-issue: 139001 +.. nonce: O6tseN +.. section: Library + +Fix race condition in :class:`pathlib.Path` on the internal ``_raw_paths`` +field. + +.. + +.. date: 2025-09-17-08-32-43 +.. gh-issue: 138813 +.. nonce: LHkHjX +.. section: Library + +:class:`!multiprocessing.BaseProcess` defaults ``kwargs`` to ``None`` +instead of a shared dictionary. + +.. + +.. date: 2025-09-16-16-46-58 +.. gh-issue: 138993 +.. nonce: -8s8_T +.. section: Library + +Dedent :data:`credits` text. + +.. + +.. date: 2025-09-15-21-03-11 +.. gh-issue: 138891 +.. nonce: oZFdtR +.. section: Library + +Fix ``SyntaxError`` when ``inspect.get_annotations(f, eval_str=True)`` is +called on a function annotated with a :pep:`646` ``star_expression`` + +.. + +.. date: 2025-09-15-19-29-12 +.. gh-issue: 130567 +.. nonce: shDEnT +.. section: Library + +Fix possible crash in :func:`locale.strxfrm` due to a platform bug on macOS. + +.. + +.. date: 2025-09-13-12-19-17 +.. gh-issue: 138859 +.. nonce: PxjIoN +.. section: Library + +Fix generic type parameterization raising a :exc:`TypeError` when omitting a +:class:`ParamSpec` that has a default which is not a list of types. + +.. + +.. date: 2025-09-12-09-34-37 +.. gh-issue: 138764 +.. nonce: mokHoY +.. section: Library + +Prevent :func:`annotationlib.call_annotate_function` from calling +``__annotate__`` functions that don't support ``VALUE_WITH_FAKE_GLOBALS`` in +a fake globals namespace with empty globals. + +Make ``FORWARDREF`` and ``STRING`` annotations fall back to using ``VALUE`` +annotations in the case that neither their own format, nor +``VALUE_WITH_FAKE_GLOBALS`` are supported. + +.. + +.. date: 2025-09-11-15-03-37 +.. gh-issue: 138775 +.. nonce: w7rnSx +.. section: Library + +Use of ``python -m`` with :mod:`base64` has been fixed to detect input from +a terminal so that it properly notices EOF. + +.. + +.. date: 2025-09-11-11-09-28 +.. gh-issue: 138779 +.. nonce: TNZnLr +.. section: Library + +Support device numbers larger than ``2**63-1`` for the +:attr:`~os.stat_result.st_rdev` field of the :class:`os.stat_result` +structure. + +.. + +.. date: 2025-09-05-21-10-24 +.. gh-issue: 137706 +.. nonce: 0EztiJ +.. section: Library + +Fix the partial evaluation of annotations that use ``typing.Annotated[T, +x]`` where ``T`` is a forward reference. + +.. + +.. date: 2025-09-05-15-35-59 +.. gh-issue: 88375 +.. nonce: dC491a +.. section: Library + +Fix normalization of the ``robots.txt`` rules and URLs in the +:mod:`urllib.robotparser` module. No longer ignore trailing ``?``. +Distinguish raw special characters ``?``, ``=`` and ``&`` from the +percent-encoded ones. + +.. + +.. date: 2025-09-04-15-18-11 +.. gh-issue: 111788 +.. nonce: tuTEM5 +.. section: Library + +Fix parsing errors in the :mod:`urllib.robotparser` module. Don't fail +trying to parse weird paths. Don't fail trying to decode non-UTF-8 +``robots.txt`` files. + +.. + +.. date: 2025-09-03-20-18-39 +.. gh-issue: 98896 +.. nonce: tjez89 +.. section: Library + +Fix a failure in multiprocessing resource_tracker when SharedMemory names +contain colons. Patch by Rani Pinchuk. + +.. + +.. date: 2025-09-03-18-26-07 +.. gh-issue: 138425 +.. nonce: cVE9Ho +.. section: Library + +Fix partial evaluation of :class:`annotationlib.ForwardRef` objects which +rely on names defined as globals. + +.. + +.. date: 2025-09-03-15-20-10 +.. gh-issue: 138432 +.. nonce: RMc7UX +.. section: Library + +:meth:`zoneinfo.reset_tzpath` will now convert any :class:`os.PathLike` +objects it receives into strings before adding them to ``TZPATH``. It will +raise ``TypeError`` if anything other than a string is found after this +conversion. If given an :class:`os.PathLike` object that represents a +relative path, it will now raise ``ValueError`` instead of ``TypeError``, +and present a more informative error message. + +.. + +.. date: 2025-08-31-09-06-49 +.. gh-issue: 138008 +.. nonce: heOvsU +.. section: Library + +Fix segmentation faults in the :mod:`ctypes` module due to invalid +:attr:`~ctypes._CFuncPtr.argtypes`. Patch by Dung Nguyen. + +.. + +.. date: 2025-08-30-10-04-28 +.. gh-issue: 60462 +.. nonce: yh_vDc +.. section: Library + +Fix :func:`locale.strxfrm` on Solaris (and possibly other platforms). + +.. + +.. date: 2025-08-29-12-56-55 +.. gh-issue: 138239 +.. nonce: uthZFI +.. section: Library + +The REPL now highlights :keyword:`type` as a soft keyword in :ref:`type +statements <type>`. + +.. + +.. date: 2025-08-28-13-20-09 +.. gh-issue: 138204 +.. nonce: 8oLOud +.. section: Library + +Forbid expansion of shared anonymous :mod:`memory maps <mmap>` on Linux, +which caused a bus error. + +.. + +.. date: 2025-08-27-17-05-36 +.. gh-issue: 138010 +.. nonce: ZZJmPL +.. section: Library + +Fix an issue where defining a class with an :func:`@warnings.deprecated +<warnings.deprecated>`-decorated base class may not invoke the correct +:meth:`~object.__init_subclass__` method in cases involving multiple +inheritance. Patch by Brian Schubert. + +.. + +.. date: 2025-08-26-08-17-56 +.. gh-issue: 138151 +.. nonce: I6CdAk +.. section: Library + +In :mod:`annotationlib`, improve evaluation of forward references to +nonlocal variables that are not yet defined when the annotations are +initially evaluated. + +.. + +.. date: 2025-08-16-16-04-15 +.. gh-issue: 137317 +.. nonce: Dl13B5 +.. section: Library + +:func:`inspect.signature` now correctly handles classes that use a +descriptor on a wrapped :meth:`!__init__` or :meth:`!__new__` method. +Contributed by Yongyu Yan. + +.. + +.. date: 2025-08-16-09-02-11 +.. gh-issue: 137754 +.. nonce: mCev1Y +.. section: Library + +Fix import of the :mod:`zoneinfo` module if the C implementation of the +:mod:`datetime` module is not available. + +.. + +.. date: 2025-08-07-17-18-57 +.. gh-issue: 137490 +.. nonce: s89ieZ +.. section: Library + +Handle :data:`~errno.ECANCELED` in the same way as :data:`~errno.EINTR` in +:func:`signal.sigwaitinfo` on NetBSD. + +.. + +.. date: 2025-08-06-23-16-42 +.. gh-issue: 137477 +.. nonce: bk6BDV +.. section: Library + +Fix :func:`!inspect.getblock`, :func:`inspect.getsourcelines` and +:func:`inspect.getsource` for generator expressions. + +.. + +.. date: 2025-08-03-13-16-39 +.. gh-issue: 137044 +.. nonce: 0hPVL_ +.. section: Library + +Return large limit values as positive integers instead of negative integers +in :func:`resource.getrlimit`. Accept large values and reject negative +values (except :data:`~resource.RLIM_INFINITY`) for limits in +:func:`resource.setrlimit`. + +.. + +.. date: 2025-08-01-23-52-49 +.. gh-issue: 75989 +.. nonce: 5aYXNJ +.. section: Library + +:func:`tarfile.TarFile.extractall` and :func:`tarfile.TarFile.extract` now +overwrite symlinks when extracting hardlinks. (Contributed by Alexander +Enrique Urieles Nieto in :gh:`75989`.) + +.. + +.. date: 2025-08-01-23-11-25 +.. gh-issue: 137017 +.. nonce: 0yGcNc +.. section: Library + +Fix :obj:`threading.Thread.is_alive` to remain ``True`` until the underlying +OS thread is fully cleaned up. This avoids false negatives in edge cases +involving thread monitoring or premature :obj:`threading.Thread.is_alive` +calls. + +.. + +.. date: 2025-08-01-15-07-59 +.. gh-issue: 137273 +.. nonce: 4V8Xmv +.. section: Library + +Fix debug assertion failure in :func:`locale.setlocale` on Windows. + +.. + +.. date: 2025-07-30-17-42-36 +.. gh-issue: 137239 +.. nonce: qSpj32 +.. section: Library + +:mod:`heapq`: Update :data:`!heapq.__all__` with ``*_max`` functions. + +.. + +.. date: 2025-07-28-23-11-29 +.. gh-issue: 81325 +.. nonce: jMJFBe +.. section: Library + +:class:`tarfile.TarFile` now accepts a :term:`path-like <path-like object>` +when working on a tar archive. (Contributed by Alexander Enrique Urieles +Nieto in :gh:`81325`.) + +.. + +.. date: 2025-07-28-20-48-32 +.. gh-issue: 137185 +.. nonce: fgI7-B +.. section: Library + +Fix a potential async-signal-safety issue in :mod:`faulthandler` when +printing C stack traces. + +.. + +.. date: 2025-07-21-15-40-00 +.. gh-issue: 136914 +.. nonce: -GNG-d +.. section: Library + +Fix retrieval of :attr:`doctest.DocTest.lineno` for objects decorated with +:func:`functools.cache` or :class:`functools.cached_property`. + +.. + +.. date: 2025-07-21-11-56-47 +.. gh-issue: 136912 +.. nonce: zWosAL +.. section: Library + +:func:`hmac.digest` now properly handles large keys and messages by falling +back to the pure Python implementation when necessary. Patch by Bénédikt +Tran. + +.. + +.. date: 2025-07-21-01-16-32 +.. gh-issue: 83424 +.. nonce: Y3tEV4 +.. section: Library + +Allows creating a :class:`ctypes.CDLL` without name when passing a handle as +an argument. + +.. + +.. date: 2025-07-17-16-12-23 +.. gh-issue: 136234 +.. nonce: VmTxtj +.. section: Library + +Fix :meth:`asyncio.WriteTransport.writelines` to be robust to connection +failure, by using the same behavior as +:meth:`~asyncio.WriteTransport.write`. + +.. + +.. date: 2025-07-10-21-02-43 +.. gh-issue: 136507 +.. nonce: pnEuGS +.. section: Library + +Fix mimetypes CLI to handle multiple file parameters. + +.. + +.. date: 2025-07-01-04-57-57 +.. gh-issue: 136057 +.. nonce: 4-t596 +.. section: Library + +Fixed the bug in :mod:`pdb` and :mod:`bdb` where ``next`` and ``step`` can't +go over the line if a loop exists in the line. + +.. + +.. date: 2025-06-16-15-00-13 +.. gh-issue: 135386 +.. nonce: lNrxLc +.. section: Library + +Fix opening a :mod:`dbm.sqlite3` database for reading from read-only file or +directory. + +.. + +.. date: 2025-06-16-12-37-02 +.. gh-issue: 135444 +.. nonce: An2eeA +.. section: Library + +Fix :meth:`asyncio.DatagramTransport.sendto` to account for datagram header +size when data cannot be sent. + +.. + +.. date: 2025-06-10-21-00-48 +.. gh-issue: 126631 +.. nonce: eITVJd +.. section: Library + +Fix :mod:`multiprocessing` ``forkserver`` bug which prevented ``__main__`` +from being preloaded. + +.. + +.. date: 2025-06-10-18-02-29 +.. gh-issue: 135307 +.. nonce: fXGrcK +.. section: Library + +:mod:`email`: Fix exception in ``set_content()`` when encoding text and +max_line_length is set to ``0`` or ``None`` (unlimited). + +.. + +.. date: 2025-05-30-18-37-44 +.. gh-issue: 134453 +.. nonce: kxkA-o +.. section: Library + +Fixed :func:`subprocess.Popen.communicate` ``input=`` handling of +:class:`memoryview` instances that were non-byte shaped on POSIX platforms. +Those are now properly cast to a byte shaped view instead of truncating the +input. Windows platforms did not have this bug. + +.. + +.. date: 2025-05-26-10-52-27 +.. gh-issue: 134698 +.. nonce: aJ1mZ1 +.. section: Library + +Fix a crash when calling methods of :class:`ssl.SSLContext` or +:class:`ssl.SSLSocket` across multiple threads. + +.. + +.. date: 2025-05-10-17-42-03 +.. gh-issue: 125996 +.. nonce: vaQp0- +.. section: Library + +Fix thread safety of :class:`collections.OrderedDict`. Patch by Kumar +Aditya. + +.. + +.. date: 2025-05-10-15-10-54 +.. gh-issue: 133789 +.. nonce: I-ZlUX +.. section: Library + +Fix unpickling of :mod:`pathlib` objects that were pickled in Python 3.13. + +.. + +.. date: 2025-04-21-01-05-14 +.. gh-issue: 127081 +.. nonce: Egrpq7 +.. section: Library + +Fix libc thread safety issues with :mod:`dbm` by performing stateful +operations in critical sections. + +.. + +.. date: 2025-04-16-21-02-57 +.. gh-issue: 132551 +.. nonce: Psa7pL +.. section: Library + +Make :class:`io.BytesIO` safe in :term:`free-threaded <free threading>` +build. + +.. + +.. date: 2025-03-27-08-13-32 +.. gh-issue: 131788 +.. nonce: 0RWiFc +.. section: Library + +Make ``ResourceTracker.send`` from :mod:`multiprocessing` re-entrant safe + +.. + +.. date: 2024-05-13-09-50-31 +.. gh-issue: 118981 +.. nonce: zgOQPv +.. section: Library + +Fix potential hang in ``multiprocessing.popen_spawn_posix`` that can happen +when the child proc dies early by closing the child fds right away. + +.. + +.. date: 2023-03-21-10-59-40 +.. gh-issue: 102431 +.. nonce: eUDnf4 +.. section: Library + +Clarify constraints for "logical" arguments in methods of +:class:`decimal.Context`. + +.. + +.. date: 2023-02-13-20-34-52 +.. gh-issue: 78319 +.. nonce: V1zzed +.. section: Library + +UTF8 support for the IMAP APPEND command has been made RFC compliant. + +.. + +.. bpo: 38735 +.. date: 2022-01-07-16-56-57 +.. nonce: NFfJX6 +.. section: Library + +Fix failure when importing a module from the root directory on unix-like +platforms with sys.pycache_prefix set. + +.. + +.. bpo: 41839 +.. date: 2020-09-23-11-54-17 +.. nonce: kU5Ywl +.. section: Library + +Allow negative priority values from :func:`os.sched_get_priority_min` and +:func:`os.sched_get_priority_max` functions. + +.. + +.. date: 2025-10-09-12-53-47 +.. gh-issue: 96491 +.. nonce: 4YKxvy +.. section: IDLE + +Deduplicate version number in IDLE shell title bar after saving to a file. + +.. + +.. date: 2025-10-08-08-35-50 +.. gh-issue: 139742 +.. nonce: B3fZLg +.. section: IDLE + +Colorize t-string prefixes for template strings in IDLE, as done for +f-string prefixes. + +.. + +.. date: 2025-11-26-23-30-09 +.. gh-issue: 141994 +.. nonce: arBEG6 +.. section: Documentation + +:mod:`xml.sax.handler`: Make Documentation of +:data:`xml.sax.handler.feature_external_ges` warn of opening up to `external +entity attacks <https://en.wikipedia.org/wiki/XML_external_entity_attack>`_. +Patch by Sebastian Pipping. + +.. + +.. date: 2025-10-27-23-06-01 +.. gh-issue: 140578 +.. nonce: FMBdEn +.. section: Documentation + +Remove outdated sencence in the documentation for :mod:`multiprocessing`, +that implied that :class:`concurrent.futures.ThreadPoolExecutor` did not +exist. + +.. + +.. date: 2025-12-01-20-41-26 +.. gh-issue: 142048 +.. nonce: c2YosX +.. section: Core and Builtins + +Fix quadratically increasing garbage collection delays in free-threaded +build. + +.. + +.. date: 2025-11-25-13-13-34 +.. gh-issue: 116738 +.. nonce: MnZRdV +.. section: Core and Builtins + +Fix thread safety issue with :mod:`re` scanner objects in free-threaded +builds. + +.. + +.. date: 2025-11-24-21-09-30 +.. gh-issue: 141930 +.. nonce: hIIzSd +.. section: Core and Builtins + +When importing a module, use Python's regular file object to ensure that +writes to ``.pyc`` files are complete or an appropriate error is raised. + +.. + +.. date: 2025-11-22-10-43-26 +.. gh-issue: 120158 +.. nonce: 41_rXd +.. section: Core and Builtins + +Fix inconsistent state when enabling or disabling monitoring events too many +times. + +.. + +.. date: 2025-11-17-14-40-45 +.. gh-issue: 139653 +.. nonce: LzOy1M +.. section: Core and Builtins + +Only raise a ``RecursionError`` or trigger a fatal error if the stack +pointer is both below the limit pointer *and* above the stack base. If +outside of these bounds assume that it is OK. This prevents false positives +when user-space threads swap stacks. + +.. + +.. date: 2025-11-15-23-58-23 +.. gh-issue: 139103 +.. nonce: 9cVYJ0 +.. section: Core and Builtins + +Improve multithreaded scaling of dataclasses on the free-threaded build. + +.. + +.. date: 2025-11-15-01-21-00 +.. gh-issue: 141579 +.. nonce: aB7cD9 +.. section: Core and Builtins + +Fix :func:`sys.activate_stack_trampoline` to properly support the +``perf_jit`` backend. Patch by Pablo Galindo. + +.. + +.. date: 2025-11-14-16-25-15 +.. gh-issue: 114203 +.. nonce: n3tlQO +.. section: Core and Builtins + +Skip locking if object is already locked by two-mutex critical section. + +.. + +.. date: 2025-11-14-00-19-45 +.. gh-issue: 141528 +.. nonce: VWdax1 +.. section: Core and Builtins + +Suggest using :meth:`concurrent.interpreters.Interpreter.close` instead of +the private ``_interpreters.destroy`` function when warning about remaining +subinterpreters. Patch by Sergey Miryanov. + +.. + +.. date: 2025-11-10-23-07-06 +.. gh-issue: 141312 +.. nonce: H-58GB +.. section: Core and Builtins + +Fix the assertion failure in the ``__setstate__`` method of the range +iterator when a non-integer argument is passed. Patch by Sergey Miryanov. + +.. + +.. date: 2025-11-10-00-14-20 +.. gh-issue: 116738 +.. nonce: IxliC_ +.. section: Core and Builtins + +Make csv module thread-safe on the :term:`free threaded <free threading>` +build. + +.. + +.. date: 2025-11-03-17-21-38 +.. gh-issue: 140939 +.. nonce: FVboAw +.. section: Core and Builtins + +Fix memory leak when :class:`bytearray` or :class:`bytes` is formated with +the ``%*b`` format with a large width that results in a :exc:`MemoryError`. + +.. + +.. date: 2025-11-02-15-28-33 +.. gh-issue: 140260 +.. nonce: JNzlGz +.. section: Core and Builtins + +Fix :mod:`struct` data race in endian table initialization with +subinterpreters. Patch by Shamil Abdulaev. + +.. + +.. date: 2025-11-02-12-47-38 +.. gh-issue: 140530 +.. nonce: S934bp +.. section: Core and Builtins + +Fix a reference leak when ``raise exc from cause`` fails. Patch by Bénédikt +Tran. + +.. + +.. date: 2025-10-29-20-59-10 +.. gh-issue: 140373 +.. nonce: -uoaPP +.. section: Core and Builtins + +Correctly emit ``PY_UNWIND`` event when generator object is closed. Patch by +Mikhail Efimov. + +.. + +.. date: 2025-10-25-17-36-46 +.. gh-issue: 140576 +.. nonce: kj0SCY +.. section: Core and Builtins + +Fixed crash in :func:`tokenize.generate_tokens` in case of specific +incorrect input. Patch by Mikhail Efimov. + +.. + +.. date: 2025-10-24-20-42-33 +.. gh-issue: 140551 +.. nonce: -9swrl +.. section: Core and Builtins + +Fixed crash in :class:`dict` if :meth:`dict.clear` is called at the lookup +stage. Patch by Mikhail Efimov and Inada Naoki. + +.. + +.. date: 2025-10-24-20-16-42 +.. gh-issue: 140517 +.. nonce: cqun-K +.. section: Core and Builtins + +Fixed a reference leak when iterating over the result of :func:`map` with +``strict=True`` when the input iterables have different lengths. Patch by +Mikhail Efimov. + +.. + +.. date: 2025-10-23-16-05-50 +.. gh-issue: 140471 +.. nonce: Ax_aXn +.. section: Core and Builtins + +Fix potential buffer overflow in :class:`ast.AST` node initialization when +encountering malformed :attr:`~ast.AST._fields` containing non-:class:`str`. + +.. + +.. date: 2025-10-22-17-22-22 +.. gh-issue: 140431 +.. nonce: m8D_A- +.. section: Core and Builtins + +Fix a crash in Python's :term:`garbage collector <garbage collection>` due +to partially initialized :term:`coroutine` objects when coroutine origin +tracking depth is enabled (:func:`sys.set_coroutine_origin_tracking_depth`). + +.. + +.. date: 2025-10-21-09-20-03 +.. gh-issue: 140398 +.. nonce: SoABwJ +.. section: Core and Builtins + +Fix memory leaks in :mod:`readline` functions +:func:`~readline.read_init_file`, :func:`~readline.read_history_file`, +:func:`~readline.write_history_file`, and +:func:`~readline.append_history_file` when :c:func:`PySys_Audit` fails. + +.. + +.. date: 2025-10-21-06-51-50 +.. gh-issue: 140406 +.. nonce: 0gJs8M +.. section: Core and Builtins + +Fix memory leak when an object's :meth:`~object.__hash__` method returns an +object that isn't an :class:`int`. + +.. + +.. date: 2025-10-20-11-24-36 +.. gh-issue: 140358 +.. nonce: UQuKdV +.. section: Core and Builtins + +Restore elapsed time and unreachable object count in GC debug output. These +were inadvertently removed during a refactor of ``gc.c``. The debug log now +again reports elapsed collection time and the number of unreachable objects. +Contributed by Pål Grønås Drange. + +.. + +.. date: 2025-10-18-21-29-45 +.. gh-issue: 140306 +.. nonce: xS5CcS +.. section: Core and Builtins + +Fix memory leaks in cross-interpreter channel operations and shared +namespace handling. + +.. + +.. date: 2025-10-18-18-08-36 +.. gh-issue: 140301 +.. nonce: m-2HxC +.. section: Core and Builtins + +Fix memory leak of ``PyConfig`` in subinterpreters. + +.. + +.. date: 2025-10-17-20-23-19 +.. gh-issue: 140257 +.. nonce: 8Txmem +.. section: Core and Builtins + +Fix data race between interpreter_clear() and take_gil() on eval_breaker +during finalization with daemon threads. + +.. + +.. date: 2025-10-17-18-03-12 +.. gh-issue: 139951 +.. nonce: IdwM2O +.. section: Core and Builtins + +Fixes a regression in GC performance for a growing heap composed mostly of +small tuples. + +* Counts number of actually tracked objects, instead of trackable objects. + This ensures that untracking tuples has the desired effect of reducing GC overhead. +* Does not track most untrackable tuples during creation. + This prevents large numbers of small tuples causing excessive GCs. + +.. + +.. date: 2025-10-16-21-47-00 +.. gh-issue: 140104 +.. nonce: A8SQIm +.. section: Core and Builtins + +Fix a bug with exception handling in the JIT. Patch by Ken Jin. Bug reported +by Daniel Diniz. + +.. + +.. date: 2025-10-15-00-21-40 +.. gh-issue: 140061 +.. nonce: J0XeDV +.. section: Core and Builtins + +Fixing the checking of whether an object is uniquely referenced to ensure +free-threaded compatibility. Patch by Sergey Miryanov. + +.. + +.. date: 2025-10-14-17-07-37 +.. gh-issue: 140067 +.. nonce: ID2gOm +.. section: Core and Builtins + +Fix memory leak in sub-interpreter creation. + +.. + +.. date: 2025-10-13-17-56-23 +.. gh-issue: 140000 +.. nonce: tLhn3e +.. section: Core and Builtins + +Fix potential memory leak when a reference cycle exists between an instance +of :class:`typing.TypeAliasType`, :class:`typing.TypeVar`, +:class:`typing.ParamSpec`, or :class:`typing.TypeVarTuple` and its +``__name__`` attribute. Patch by Mikhail Efimov. + +.. + +.. date: 2025-10-13-13-54-19 +.. gh-issue: 139914 +.. nonce: M-y_3E +.. section: Core and Builtins + +Restore support for HP PA-RISC, which has an upwards-growing stack. + +.. + +.. date: 2025-10-12-11-00-06 +.. gh-issue: 139988 +.. nonce: 4wi51t +.. section: Core and Builtins + +Fix a memory leak when failing to create a :class:`~typing.Union` type. +Patch by Bénédikt Tran. + +.. + +.. date: 2025-10-08-13-52-00 +.. gh-issue: 139748 +.. nonce: jq0yFJ +.. section: Core and Builtins + +Fix reference leaks in error branches of functions accepting path strings or +bytes such as :func:`compile` and :func:`os.system`. Patch by Bénédikt Tran. + +.. + +.. date: 2025-10-06-13-15-26 +.. gh-issue: 139516 +.. nonce: d9Pkur +.. section: Core and Builtins + +Fix lambda colon erroneously start format spec in f-string in tokenizer. + +.. + +.. date: 2025-10-06-10-03-37 +.. gh-issue: 139640 +.. nonce: gY5oTb2 +.. section: Core and Builtins + +:func:`ast.parse` no longer emits syntax warnings for +``return``/``break``/``continue`` in ``finally`` (see :pep:`765`) -- they +are only emitted during compilation. + +.. + +.. date: 2025-10-06-10-03-37 +.. gh-issue: 139640 +.. nonce: gY5oTb +.. section: Core and Builtins + +Fix swallowing some syntax warnings in different modules if they +accidentally have the same message and are emitted from the same line. Fix +duplicated warnings in the ``finally`` block. + +.. + +.. date: 2025-10-01-18-21-19 +.. gh-issue: 63161 +.. nonce: ef1S6N +.. section: Core and Builtins + +Support non-UTF-8 shebang and comments in Python source files if non-UTF-8 +encoding is specified. Detect decoding error in comments for default (UTF-8) +encoding. Show the line and position of decoding error for default encoding +in a traceback. Show the line containing the coding cookie when it conflicts +with the BOM in a traceback. + +.. + +.. date: 2025-09-21-14-33-17 +.. gh-issue: 116738 +.. nonce: vNaI4h +.. section: Core and Builtins + +Make :mod:`mmap` thread-safe on the :term:`free threaded <free threading>` +build. + +.. + +.. date: 2025-09-17-17-17-21 +.. gh-issue: 138558 +.. nonce: 0VbzCH +.. section: Core and Builtins + +Fix handling of unusual t-string annotations in annotationlib. Patch by Dave +Peck. + +.. + +.. date: 2025-09-15-14-04-56 +.. gh-issue: 134466 +.. nonce: yR4fYW +.. section: Core and Builtins + +Don't run PyREPL in a degraded environment where setting termios attributes +is not allowed. + +.. + +.. date: 2025-09-15-13-06-11 +.. gh-issue: 138944 +.. nonce: PeCgLb +.. section: Core and Builtins + +Fix :exc:`SyntaxError` message when invalid syntax appears on the same line +as a valid ``import ... as ...`` or ``from ... import ... as ...`` +statement. Patch by Brian Schubert. + +.. + +.. date: 2025-09-06-13-53-33 +.. gh-issue: 105487 +.. nonce: a43YaY +.. section: Core and Builtins + +Remove non-existent :meth:`~object.__copy__`, :meth:`~object.__deepcopy__`, +and :attr:`~type.__bases__` from the :meth:`~object.__dir__` entries of +:class:`types.GenericAlias`. + +.. + +.. date: 2025-08-30-17-15-05 +.. gh-issue: 69605 +.. nonce: KjBk99 +.. section: Core and Builtins + +Fix some standard library submodules missing from the :term:`REPL` +auto-completion of imports. + +.. + +.. date: 2025-08-28-09-29-46 +.. gh-issue: 116738 +.. nonce: yLZJpV +.. section: Core and Builtins + +Make :mod:`cProfile` thread-safe on the :term:`free threaded <free +threading>` build. + +.. + +.. date: 2025-08-21-06-31-42 +.. gh-issue: 138004 +.. nonce: FH2Hre +.. section: Core and Builtins + +On Solaris/Illumos platforms, thread names are now encoded as ASCII to avoid +errors on systems (e.g. OpenIndiana) that don't support non-ASCII names. + +.. + +.. date: 2025-08-13-13-39-02 +.. gh-issue: 137433 +.. nonce: g6Atfz +.. section: Core and Builtins + +Fix a potential deadlock in the :term:`free threading` build when daemon +threads enable or disable profiling or tracing while the main thread is +shutting down the interpreter. + +.. + +.. date: 2025-08-07-09-52-19 +.. gh-issue: 137400 +.. nonce: AK1dy- +.. section: Core and Builtins + +Fix a crash in the :term:`free threading` build when disabling profiling or +tracing across all threads with :c:func:`PyEval_SetProfileAllThreads` or +:c:func:`PyEval_SetTraceAllThreads` or their Python equivalents +:func:`threading.settrace_all_threads` and +:func:`threading.setprofile_all_threads`. + +.. + +.. date: 2025-08-05-17-22-24 +.. gh-issue: 58124 +.. nonce: q1__53 +.. section: Core and Builtins + +Fix name of the Python encoding in Unicode errors of the code page codec: +use "cp65000" and "cp65001" instead of "CP_UTF7" and "CP_UTF8" which are not +valid Python code names. Patch by Victor Stinner. + +.. + +.. date: 2025-07-09-21-27-14 +.. gh-issue: 132657 +.. nonce: kSA8R3 +.. section: Core and Builtins + +Improve performance of :class:`frozenset` by removing locks in the +free-threading build. + +.. + +.. date: 2025-05-11-09-40-19 +.. gh-issue: 133400 +.. nonce: zkWla8 +.. section: Core and Builtins + +Fixed Ctrl+D (^D) behavior in _pyrepl module to match old pre-3.13 REPL +behavior. + +.. + +.. date: 2025-01-08-12-52-47 +.. gh-issue: 128640 +.. nonce: 9nbh9z +.. section: Core and Builtins + +Fix a crash when using threads inside of a subinterpreter. + +.. + +.. date: 2025-11-21-10-34-00 +.. gh-issue: 137422 +.. nonce: tzZKLi +.. section: C API + +Fix :term:`free threading` race condition in +:c:func:`PyImport_AddModuleRef`. It was previously possible for two calls to +the function return two different objects, only one of which was stored in +:data:`sys.modules`. + +.. + +.. date: 2025-11-18-04-16-09 +.. gh-issue: 140042 +.. nonce: S1C7id +.. section: C API + +Removed the sqlite3_shutdown call that could cause closing connections for +sqlite when used with multiple sub interpreters. + +.. + +.. date: 2025-11-06-06-28-14 +.. gh-issue: 141042 +.. nonce: brOioJ +.. section: C API + +Make qNaN in :c:func:`PyFloat_Pack2` and :c:func:`PyFloat_Pack4`, if while +conversion to a narrower precision floating-point format --- the remaining +after truncation payload will be zero. Patch by Sergey B Kirpichev. + +.. + +.. date: 2025-10-26-16-45-06 +.. gh-issue: 140487 +.. nonce: fGOqss +.. section: C API + +Fix :c:macro:`Py_RETURN_NOTIMPLEMENTED` in limited C API 3.11 and older: +don't treat ``Py_NotImplemented`` as immortal. Patch by Victor Stinner. + +.. + +.. date: 2025-10-15-15-59-59 +.. gh-issue: 140153 +.. nonce: BO7sH4 +.. section: C API + +Fix :c:func:`Py_REFCNT` definition on limited C API 3.11-3.13. Patch by +Victor Stinner. + +.. + +.. date: 2025-10-06-22-17-47 +.. gh-issue: 139653 +.. nonce: 6-1MOd +.. section: C API + +Add :c:func:`PyUnstable_ThreadState_SetStackProtection` and +:c:func:`PyUnstable_ThreadState_ResetStackProtection` functions to set the +stack protection base address and stack protection size of a Python thread +state. Patch by Victor Stinner. + +.. + +.. date: 2025-11-28-19-49-01 +.. gh-issue: 141808 +.. nonce: cV5K12 +.. section: Build + +Do not generate the jit stencils twice in case of PGO builds on Windows. + +.. + +.. date: 2025-11-20-17-01-05 +.. gh-issue: 141784 +.. nonce: LkYI2n +.. section: Build + +Fix ``_remote_debugging_module.c`` compilation on 32-bit Linux. Include +Python.h before system headers to make sure that +``_remote_debugging_module.c`` uses the same types (ABI) than Python. Patch +by Victor Stinner. + +.. + +.. date: 2025-10-29-12-30-38 +.. gh-issue: 140768 +.. nonce: ITYrzw +.. section: Build + +Warn when the WASI SDK version doesn't match what's supported. + +.. + +.. date: 2025-10-25-08-07-06 +.. gh-issue: 140513 +.. nonce: 6OhLTs +.. section: Build + +Generate a clear compilation error when ``_Py_TAIL_CALL_INTERP`` is enabled +but either ``preserve_none`` or ``musttail`` is not supported. + +.. + +.. date: 2025-10-16-11-30-53 +.. gh-issue: 140189 +.. nonce: YCrUyt +.. section: Build + +iOS builds were added to CI. + +.. + +.. date: 2025-09-24-13-59-26 +.. gh-issue: 138489 +.. nonce: 1AcuZM +.. section: Build + +When cross-compiling for WASI by ``build_wasm`` or ``build_emscripten``, the +``build-details.json`` step is now included in the build process, just like +with native builds. + +This fixes the ``libinstall`` task which requires the ``build-details.json`` +file during the process. + +.. + +.. date: 2025-08-10-22-28-06 +.. gh-issue: 137618 +.. nonce: FdNvIE +.. section: Build + +``PYTHON_FOR_REGEN`` now requires Python 3.10 to Python 3.15. Patch by Adam +Turner. + +.. + +.. date: 2025-01-03-13-02-06 +.. gh-issue: 123681 +.. nonce: gQ67nK +.. section: Build + +Check the ``strftime()`` behavior at runtime instead of at the compile time +to support cross-compiling. Remove the internal macro +``_Py_NORMALIZE_CENTURY``. diff --git a/Misc/NEWS.d/3.14.2.rst b/Misc/NEWS.d/3.14.2.rst new file mode 100644 index 00000000000000..b4b4c48da22028 --- /dev/null +++ b/Misc/NEWS.d/3.14.2.rst @@ -0,0 +1,85 @@ +.. date: 2025-12-01-09-36-45 +.. gh-issue: 142145 +.. nonce: tcAUhg +.. release date: 2025-12-05 +.. section: Security + +Remove quadratic behavior in ``xml.minidom`` node ID cache clearing. + +.. + +.. date: 2024-05-23-11-44-41 +.. gh-issue: 119452 +.. nonce: PRfsSv +.. section: Security + +Fix a potential memory denial of service in the :mod:`http.server` module. +When a malicious user is connected to the CGI server on Windows, it could +cause an arbitrary amount of memory to be allocated. This could have led to +symptoms including a :exc:`MemoryError`, swapping, out of memory (OOM) +killed processes or containers, or even system crashes. + +.. + +.. date: 2025-12-05-17-58-29 +.. gh-issue: 140797 +.. nonce: YxB27u +.. section: Library + +Revert changes to the undocumented :class:`!re.Scanner` class. Capturing +groups are still allowed for backward compatibility, although using them can +lead to incorrect result. They will be forbidden in future Python versions. + +.. + +.. date: 2025-12-03-09-36-29 +.. gh-issue: 142206 +.. nonce: ilwegH +.. section: Library + +The resource tracker in the :mod:`multiprocessing` module now uses the +original communication protocol, as in Python 3.14.0 and below, by default. +This avoids issues with upgrading Python while it is running. (Note that +such 'in-place' upgrades are not tested.) The tracker remains compatible +with subprocesses that use new protocol (that is, subprocesses using Python +3.13.10, 3.14.1 and 3.15). + +.. + +.. date: 2025-12-03-06-12-39 +.. gh-issue: 142214 +.. nonce: appYNZ +.. section: Library + +Fix two regressions in :mod:`dataclasses` in Python 3.14.1 related to +annotations. + +* An exception is no longer raised if ``slots=True`` is used and the + ``__init__`` method does not have an ``__annotate__`` attribute + (likely because ``init=False`` was used). + +* An exception is no longer raised if annotations are requested on the + ``__init__`` method and one of the fields is not present in the class + annotations. This can occur in certain dynamic scenarios. + +Patch by Jelle Zijlstra. + +.. + +.. date: 2025-12-03-11-03-35 +.. gh-issue: 142218 +.. nonce: 44Fq_J +.. section: Core and Builtins + +Fix crash when inserting into a split table dictionary with a non +:class:`str` key that matches an existing key. + +.. + +.. date: 2025-12-01-10-03-08 +.. gh-issue: 116738 +.. nonce: 972YsG +.. section: Core and Builtins + +Fix :mod:`cmath` data race when initializing trigonometric tables with +subinterpreters. diff --git a/Misc/NEWS.d/next/Build/2025-08-27-09-52-45.gh-issue-138061.fMVS9w.rst b/Misc/NEWS.d/next/Build/2025-08-27-09-52-45.gh-issue-138061.fMVS9w.rst new file mode 100644 index 00000000000000..7af79d0b87ef55 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-08-27-09-52-45.gh-issue-138061.fMVS9w.rst @@ -0,0 +1 @@ +Ensure reproducible builds by making JIT stencil header generation deterministic. diff --git a/Misc/NEWS.d/next/Build/2025-12-09-13-33-46.gh-issue-142454.cqUxzQ.rst b/Misc/NEWS.d/next/Build/2025-12-09-13-33-46.gh-issue-142454.cqUxzQ.rst new file mode 100644 index 00000000000000..4de16866f28851 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-12-09-13-33-46.gh-issue-142454.cqUxzQ.rst @@ -0,0 +1,4 @@ +When calculating the digest of the JIT stencils input, sort the hashed files +by filenames before adding their content to the hasher. This ensures +deterministic hash input and hence deterministic hash, independent on +filesystem order. diff --git a/Misc/NEWS.d/next/C_API/2025-12-11-09-06-36.gh-issue-142571.Csdxnn.rst b/Misc/NEWS.d/next/C_API/2025-12-11-09-06-36.gh-issue-142571.Csdxnn.rst new file mode 100644 index 00000000000000..ea419b4fe1d6b0 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-12-11-09-06-36.gh-issue-142571.Csdxnn.rst @@ -0,0 +1 @@ +:c:func:`!PyUnstable_CopyPerfMapFile` now checks that opening the file succeeded before flushing. diff --git a/Misc/NEWS.d/next/C_API/2025-12-11-13-01-49.gh-issue-142589.nNAqgw.rst b/Misc/NEWS.d/next/C_API/2025-12-11-13-01-49.gh-issue-142589.nNAqgw.rst new file mode 100644 index 00000000000000..529277b951ada3 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-12-11-13-01-49.gh-issue-142589.nNAqgw.rst @@ -0,0 +1,2 @@ +Fix :c:func:`PyUnstable_Object_IsUniqueReferencedTemporary()` handling of +tagged ints on the interpreter stack. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-22-16-20-06.gh-issue-137007.1oPvvK.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-22-16-20-06.gh-issue-137007.1oPvvK.rst new file mode 100644 index 00000000000000..cb25fd10c0bd2c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-22-16-20-06.gh-issue-137007.1oPvvK.rst @@ -0,0 +1 @@ +Fix a bug during JIT compilation failure which caused garbage collection debug assertions to fail. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-05-18-26-50.gh-issue-142282.g6RQUN.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-05-18-26-50.gh-issue-142282.g6RQUN.rst new file mode 100644 index 00000000000000..d038cd40f4f57a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-05-18-26-50.gh-issue-142282.g6RQUN.rst @@ -0,0 +1 @@ +Fix :func:`winreg.QueryValueEx` to not accidentally read garbage buffer under race condition. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-13-04-37.gh-issue-142343.BTAyML.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-13-04-37.gh-issue-142343.BTAyML.rst new file mode 100644 index 00000000000000..9da936c2a6f3bd --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-13-04-37.gh-issue-142343.BTAyML.rst @@ -0,0 +1 @@ +Fix SIGILL crash on m68k due to incorrect assembly constraint. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-15-46-06.gh-issue-133932.HAxa4p.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-15-46-06.gh-issue-133932.HAxa4p.rst new file mode 100644 index 00000000000000..460226303599e2 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-15-46-06.gh-issue-133932.HAxa4p.rst @@ -0,0 +1,2 @@ +Fix crash in the free threading build when clearing frames that hold tagged +integers. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-17-34-57.gh-issue-142402.iV0ON3.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-17-34-57.gh-issue-142402.iV0ON3.rst new file mode 100644 index 00000000000000..bad31470a25552 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-08-17-34-57.gh-issue-142402.iV0ON3.rst @@ -0,0 +1,3 @@ +Fix reference counting when adjacent literal parts are merged while constructing +:class:`string.templatelib.Template`, preventing the displaced string object +from leaking. diff --git a/Misc/NEWS.d/next/Library/2025-11-18-15-48-13.gh-issue-105836.sbUw24.rst b/Misc/NEWS.d/next/Library/2025-11-18-15-48-13.gh-issue-105836.sbUw24.rst new file mode 100644 index 00000000000000..d2edc5b2cb743d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-18-15-48-13.gh-issue-105836.sbUw24.rst @@ -0,0 +1,2 @@ +Fix :meth:`asyncio.run_coroutine_threadsafe` leaving underlying cancelled +asyncio task running. diff --git a/Misc/NEWS.d/next/Library/2025-11-27-10-49-13.gh-issue-142006.nzJDG5.rst b/Misc/NEWS.d/next/Library/2025-11-27-10-49-13.gh-issue-142006.nzJDG5.rst new file mode 100644 index 00000000000000..49643892ff9ccd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-27-10-49-13.gh-issue-142006.nzJDG5.rst @@ -0,0 +1 @@ +Fix a bug in the :mod:`email.policy.default` folding algorithm which incorrectly resulted in a doubled newline when a line ending at exactly max_line_length was followed by an unfoldable token. diff --git a/Misc/NEWS.d/next/Library/2025-12-04-09-22-31.gh-issue-68552.I_v-xB.rst b/Misc/NEWS.d/next/Library/2025-12-04-09-22-31.gh-issue-68552.I_v-xB.rst new file mode 100644 index 00000000000000..bd3e53c9f8193f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-04-09-22-31.gh-issue-68552.I_v-xB.rst @@ -0,0 +1 @@ +``MisplacedEnvelopeHeaderDefect`` and ``Missing header name`` defects are now correctly passed to the ``handle_defect`` method of ``policy`` in :class:`~email.parser.FeedParser`. diff --git a/Misc/NEWS.d/next/Library/2025-12-04-23-26-12.gh-issue-142267.yOM6fP.rst b/Misc/NEWS.d/next/Library/2025-12-04-23-26-12.gh-issue-142267.yOM6fP.rst new file mode 100644 index 00000000000000..f46e82105fc2f5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-04-23-26-12.gh-issue-142267.yOM6fP.rst @@ -0,0 +1 @@ +Improve :mod:`argparse` performance by caching the formatter used for argument validation. diff --git a/Misc/NEWS.d/next/Library/2025-12-05-16-39-17.gh-issue-75949.pHxW98.rst b/Misc/NEWS.d/next/Library/2025-12-05-16-39-17.gh-issue-75949.pHxW98.rst new file mode 100644 index 00000000000000..5ca3fc05b9816d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-05-16-39-17.gh-issue-75949.pHxW98.rst @@ -0,0 +1 @@ +Fix :mod:`argparse` to preserve ``|`` separators in mutually exclusive groups when the usage line wraps due to length. diff --git a/Misc/NEWS.d/next/Library/2025-12-06-13-02-13.gh-issue-142332.PNvXCV.rst b/Misc/NEWS.d/next/Library/2025-12-06-13-02-13.gh-issue-142332.PNvXCV.rst new file mode 100644 index 00000000000000..ee2d5e1d4911a7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-06-13-02-13.gh-issue-142332.PNvXCV.rst @@ -0,0 +1,2 @@ +Fix usage formatting for positional arguments in mutually exclusive groups in :mod:`argparse`. +in :mod:`argparse`. diff --git a/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst b/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst new file mode 100644 index 00000000000000..cf570f314c00cc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst @@ -0,0 +1,3 @@ +Fix usage formatting for mutually exclusive groups in :mod:`argparse` +when they are preceded by positional arguments or followed or intermixed +with other optional arguments. diff --git a/Misc/NEWS.d/next/Library/2025-12-09-14-40-45.gh-issue-112527.Tvf5Zk.rst b/Misc/NEWS.d/next/Library/2025-12-09-14-40-45.gh-issue-112527.Tvf5Zk.rst new file mode 100644 index 00000000000000..70447bc6437677 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-09-14-40-45.gh-issue-112527.Tvf5Zk.rst @@ -0,0 +1,2 @@ +The help text for required options in :mod:`argparse` no +longer extended with " (default: None)". diff --git a/Misc/NEWS.d/next/Library/2025-12-10-11-20-05.gh-issue-123241.oYg2n7.rst b/Misc/NEWS.d/next/Library/2025-12-10-11-20-05.gh-issue-123241.oYg2n7.rst new file mode 100644 index 00000000000000..871a03a6fd1021 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-10-11-20-05.gh-issue-123241.oYg2n7.rst @@ -0,0 +1,2 @@ +Avoid reference count operations in garbage collection of :mod:`ctypes` +objects. diff --git a/Misc/NEWS.d/next/Library/2025-12-11-09-03-07.gh-issue-142556.RuiBte.rst b/Misc/NEWS.d/next/Library/2025-12-11-09-03-07.gh-issue-142556.RuiBte.rst new file mode 100644 index 00000000000000..782e62b65a36f3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-11-09-03-07.gh-issue-142556.RuiBte.rst @@ -0,0 +1 @@ +Fix crash when a task gets re-registered during finalization in :mod:`asyncio`. Patch by Kumar Aditya. diff --git a/Misc/externals.spdx.json b/Misc/externals.spdx.json index 69f3beec82ed34..59aceedb94d4c7 100644 --- a/Misc/externals.spdx.json +++ b/Misc/externals.spdx.json @@ -70,42 +70,42 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "6bb739ecddbd2cfb6d255eb5898437a9b5739277dee931338d3275bac5d96ba2" + "checksumValue": "9b07560b6c1afa666bd78b8d3aa5c83fdda02149afdf048596d5b0e0dac1ee55" } ], - "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/openssl-3.0.16.tar.gz", + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/openssl-3.0.18.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:openssl:openssl:3.0.16:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:openssl:openssl:3.0.18:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], "licenseConcluded": "NOASSERTION", "name": "openssl", "primaryPackagePurpose": "SOURCE", - "versionInfo": "3.0.16" + "versionInfo": "3.0.18" }, { "SPDXID": "SPDXRef-PACKAGE-sqlite", "checksums": [ { "algorithm": "SHA256", - "checksumValue": "e335aeb44fa36cde60ecbb6a9f8be6f5d449d645ce9b0199ee53a7e6728d19d2" + "checksumValue": "fb5ab81f27612b0a7b4861ba655906c76dc85ee969e7a4905d2075aff931e8d0" } ], - "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/sqlite-3.49.1.0.tar.gz", + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/sqlite-3.50.4.0.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:sqlite:sqlite:3.49.1.0:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:sqlite:sqlite:3.50.4.0:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], "licenseConcluded": "NOASSERTION", "name": "sqlite", "primaryPackagePurpose": "SOURCE", - "versionInfo": "3.49.1.0" + "versionInfo": "3.50.4.0" }, { "SPDXID": "SPDXRef-PACKAGE-tcl-core", diff --git a/Misc/python.man b/Misc/python.man index 15174b62d5fea4..612e2bbf56800e 100644 --- a/Misc/python.man +++ b/Misc/python.man @@ -529,11 +529,11 @@ See also the \fB\-X frozen_modules\fR option. If this variable is set to 1, the global interpreter lock (GIL) will be forced on. Setting it to 0 forces the GIL off. Only available in builds configured with \fB\-\-disable\-gil\fP. +.IP +This is equivalent to the \fB\-X gil\fR option. .IP PYTHON_HISTORY This environment variable can be used to set the location of a history file (on Unix, it is \fI~/.python_history\fP by default). -.IP -This is equivalent to the \fB\-X gil\fR option. .IP PYTHONNODEBUGRANGES If this variable is set, it disables the inclusion of the tables mapping extra location information (end line, start column offset and end column diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 4a697d047ca6e4..d64857ce233b5c 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -48,11 +48,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "6984055af7b4e01429d8ebc910fe2be900d8ee9c" + "checksumValue": "a4395dd0589a97aab0904f7a5f5dc5781a086aa2" }, { "algorithm": "SHA256", - "checksumValue": "7c16a5cf0eea844ae579db083b8d75f23a71859cac77e3c4cb7a8fa3b7621685" + "checksumValue": "610b844bbfa3ec955772cc825db4d4db470827d57adcb214ad372d0eaf00e591" } ], "fileName": "Modules/expat/expat.h" @@ -62,11 +62,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "9e615c6e5c3ba00670f674a6b071bb855b0b563d" + "checksumValue": "c22196e3d8bee88fcdda715623b3b9d2119d2fb3" }, { "algorithm": "SHA256", - "checksumValue": "3d90a4b65c40a3f848c36100f4d73b933a015c7b7cd85c28e4331a6b845c1ad0" + "checksumValue": "f2c2283ba03b057e92beefc7f81ba901ebb6dfc1a45b036c8a7d65808eb77a84" } ], "fileName": "Modules/expat/expat_external.h" @@ -90,11 +90,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "60b0ee8b4a93ef0276193ed1051c15ecab73c02e" + "checksumValue": "7dce7d98943c5db33ae05e54801dcafb4547b9dd" }, { "algorithm": "SHA256", - "checksumValue": "6af6e8fbf5c83c1431464a2811b10ea2d1ff64c0eabfd9f18b1d4e53bf400c35" + "checksumValue": "6bfe307d52e7e4c71dbc30d3bd902a4905cdd83bbe4226a7e8dfa8e4c462a157" } ], "fileName": "Modules/expat/internal.h" @@ -174,11 +174,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "3db0435d69e5eb904c9c88400a5ab073a81049bc" + "checksumValue": "4c81a1f04fc653877c63c834145c18f93cd95f3e" }, { "algorithm": "SHA256", - "checksumValue": "633b272fa893dfbef539edbba35f1b11ecf09a13b89189105b0dfa6c7ecfc3bf" + "checksumValue": "04a379615f476d55f95ca1853107e20627b48ca4afe8d0fd5981ac77188bf0a6" } ], "fileName": "Modules/expat/xmlparse.c" @@ -202,11 +202,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c961fb1a80f7b0601a63e69fba793fe5f6dff157" + "checksumValue": "ac2964cca107f62dd133bfd4736a9a17defbc401" }, { "algorithm": "SHA256", - "checksumValue": "228470eb9181a9a7575b63137edcb61b817ee4e0923faffdbeba29e07c939713" + "checksumValue": "92e41f373b67f6e0dcd7735faef3c3f1e2c17fe59e007e6b74beef6a2e70fa88" } ], "fileName": "Modules/expat/xmlrole.h" @@ -216,11 +216,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "8394790c0199c8f88108542ad78f23095d28a3fe" + "checksumValue": "1e2d35d90a1c269217f83d3bdf3c71cc22cb4c3f" }, { "algorithm": "SHA256", - "checksumValue": "5b16c671ccc42496374762768e4bf48f614aecfd2025a07925b8d94244aec645" + "checksumValue": "98d0fc735041956cc2e7bbbe2fb8f03130859410e0aee5e8015f406a37c02a3c" } ], "fileName": "Modules/expat/xmltok.c" @@ -230,11 +230,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7d2943a0128094455004b1a98007b98734221bae" + "checksumValue": "d126831eaa5158cff187a8c93f4bc1c8118f3b17" }, { "algorithm": "SHA256", - "checksumValue": "6b8919dc951606dc6f2b0175f8955a9ced901ce8bd08db47f291b6c04227ae7f" + "checksumValue": "91bf003a725a675761ea8d92cebc299a76fd28c3a950572f41bc7ce5327ee7b5" } ], "fileName": "Modules/expat/xmltok.h" @@ -300,11 +300,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "de7179fe6970e2b5d281dfed977ed91be635b8d2" + "checksumValue": "a57d53c35aa916ce0cd1567c8f10dcea58270321" }, { "algorithm": "SHA256", - "checksumValue": "c0ba888d87775c7d7f7d8a08dac7b3988fed81e11bb52396d90f762a8e90a7eb" + "checksumValue": "d68209032703137326f9aadd9abac8fe4a5c92d391e2f43b0414fa63087adc6b" } ], "fileName": "Modules/_hacl/Hacl_HMAC.h" @@ -314,11 +314,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4b6e7696e8d84f322fb24b1fbb08ccb9b0e7d51b" + "checksumValue": "808af7ff8a2cb2b4ef3a9ce3dbfef58d90828c9f" }, { "algorithm": "SHA256", - "checksumValue": "50a65a34a7a7569eedf7fa864a7892eeee5840a7fdf6fa8f1e87d42c65f6c877" + "checksumValue": "6a492aa586f2d10b1b300ce8ce4c72c976ff7548fee667aded2253f99969ac87" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.c" @@ -328,11 +328,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7c66ac004a1dcf3fee0ab9aa62d61972f029de3a" + "checksumValue": "4d767388a34e2a2000e0cbd31f06cf36af1ae10d" }, { "algorithm": "SHA256", - "checksumValue": "9a7239a01a4ee8defbe3ebd9f0d12c873a1dd8e0659070380b2eab3ab0177333" + "checksumValue": "fac493e96af252abcf5705f0ab4eec59c1f3bc8244e105d75a57c43552dd1569" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.h" @@ -342,11 +342,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0f75e44a42775247a46acc2beaa6bae8f199a3d9" + "checksumValue": "1bb072f2be9e5d194274fdcc87825cb094e4b32e" }, { "algorithm": "SHA256", - "checksumValue": "03b612c24193464ed6848aeebbf44f9266b78ec6eed2486056211cde8992c49a" + "checksumValue": "9eb22953ce60dde9dc970fec9dfce9d94235f4b7ccd8f0151cad4707dc835d1d" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c" @@ -356,11 +356,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7f273d26942233e5dcdfb4c1a16ff2486b15f899" + "checksumValue": "280fd03ee23e13ecf90711d2be8230035d957343" }, { "algorithm": "SHA256", - "checksumValue": "dbc0dacc68ed52dbf1b7d6fba2c87870317998bc046e65f6deaaa150625432f8" + "checksumValue": "817dcd05d06d804587fce7d8f2f3f42a6fcf6818d2419a3551ef0df70cb7125a" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h" @@ -384,11 +384,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "65bf44140691b046dcfed3ab1576dbf8bbf96dc5" + "checksumValue": "cdd6e9ca86dbede92d1a47b9224d2af70c599a71" }, { "algorithm": "SHA256", - "checksumValue": "0f98959dafffce039ade9d296f7a05bed151c9c512498f48e4b326a5523a240b" + "checksumValue": "f3204f3e60734d811b6630f879b69ce54eaf367f3fca5889c1026e7a1f3ee1e4" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.c" @@ -398,11 +398,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "2120c8c467aeebcc7c8b9678c15e79648433b91a" + "checksumValue": "79f47ab5458d88bce0f210a566aa117ce2125049" }, { "algorithm": "SHA256", - "checksumValue": "45735f7fe2dbbad7656d07854e9ec8176ad26c79f90dcc0fec0b9a59a6311ba7" + "checksumValue": "5cc96313f8c066f055c2819e473c79aeff086ba91a1449d54aa569127cd8601e" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.h" @@ -412,11 +412,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0da9782455923aede8d8dce9dfdc38f4fc1de572" + "checksumValue": "4056bb6e3ed184400d1610bdfd4260b3fd05ccbb" }, { "algorithm": "SHA256", - "checksumValue": "2d17ae768fd3d7d6decddd8b4aaf23ce02a809ee62bb98da32c8a7f54acf92d0" + "checksumValue": "c93746df2f219cbb1634ee6fb0ab1c4cbd381d1f36c637114c68346c9935326d" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c" @@ -426,11 +426,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "9028e24c9876d9d16b2435ec29240c6b57bfe2a0" + "checksumValue": "c98890b6193baa3dbd472cfb67f22aea3dd0d3e1" }, { "algorithm": "SHA256", - "checksumValue": "062e3b856acac4f929c1e04b8264a754cad21ca6580215f7094a3f0a04edb912" + "checksumValue": "fe3f17bf237166f872ba88c8204333c4f64bdc4bb29c265448581c7bfea4b151" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h" @@ -454,11 +454,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "38e8d96ef1879480780494058a93cec181f8d6d7" + "checksumValue": "61f678cd9234c6eab5d4409ae66f470b068b959b" }, { "algorithm": "SHA256", - "checksumValue": "61e77d2063cf60c96e9ce06af215efe5d42c43026833bffed5732326fe97ed1e" + "checksumValue": "5b91ed0339074e2e546119833398e2cdb7190c33a59c405bf43b2417c789547d" } ], "fileName": "Modules/_hacl/Hacl_Hash_MD5.c" @@ -468,11 +468,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e67a9bc18358c57afaeff3a174893ddfdb52dfc6" + "checksumValue": "2ed70f612a998ef6c04d62f9487a1b2534e6d76a" }, { "algorithm": "SHA256", - "checksumValue": "16e982081f6c2fd03ea751fcc64f5a835c94652841836e231fe562b9e287f4bc" + "checksumValue": "a34534ef36bc8428ac1169ca5f4f1c17a0817507ebae388e3dd825d1a331f28d" } ], "fileName": "Modules/_hacl/Hacl_Hash_MD5.h" @@ -482,11 +482,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "986dd5ba0b34d15f3e5e5c656979aea1b502e8aa" + "checksumValue": "c7fc5c9721caf37c5a24c9beff27b0ac2ed68cc9" }, { "algorithm": "SHA256", - "checksumValue": "38d5f1f2e67a0eb30789f81fc56c07a6e7246e2b1be6c65485bcca1dcd0e0806" + "checksumValue": "ce08721d491f3b8a9bd4cde6c27bfcc8fc01471512ccca4bd3c0b764cb551d29" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA1.c" @@ -496,11 +496,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "f1ca21f1ee8b15ad9ccfbda72165b9d86912166c" + "checksumValue": "a3f9bdc9e73f80eb4a586dc180246cfb8267ffc0" }, { "algorithm": "SHA256", - "checksumValue": "4b2ad9ea93fdd9c2fdc521fc4e14e02550666c2717a23b85819db2e07ea555f3" + "checksumValue": "2d5cae94382f5473cf6d2654760375ff1ee9cebdb8d4506b63ba33f488de1559" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA1.h" @@ -510,11 +510,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "f732a6710fe3e13cd28130f0f20504e347d1c412" + "checksumValue": "fffe8c4f67669ac8ccd87c2e0f95db2427481df1" }, { "algorithm": "SHA256", - "checksumValue": "86cf32e4d1f3ba93a94108271923fdafe2204447792a918acf4a2250f352dbde" + "checksumValue": "f8af382de7e29a73c726f9c70770498ddd99e2c4702489ed6e634f0b68597c95" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA2.c" @@ -524,11 +524,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "f38cebeeca40a83aeb2cf5dfce578ffefe176d84" + "checksumValue": "68b35d0c573f7301ba4d6919b1680d55675a0d98" }, { "algorithm": "SHA256", - "checksumValue": "ee03bf9368d1a3a3c70cfd4e9391b2485466404db4a60bfc5319630cc314b590" + "checksumValue": "442d8997d2bcda20ddf628665e2e69945400e1ab3019bc14fc7c8e20db20c320" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA2.h" @@ -538,11 +538,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "50f75337b31f509b5bfcc7ebb3d066b82a0f1b33" + "checksumValue": "77d3d879dfa5949030bca0e8ee75d3d369ec54a7" }, { "algorithm": "SHA256", - "checksumValue": "c9e1442899e5b902fa39f413f1a3131f7ab5c2283d5100dc8ac675a7d5ebbdf1" + "checksumValue": "8744f5b6e054c3e5c44f413a60f9154b76dd7230135dcee26fd063755ec64be1" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA3.c" @@ -552,11 +552,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "01717207aef77174e328186d48c27517f6644c15" + "checksumValue": "db1008095b64b2f8ee2a4ce72fa7cfc4a622a108" }, { "algorithm": "SHA256", - "checksumValue": "620dded172e94cb3f25f9904b44977d91f2cc9573e41b38f19e929d083ae0308" + "checksumValue": "961a686186b76a1cd6a64bcfd06afdc8657b1f901bac991166f498ed16f9349a" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA3.h" @@ -566,11 +566,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "8140310f505bb2619a749777a91487d666237bcf" + "checksumValue": "eb224e26bc0503a48c88c837e28bbc7a9878ba5c" }, { "algorithm": "SHA256", - "checksumValue": "9d95e6a651c22185d9b7c38f363d30159f810e6fcdc2208f29492837ed891e82" + "checksumValue": "29fcf948a3715e09cbbd434cebb6105f276236a6e4947d237b7bf06634bf2432" } ], "fileName": "Modules/_hacl/Hacl_Streaming_HMAC.c" @@ -580,11 +580,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "49523144583a15d96ba1646af02dc292e633bf8f" + "checksumValue": "2d6c600024275c780e8313e0a5ab506e025bb4d6" }, { "algorithm": "SHA256", - "checksumValue": "78345519bf6789264f6792b809ee97a9ecf7cb5829c674c61e2d99bfdfdc36fc" + "checksumValue": "6450aef92f507fb0a8a3086b1d2792e7fca07121580c24fdaedd1b32e9ad0a76" } ], "fileName": "Modules/_hacl/Hacl_Streaming_HMAC.h" @@ -594,11 +594,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "372448599774a98e5c5d083e91f301ed1c4b822e" + "checksumValue": "ed283b95ebb772b05bdf802b70dbb301ece84e91" }, { "algorithm": "SHA256", - "checksumValue": "95d8e70ca4bc6aa98f6d2435ceb6410ead299b1f700fae1f5c603ec3f57ea551" + "checksumValue": "efc7bd11460744768425aedf4e004d3ad3397a5489752b80044ae785f30b78d4" } ], "fileName": "Modules/_hacl/Hacl_Streaming_Types.h" @@ -608,11 +608,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c9651ef21479c4d8a3b04c5baa1902866dbb1cdf" + "checksumValue": "0a0b7f3714167ad45ddf5a6a48d76f525a119c9c" }, { "algorithm": "SHA256", - "checksumValue": "e039c82ba670606ca111573942baad800f75da467abbc74cd7d1fe175ebcdfaf" + "checksumValue": "135d4afb4812468885c963c9c87a55ba5fae9181df4431af5fbad08588dda229" } ], "fileName": "Modules/_hacl/Lib_Memzero0.c" @@ -622,11 +622,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "eaa543c778300238dc23034aafeada0951154af1" + "checksumValue": "ae06c415ae7e0e1e85558863f29d1b9013e46e9b" }, { "algorithm": "SHA256", - "checksumValue": "3fd2552d527a23110d61ad2811c774810efb1eaee008f136c2a0d609daa77f5b" + "checksumValue": "69dc2e78411e9b271100eb0d2a2d8dc39dd2348afc26f8b4cfba14c025102c7f" } ], "fileName": "Modules/_hacl/include/krml/FStar_UInt128_Verified.h" @@ -636,11 +636,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "41ee1e34ede7ef5b24b87d4ca816fd6d9fac8010" + "checksumValue": "edefe48ced707327fd6cdf3f18b3ca42dda9a9f7" }, { "algorithm": "SHA256", - "checksumValue": "d48ed03e504cb87793a310a9552fb3ba2ebd6fe90127b7d642c8740fba1b9748" + "checksumValue": "f01e3f0892935f3f659019875f580445b9ea23482afbe11ffbe7cdbd22dbd4d2" } ], "fileName": "Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h" @@ -678,11 +678,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e01d7d493fbaceeedc4b1c6451d8240bcb9c903a" + "checksumValue": "7a943fbb8f55729a960f9b426e9ff2794453aca9" }, { "algorithm": "SHA256", - "checksumValue": "c2f0a43884771f24d7cb744b79818b160020d2739b2881b2054cfc97fb2e7b4a" + "checksumValue": "08bb43ef5626a8450f985da0af345b6658ac8bcc4585f77271decd0e41fc02e0" } ], "fileName": "Modules/_hacl/include/krml/internal/target.h" @@ -692,11 +692,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "3f66313d16891f43b21c1a736081c2c6d46bf370" + "checksumValue": "b4e6c5fc5e17864cc2bf452db2688be733d6df72" }, { "algorithm": "SHA256", - "checksumValue": "78e9bff9124968108e1699e1c6388e3d4ec9bd72dd8adff49734a69ab380ee5c" + "checksumValue": "404691cf9b2269ecc754ceca27bb8f8f029f1c8deaa4d967990dcf5ce08cd016" } ], "fileName": "Modules/_hacl/include/krml/internal/types.h" @@ -706,11 +706,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e18efc9239a5df0f222b5f7b0a65f72509d7e304" + "checksumValue": "8c3e09e00459951e060ad469c1fa31eeb9b6b9cb" }, { "algorithm": "SHA256", - "checksumValue": "47dd5a7d21b5302255f9fff28884f65d3056fc3f54471ed62ec85fa1904f8aa5" + "checksumValue": "6d1a350ec272c83761f1789611d8f60947a4d69fed3d23d8eee38709a0ad2c0a" } ], "fileName": "Modules/_hacl/include/krml/lowstar_endianness.h" @@ -720,11 +720,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "aaa656e25a92ba83655e1398a97efa6981f60fc4" + "checksumValue": "c9517c43046de129f02da8b74e454bade4872c00" }, { "algorithm": "SHA256", - "checksumValue": "a59abc6e9b3019cb18976a15e634f5146bd965fc9babf4ccbf2b531164a34f85" + "checksumValue": "96c2a973bc01b1295f7af67c7de3839facfeee36c8848d7d88c62695de98ab21" } ], "fileName": "Modules/_hacl/internal/Hacl_HMAC.h" @@ -734,11 +734,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0741cb8497309d648428be1e7b5944b1fc167187" + "checksumValue": "1c5eeb5d1866acb6bc8b4e5a01a107e3a87f5694" }, { "algorithm": "SHA256", - "checksumValue": "f9b923a566d62de047c753637143d439ca1c25221c08352ddc1738ff4a6ac721" + "checksumValue": "f262738390f56e8e9692acadecd1434c688075047d788283e1fb45d920ea6956" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2b.h" @@ -748,11 +748,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "878ae284c93824b80b1763e8b3e6be3c410777a8" + "checksumValue": "b600c1b5eafc34b29a778186737d8a51e2342d85" }, { "algorithm": "SHA256", - "checksumValue": "49df6223f6403daf503a1af1a3d2f943d30b5889fe7ed20299c3df24c1e3853d" + "checksumValue": "f22e088ad9c2eb739aefc3685ef3ab1ccaab3c5ef2e5d06cc846ad9f8e3d2669" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h" @@ -762,11 +762,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "25552d8cbf8aa345907635b38f284eec9075301e" + "checksumValue": "eb618ecc6ca2830066448f5f6d4df84a5c09f0f4" }, { "algorithm": "SHA256", - "checksumValue": "a3424cf4c5518654908086bbbf5d465715ec3b23625ef0cadc29492d1f90366c" + "checksumValue": "a4728e43deb0a9d8213b8ddcbda68a63bc73a12fb99aad54b7d28b314776a0d4" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2s.h" @@ -776,11 +776,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "54a712fc3ed5a817288351cbac5b7d9afa7e379f" + "checksumValue": "5441b6a97cc053c332b29477238d70fa011c74ac" }, { "algorithm": "SHA256", - "checksumValue": "c6abae648b8a1e9d5631c0a959620cad1f7e92ce522e07c3416199fe51debef6" + "checksumValue": "dad568d256a2ccbbbcdd419fe0543ea7137d8065a713a5f009aa52521c0f7f6a" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h" @@ -790,11 +790,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c15c5f83bbb9f62611c49f0f8f723eaab1a27488" + "checksumValue": "7081b58f28568f600d4624dd5bd6f735191e068b" }, { "algorithm": "SHA256", - "checksumValue": "95cd5d91c4a9217901d0b3395dcd8881e62e2055d723b532ec5176386a636d22" + "checksumValue": "305af1422213ed97e8e5d3d8a9dfee4f21cb2fcd2acf65ee7303ce00b2b4bd2a" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_MD5.h" @@ -804,11 +804,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7b8717e3a24e7e16a34b251d0d02da6f68439695" + "checksumValue": "b6fb6219cb40d039e789666e34b43461e3b5c82a" }, { "algorithm": "SHA256", - "checksumValue": "9473d8bc9506fe0053d7d98c225d4873011329863f1c4a8e93e43fc71bd1f314" + "checksumValue": "63c58363ff95e8146d5628dab2da25bc2fd0e8b590fd4823b512c33f843355bb" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_SHA1.h" @@ -818,11 +818,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e319c949f5a2dd765be2c8c7ff77bfe52ee6c7da" + "checksumValue": "bcc71e702df1070cb0081cb983aec7683e036719" }, { "algorithm": "SHA256", - "checksumValue": "75261448e51c3eb1ba441e973b193e23570b167f67743942ee2ee57417491c9f" + "checksumValue": "709c6272f77e2368f6cc0bf36527e3b16dd5d5f3dc26a2afdef8238200af8770" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_SHA2.h" @@ -832,11 +832,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "dbd92415c31606804102b79d5ba3d1752fe03887" + "checksumValue": "e3b5e6add5357760554b032b818992ce9bc211e0" }, { "algorithm": "SHA256", - "checksumValue": "5d74a76a0ac3659a1ae1276c3ca55521f09e83d2f0039f5c519a76f8f3c76a8e" + "checksumValue": "cf49d536a5663379fb4517018b4b3f8a232f8d4c7ddc7edfd60163d13f98a530" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_SHA3.h" @@ -846,11 +846,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "ad788265f8e1b078c4d1cb6e90b8c031590e6baf" + "checksumValue": "09a0ea364fb073f5f622a763f1351fbf4bac4627" }, { "algorithm": "SHA256", - "checksumValue": "d8354a9b75e2470085fa7e538493130e81fa23a804a6a69d34da8fdcc941c038" + "checksumValue": "916d54d7217517f0360edea920dc21585fbe6d1c2458eac86826182e12c82ec2" } ], "fileName": "Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h" @@ -860,11 +860,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "2048f3cd61dbda2df862a2982ebaf24b6815ed51" + "checksumValue": "b082645b43f8841db5e9e57c0b9a3825fa7e52f5" }, { "algorithm": "SHA256", - "checksumValue": "b0f5a79c98525b0cb1659238e095641328b7da16a94cb57a0793e635d1da3653" + "checksumValue": "059cf0d31427abf84c626797a97c140630a1a6ead578005162f46fad46663389" } ], "fileName": "Modules/_hacl/internal/Hacl_Streaming_HMAC.h" @@ -874,11 +874,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4e6b098e89fd447bd03f47b55208208456b20966" + "checksumValue": "1f85ed69e395829b3ac5af9e2d049af7708cb9cb" }, { "algorithm": "SHA256", - "checksumValue": "d54d947968ca125978d61fea844711b990f0a18ab0fbca87e41029004d9d04b6" + "checksumValue": "76997c7069a347ac78b65d26354a0324d54add4ca7a02e7be36d6d5e1c9702e0" } ], "fileName": "Modules/_hacl/internal/Hacl_Streaming_Types.h" @@ -930,11 +930,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4a0bdb9496d49bbfa3ad50bb7854d8f099e84891" + "checksumValue": "682b069d3888f3e8f8cc90f1a49bac9d1a5903d2" }, { "algorithm": "SHA256", - "checksumValue": "b1a45149239ee7af7de769a3e9339950d47c199bb9eaa10edce8a00fde603b12" + "checksumValue": "8551d2c3fde03b92c6fab5febde00347bd9184ea0085077976863c7836e9669d" } ], "fileName": "Modules/_hacl/python_hacl_namespaces.h" @@ -1730,14 +1730,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "17aa6cfc5c4c219c09287abfc10bc13f0c06f30bb654b28bfe6f567ca646eb79" + "checksumValue": "821ac9710d2c073eaf13e1b1895a9c9aa66c1157a99635c639fbff65cdbdd732" } ], - "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_6_3/expat-2.6.3.tar.gz", + "downloadLocation": "https://github.com/libexpat/libexpat/releases/download/R_2_7_3/expat-2.7.3.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.6.3:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:libexpat_project:libexpat:2.7.3:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1745,21 +1745,21 @@ "name": "expat", "originator": "Organization: Expat development team", "primaryPackagePurpose": "SOURCE", - "versionInfo": "2.6.3" + "versionInfo": "2.7.3" }, { "SPDXID": "SPDXRef-PACKAGE-hacl-star", "checksums": [ { "algorithm": "SHA256", - "checksumValue": "02dfcf0c79d488b120d7f2c2a0f9206301c7927ed5106545e0b6f2aef88da76a" + "checksumValue": "61e48893f37cb2280d106cefacf6fb5afe84edf625fec39572d0ee94e1018f26" } ], - "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/7720f6d4fc0468a99d5ea6120976bcc271e42727.zip", + "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/8ba599b2f6c9701b3dc961db895b0856a2210f76.zip", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:7720f6d4fc0468a99d5ea6120976bcc271e42727:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:8ba599b2f6c9701b3dc961db895b0856a2210f76:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1767,7 +1767,7 @@ "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", - "versionInfo": "7720f6d4fc0468a99d5ea6120976bcc271e42727" + "versionInfo": "8ba599b2f6c9701b3dc961db895b0856a2210f76" }, { "SPDXID": "SPDXRef-PACKAGE-macholib", diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index d3e1f0db057023..cc58ffc176af59 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -1889,6 +1889,13 @@ added = '3.5' [data.PyModuleDef_Type] added = '3.5' +[const.Py_mod_create] + added = '3.5' +[const.Py_mod_exec] + added = '3.5' +[struct.PyModuleDef_Slot] + added = '3.5' + struct_abi_kind = 'full-abi' # New slots in 3.5: # d51374ed78a3e3145911a16cdf3b9b84b3ba7d15 - Matrix multiplication (PEP 465) @@ -2293,6 +2300,10 @@ added = '3.11' [function.PyMemoryView_FromBuffer] added = '3.11' +[const.Py_bf_getbuffer] + added = '3.11' +[const.Py_bf_releasebuffer] + added = '3.11' # Constants for Py_buffer API added to this list in Python 3.11.1 (https://github.com/python/cpython/issues/98680) # (they were available with 3.11.0) @@ -2427,6 +2438,9 @@ added = '3.12' [const.Py_TPFLAGS_ITEMS_AT_END] added = '3.12' +[const.Py_mod_multiple_interpreters] + added = '3.12' + [function.PyImport_AddModuleRef] added = '3.13' [function.PyWeakref_GetRef] @@ -2505,6 +2519,8 @@ added = '3.13' [function.PyEval_GetFrameLocals] added = '3.13' +[const.Py_mod_gil] + added = '3.13' [function.Py_TYPE] added = '3.14' diff --git a/Modules/Setup.bootstrap.in b/Modules/Setup.bootstrap.in index 2b2e8cb3e3cacd..65a1fefe72e92e 100644 --- a/Modules/Setup.bootstrap.in +++ b/Modules/Setup.bootstrap.in @@ -12,6 +12,8 @@ posix posixmodule.c _signal signalmodule.c _tracemalloc _tracemalloc.c _suggestions _suggestions.c +# needs libm and on some platforms librt +_datetime _datetimemodule.c # modules used by importlib, deepfreeze, freeze, runpy, and sysconfig _codecs _codecsmodule.c diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 3a38a60a152e8c..905ea4aa2e57a9 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -56,9 +56,6 @@ @MODULE_CMATH_TRUE@cmath cmathmodule.c @MODULE__STATISTICS_TRUE@_statistics _statisticsmodule.c -# needs libm and on some platforms librt -@MODULE__DATETIME_TRUE@_datetime _datetimemodule.c - # _decimal uses libmpdec # either static libmpdec.a from Modules/_decimal/libmpdec or libmpdec.so # with ./configure --with-system-libmpdec diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 5f9181395c4828..7ff3aeff2c7669 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -2990,16 +2990,12 @@ static PyType_Spec Task_spec = { static void TaskObj_dealloc(PyObject *self) { - _PyObject_ResurrectStart(self); - // Unregister the task here so that even if any subclass of Task - // which doesn't end up calling TaskObj_finalize not crashes. - unregister_task((TaskObj *)self); - - PyObject_CallFinalizer(self); - - if (_PyObject_ResurrectEnd(self)) { - return; + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + return; // resurrected } + // unregister the task after finalization so that + // if the task gets resurrected, it remains registered + unregister_task((TaskObj *)self); PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index ad670293ec5b6a..3ba48d5d9d3c64 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -5,6 +5,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_pyatomic_ft_wrappers.h" #include "pycore_typeobject.h" // _PyType_GetModuleState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> @@ -1532,9 +1533,7 @@ deque_dealloc(PyObject *self) Py_ssize_t i; PyObject_GC_UnTrack(deque); - if (deque->weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, deque->weakreflist); if (deque->leftblock != NULL) { (void)deque_clear(self); assert(deque->leftblock != NULL); diff --git a/Modules/_csv.c b/Modules/_csv.c index e5ae853590bf2c..1f41976e95fdb1 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -237,7 +237,7 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) int value; if (!PyLong_CheckExact(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be an integer", name); + "\"%s\" must be an integer, not %T", name, src); return -1; } value = PyLong_AsInt(src); @@ -255,27 +255,29 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt if (src == NULL) { *target = dflt; } - else { + else if (src == Py_None) { *target = NOT_SET; - if (src != Py_None) { - if (!PyUnicode_Check(src)) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string or None, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; - } - Py_ssize_t len = PyUnicode_GetLength(src); - if (len < 0) { - return -1; - } - if (len != 1) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); - return -1; - } - *target = PyUnicode_READ_CHAR(src, 0); + } + else { + // similar to PyArg_Parse("C?") + if (!PyUnicode_Check(src)) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, not %T", + name, src); + return -1; + } + Py_ssize_t len = PyUnicode_GetLength(src); + if (len < 0) { + return -1; + } + if (len != 1) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, " + "not a string of length %zd", + name, len); + return -1; } + *target = PyUnicode_READ_CHAR(src, 0); } return 0; } @@ -287,11 +289,12 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) *target = dflt; } else { + // similar to PyArg_Parse("C") if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be string, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; + "\"%s\" must be a unicode character, not %T", + name, src); + return -1; } Py_ssize_t len = PyUnicode_GetLength(src); if (len < 0) { @@ -299,8 +302,9 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) } if (len != 1) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); + "\"%s\" must be a unicode character, " + "not a string of length %zd", + name, len); return -1; } *target = PyUnicode_READ_CHAR(src, 0); @@ -314,16 +318,12 @@ _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) if (src == NULL) *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); else { - if (src == Py_None) - *target = NULL; - else if (!PyUnicode_Check(src)) { + if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a string", name); + "\"%s\" must be a string, not %T", name, src); return -1; } - else { - Py_XSETREF(*target, Py_NewRef(src)); - } + Py_XSETREF(*target, Py_NewRef(src)); } return 0; } @@ -533,11 +533,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) /* validate options */ if (dialect_check_quoting(self->quoting)) goto err; - if (self->delimiter == NOT_SET) { - PyErr_SetString(PyExc_TypeError, - "\"delimiter\" must be a 1-character string"); - goto err; - } if (quotechar == Py_None && quoting == NULL) self->quoting = QUOTE_NONE; if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) { @@ -545,10 +540,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "quotechar must be set if quoting enabled"); goto err; } - if (self->lineterminator == NULL) { - PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); - goto err; - } if (dialect_check_char("delimiter", self->delimiter, self, true) || dialect_check_char("escapechar", self->escapechar, self, !self->skipinitialspace) || @@ -927,7 +918,7 @@ parse_reset(ReaderObj *self) } static PyObject * -Reader_iternext(PyObject *op) +Reader_iternext_lock_held(PyObject *op) { ReaderObj *self = _ReaderObj_CAST(op); @@ -994,6 +985,16 @@ Reader_iternext(PyObject *op) return fields; } +static PyObject * +Reader_iternext(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = Reader_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + static void Reader_dealloc(PyObject *op) { @@ -1312,14 +1313,8 @@ join_append_lineterminator(WriterObj *self) return 1; } -PyDoc_STRVAR(csv_writerow_doc, -"writerow(iterable)\n" -"\n" -"Construct and write a CSV record from an iterable of fields. Non-string\n" -"elements will be converted to string."); - static PyObject * -csv_writerow(PyObject *op, PyObject *seq) +csv_writerow_lock_held(PyObject *op, PyObject *seq) { WriterObj *self = _WriterObj_CAST(op); DialectObj *dialect = self->dialect; @@ -1422,11 +1417,29 @@ csv_writerow(PyObject *op, PyObject *seq) return result; } +PyDoc_STRVAR(csv_writerow_doc, +"writerow($self, row, /)\n" +"--\n\n" +"Construct and write a CSV record from an iterable of fields.\n" +"\n" +"Non-string elements will be converted to string."); + +static PyObject * +csv_writerow(PyObject *op, PyObject *seq) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = csv_writerow_lock_held(op, seq); + Py_END_CRITICAL_SECTION(); + return result; +} + PyDoc_STRVAR(csv_writerows_doc, -"writerows(iterable of iterables)\n" +"writerows($self, rows, /)\n" +"--\n\n" +"Construct and write a series of iterables to a csv file.\n" "\n" -"Construct and write a series of iterables to a csv file. Non-string\n" -"elements will be converted to string."); +"Non-string elements will be converted to string."); static PyObject * csv_writerows(PyObject *self, PyObject *seqseq) @@ -1583,13 +1596,11 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) _csv.list_dialects Return a list of all known dialect names. - - names = csv.list_dialects() [clinic start generated code]*/ static PyObject * _csv_list_dialects_impl(PyObject *module) -/*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/ +/*[clinic end generated code: output=a5b92b215b006a6d input=ec58040aafd6a20a]*/ { return PyDict_Keys(get_csv_state(module)->dialects); } @@ -1626,13 +1637,11 @@ _csv.unregister_dialect name: object Delete the name/dialect mapping associated with a string name. - - csv.unregister_dialect(name) [clinic start generated code]*/ static PyObject * _csv_unregister_dialect_impl(PyObject *module, PyObject *name) -/*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/ +/*[clinic end generated code: output=0813ebca6c058df4 input=e1cf81bfe3ba0f62]*/ { _csvstate *module_state = get_csv_state(module); int rc = PyDict_Pop(module_state->dialects, name, NULL); @@ -1652,13 +1661,11 @@ _csv.get_dialect name: object Return the dialect instance associated with name. - - dialect = csv.get_dialect(name) [clinic start generated code]*/ static PyObject * _csv_get_dialect_impl(PyObject *module, PyObject *name) -/*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/ +/*[clinic end generated code: output=aa988cd573bebebb input=74865c659dcb441f]*/ { return get_dialect_from_registry(name, get_csv_state(module)); } @@ -1670,15 +1677,13 @@ _csv.field_size_limit Sets an upper limit on parsed fields. - csv.field_size_limit([limit]) - Returns old limit. If limit is not given, no new limit is set and the old limit is returned [clinic start generated code]*/ static PyObject * _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit) -/*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/ +/*[clinic end generated code: output=f2799ecd908e250b input=77db7485ee3ae90a]*/ { _csvstate *module_state = get_csv_state(module); Py_ssize_t old_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit); @@ -1714,14 +1719,13 @@ PyType_Spec error_spec = { PyDoc_STRVAR(csv_module_doc, "CSV parsing and writing.\n"); PyDoc_STRVAR(csv_reader_doc, -" csv_reader = reader(iterable [, dialect='excel']\n" -" [optional keyword args])\n" -" for row in csv_reader:\n" -" process(row)\n" +"reader($module, iterable, /, dialect='excel', **fmtparams)\n" +"--\n\n" +"Return a reader object that will process lines from the given iterable.\n" "\n" "The \"iterable\" argument can be any object that returns a line\n" "of input for each iteration, such as a file object or a list. The\n" -"optional \"dialect\" parameter is discussed below. The function\n" +"optional \"dialect\" argument defines a CSV dialect. The function\n" "also accepts optional keyword arguments which override settings\n" "provided by the dialect.\n" "\n" @@ -1729,22 +1733,24 @@ PyDoc_STRVAR(csv_reader_doc, "of the CSV file (which can span multiple input lines).\n"); PyDoc_STRVAR(csv_writer_doc, -" csv_writer = csv.writer(fileobj [, dialect='excel']\n" -" [optional keyword args])\n" -" for row in sequence:\n" -" csv_writer.writerow(row)\n" -"\n" -" [or]\n" +"writer($module, fileobj, /, dialect='excel', **fmtparams)\n" +"--\n\n" +"Return a writer object that will write user data on the given file object.\n" "\n" -" csv_writer = csv.writer(fileobj [, dialect='excel']\n" -" [optional keyword args])\n" -" csv_writer.writerows(rows)\n" -"\n" -"The \"fileobj\" argument can be any object that supports the file API.\n"); +"The \"fileobj\" argument can be any object that supports the file API.\n" +"The optional \"dialect\" argument defines a CSV dialect. The function\n" +"also accepts optional keyword arguments which override settings\n" +"provided by the dialect.\n"); PyDoc_STRVAR(csv_register_dialect_doc, -"Create a mapping from a string name to a dialect class.\n" -" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])"); +"register_dialect($module, name, /, dialect='excel', **fmtparams)\n" +"--\n\n" +"Create a mapping from a string name to a CVS dialect.\n" +"\n" +"The optional \"dialect\" argument specifies the base dialect instance\n" +"or the name of the registered dialect. The function also accepts\n" +"optional keyword arguments which override settings provided by the\n" +"dialect.\n"); static struct PyMethodDef csv_methods[] = { { "reader", _PyCFunction_CAST(csv_reader), diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 1bb65e0a64920d..bac119f2ae7a36 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3591,6 +3591,48 @@ generic_pycdata_new(ctypes_state *st, PyCFuncPtr_Type */ +static inline void +atomic_xsetref(PyObject **field, PyObject *value) +{ +#ifdef Py_GIL_DISABLED + PyObject *old = *field; + _Py_atomic_store_ptr(field, value); + Py_XDECREF(old); +#else + Py_XSETREF(*field, value); +#endif +} +/* + This function atomically loads the reference from *field, and + tries to get a new reference to it. If the incref fails, + it acquires critical section of obj and returns a new reference to the *field. + In the general case, this avoids contention on acquiring the critical section. +*/ +static inline PyObject * +atomic_xgetref(PyObject *obj, PyObject **field) +{ +#ifdef Py_GIL_DISABLED + PyObject *value = _Py_atomic_load_ptr(field); + if (value == NULL) { + return NULL; + } + if (_Py_TryIncrefCompare(field, value)) { + return value; + } + Py_BEGIN_CRITICAL_SECTION(obj); + value = Py_XNewRef(*field); + Py_END_CRITICAL_SECTION(); + return value; +#else + return Py_XNewRef(*field); +#endif +} + +static int +_validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags, PyObject *argtypes); + + + /*[clinic input] @critical_section @setter @@ -3607,7 +3649,7 @@ _ctypes_CFuncPtr_errcheck_set_impl(PyCFuncPtrObject *self, PyObject *value) return -1; } Py_XINCREF(value); - Py_XSETREF(self->errcheck, value); + atomic_xsetref(&self->errcheck, value); return 0; } @@ -3639,12 +3681,10 @@ static int _ctypes_CFuncPtr_restype_set_impl(PyCFuncPtrObject *self, PyObject *value) /*[clinic end generated code: output=0be0a086abbabf18 input=683c3bef4562ccc6]*/ { - PyObject *checker, *oldchecker; + PyObject *checker; if (value == NULL) { - oldchecker = self->checker; - self->checker = NULL; - Py_CLEAR(self->restype); - Py_XDECREF(oldchecker); + atomic_xsetref(&self->restype, NULL); + atomic_xsetref(&self->checker, NULL); return 0; } ctypes_state *st = get_module_state_by_def(Py_TYPE(Py_TYPE(self))); @@ -3660,11 +3700,9 @@ _ctypes_CFuncPtr_restype_set_impl(PyCFuncPtrObject *self, PyObject *value) if (PyObject_GetOptionalAttr(value, &_Py_ID(_check_retval_), &checker) < 0) { return -1; } - oldchecker = self->checker; - self->checker = checker; Py_INCREF(value); - Py_XSETREF(self->restype, value); - Py_XDECREF(oldchecker); + atomic_xsetref(&self->checker, checker); + atomic_xsetref(&self->restype, value); return 0; } @@ -3706,19 +3744,25 @@ static int _ctypes_CFuncPtr_argtypes_set_impl(PyCFuncPtrObject *self, PyObject *value) /*[clinic end generated code: output=596a36e2ae89d7d1 input=c4627573e980aa8b]*/ { - PyObject *converters; - if (value == NULL || value == Py_None) { - Py_CLEAR(self->converters); - Py_CLEAR(self->argtypes); + atomic_xsetref(&self->argtypes, NULL); + atomic_xsetref(&self->converters, NULL); } else { - ctypes_state *st = get_module_state_by_def(Py_TYPE(Py_TYPE(self))); - converters = converters_from_argtypes(st, value); + PyTypeObject *type = Py_TYPE(self); + ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); + + PyObject *converters = converters_from_argtypes(st, value); if (!converters) return -1; - Py_XSETREF(self->converters, converters); + + /* Verify paramflags again due to constraints with argtypes */ + if (!_validate_paramflags(st, type, self->paramflags, value)) { + Py_DECREF(converters); + return -1; + } + atomic_xsetref(&self->converters, converters); Py_INCREF(value); - Py_XSETREF(self->argtypes, value); + atomic_xsetref(&self->argtypes, value); } return 0; } @@ -3845,10 +3889,9 @@ _check_outarg_type(ctypes_state *st, PyObject *arg, Py_ssize_t index) /* Returns 1 on success, 0 on error */ static int -_validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) +_validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags, PyObject *argtypes) { Py_ssize_t i, len; - PyObject *argtypes; StgInfo *info; if (PyStgInfo_FromType(st, (PyObject *)type, &info) < 0) { @@ -3859,10 +3902,13 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) "abstract class"); return 0; } - argtypes = info->argtypes; + if (argtypes == NULL) { + argtypes = info->argtypes; + } - if (paramflags == NULL || info->argtypes == NULL) + if (paramflags == NULL || argtypes == NULL) { return 1; + } if (!PyTuple_Check(paramflags)) { PyErr_SetString(PyExc_TypeError, @@ -3871,7 +3917,7 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) } len = PyTuple_GET_SIZE(paramflags); - if (len != PyTuple_GET_SIZE(info->argtypes)) { + if (len != PyTuple_GET_SIZE(argtypes)) { PyErr_SetString(PyExc_ValueError, "paramflags must have the same length as argtypes"); return 0; @@ -3883,7 +3929,9 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) { + if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) || + !(name == Py_None || PyUnicode_Check(name))) + { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); return 0; @@ -3948,8 +3996,10 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) void *handle; PyObject *paramflags = NULL; - if (!PyArg_ParseTuple(args, "O|O?", &ftuple, &paramflags)) + if (!PyArg_ParseTuple(args, "O|O", &ftuple, &paramflags)) return NULL; + if (paramflags == Py_None) + paramflags = NULL; ftuple = PySequence_Tuple(ftuple); if (!ftuple) @@ -4043,7 +4093,7 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) #endif #undef USE_DLERROR ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); - if (!_validate_paramflags(st, type, paramflags)) { + if (!_validate_paramflags(st, type, paramflags, NULL)) { Py_DECREF(ftuple); return NULL; } @@ -4081,11 +4131,13 @@ PyCFuncPtr_FromVtblIndex(PyTypeObject *type, PyObject *args, PyObject *kwds) GUID *iid = NULL; Py_ssize_t iid_len = 0; - if (!PyArg_ParseTuple(args, "is|O?z#", &index, &name, &paramflags, &iid, &iid_len)) + if (!PyArg_ParseTuple(args, "is|Oz#", &index, &name, &paramflags, &iid, &iid_len)) return NULL; + if (paramflags == Py_None) + paramflags = NULL; ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); - if (!_validate_paramflags(st, type, paramflags)) { + if (!_validate_paramflags(st, type, paramflags, NULL)) { return NULL; } self = (PyCFuncPtrObject *)generic_pycdata_new(st, type, args, kwds); @@ -4514,16 +4566,11 @@ _build_result(PyObject *result, PyObject *callargs, } static PyObject * -PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) +PyCFuncPtr_call(PyObject *op, PyObject *inargs, PyObject *kwds) { - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); - PyObject *restype; - PyObject *converters; - PyObject *checker; - PyObject *argtypes; - PyObject *result; - PyObject *callargs; - PyObject *errcheck; + PyObject *result = NULL; + PyObject *callargs = NULL; + PyObject *ret = NULL; #ifdef MS_WIN32 IUnknown *piunk = NULL; #endif @@ -4541,13 +4588,24 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) } assert(info); /* Cannot be NULL for PyCFuncPtrObject instances */ - restype = self->restype ? self->restype : info->restype; - converters = self->converters ? self->converters : info->converters; - checker = self->checker ? self->checker : info->checker; - argtypes = self->argtypes ? self->argtypes : info->argtypes; -/* later, we probably want to have an errcheck field in stginfo */ - errcheck = self->errcheck /* ? self->errcheck : info->errcheck */; - + PyObject *restype = atomic_xgetref(op, &self->restype); + if (restype == NULL) { + restype = Py_XNewRef(info->restype); + } + PyObject *converters = atomic_xgetref(op, &self->converters); + if (converters == NULL) { + converters = Py_XNewRef(info->converters); + } + PyObject *checker = atomic_xgetref(op, &self->checker); + if (checker == NULL) { + checker = Py_XNewRef(info->checker); + } + PyObject *argtypes = atomic_xgetref(op, &self->argtypes); + if (argtypes == NULL) { + argtypes = Py_XNewRef(info->argtypes); + } + /* later, we probably want to have an errcheck field in stginfo */ + PyObject *errcheck = atomic_xgetref(op, &self->errcheck); pProc = *(void **)self->b_ptr; #ifdef MS_WIN32 @@ -4558,25 +4616,25 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) if (!this) { PyErr_SetString(PyExc_ValueError, "native com method call without 'this' parameter"); - return NULL; + goto finally; } if (!CDataObject_Check(st, this)) { PyErr_SetString(PyExc_TypeError, "Expected a COM this pointer as first argument"); - return NULL; + goto finally; } /* there should be more checks? No, in Python */ /* First arg is a pointer to an interface instance */ if (!this->b_ptr || *(void **)this->b_ptr == NULL) { PyErr_SetString(PyExc_ValueError, "NULL COM pointer access"); - return NULL; + goto finally; } piunk = *(IUnknown **)this->b_ptr; if (NULL == piunk->lpVtbl) { PyErr_SetString(PyExc_ValueError, "COM method call without VTable"); - return NULL; + goto finally; } pProc = ((void **)piunk->lpVtbl)[self->index - 0x1000]; } @@ -4584,8 +4642,9 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) callargs = _build_callargs(st, self, argtypes, inargs, kwds, &outmask, &inoutmask, &numretvals); - if (callargs == NULL) - return NULL; + if (callargs == NULL) { + goto finally; + } if (converters) { int required = Py_SAFE_DOWNCAST(PyTuple_GET_SIZE(converters), @@ -4604,7 +4663,7 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) required, required == 1 ? "" : "s", actual); - return NULL; + goto finally; } } else if (required != actual) { Py_DECREF(callargs); @@ -4613,7 +4672,7 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) required, required == 1 ? "" : "s", actual); - return NULL; + goto finally; } } @@ -4644,23 +4703,19 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) if (v == NULL || v != callargs) { Py_DECREF(result); Py_DECREF(callargs); - return v; + ret = v; + goto finally; } Py_DECREF(v); } - - return _build_result(result, callargs, - outmask, inoutmask, numretvals); -} - -static PyObject * -PyCFuncPtr_call(PyObject *op, PyObject *inargs, PyObject *kwds) -{ - PyObject *result; - Py_BEGIN_CRITICAL_SECTION(op); - result = PyCFuncPtr_call_lock_held(op, inargs, kwds); - Py_END_CRITICAL_SECTION(); - return result; + ret = _build_result(result, callargs, outmask, inoutmask, numretvals); +finally: + Py_XDECREF(restype); + Py_XDECREF(converters); + Py_XDECREF(checker); + Py_XDECREF(argtypes); + Py_XDECREF(errcheck); + return ret; } static int diff --git a/Modules/_ctypes/_ctypes_test.c b/Modules/_ctypes/_ctypes_test.c index d28e5708b44933..a0c9d8b70fee46 100644 --- a/Modules/_ctypes/_ctypes_test.c +++ b/Modules/_ctypes/_ctypes_test.c @@ -23,7 +23,7 @@ # define _Py_thread_local __thread #endif -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # include <complex.h> // csqrt() # undef I // for _ctypes_test_generated.c.h #endif @@ -457,7 +457,7 @@ EXPORT(double) my_sqrt(double a) return sqrt(a); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) EXPORT(double complex) my_csqrt(double complex a) { return csqrt(a); @@ -989,13 +989,10 @@ EXPORT(RECT) ReturnRect(int i, RECT ar, RECT* br, POINT cp, RECT dr, { case 0: return ar; - break; case 1: return dr; - break; case 2: return gr; - break; } return ar; diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 86bcc805360a3f..662b57a5b825e2 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -759,7 +759,7 @@ d_get(void *ptr, Py_ssize_t size) return PyFloat_FromDouble(val); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) /* We don't use _Complex types here, using arrays instead, as the C11+ standard says: "Each complex type has the same representation and alignment @@ -1599,7 +1599,7 @@ for base_code, base_c_type in [ /////////////////////////////////////////////////////////////////////////// TABLE_ENTRY_SW(d, &ffi_type_double); -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) if (Py_FFI_COMPLEX_AVAILABLE) { TABLE_ENTRY(D, &ffi_type_complex_double); TABLE_ENTRY(F, &ffi_type_complex_float); diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h index 2d859ed63e1758..78a33fea0192c2 100644 --- a/Modules/_ctypes/ctypes.h +++ b/Modules/_ctypes/ctypes.h @@ -11,7 +11,7 @@ // Do we support C99 complex types in ffi? // For Apple's libffi, this must be determined at runtime (see gh-128156). -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # if USING_APPLE_OS_LIBFFI && defined(__has_builtin) # if __has_builtin(__builtin_available) # define Py_FFI_COMPLEX_AVAILABLE __builtin_available(macOS 10.15, *) @@ -419,7 +419,7 @@ typedef struct { visible to other threads before the `dict_final` bit is set. */ -#define STGINFO_LOCK(stginfo) Py_BEGIN_CRITICAL_SECTION_MUT(&(stginfo)->mutex) +#define STGINFO_LOCK(stginfo) Py_BEGIN_CRITICAL_SECTION_MUTEX(&(stginfo)->mutex) #define STGINFO_UNLOCK() Py_END_CRITICAL_SECTION() static inline uint8_t @@ -596,7 +596,8 @@ PyStgInfo_FromAny(ctypes_state *state, PyObject *obj, StgInfo **result) return _stginfo_from_type(state, Py_TYPE(obj), result); } -/* A variant of PyStgInfo_FromType that doesn't need the state, +/* A variant of PyStgInfo_FromType that doesn't need the state + * and doesn't modify any refcounts, * so it can be called from finalization functions when the module * state is torn down. */ @@ -604,17 +605,12 @@ static inline StgInfo * _PyStgInfo_FromType_NoState(PyObject *type) { PyTypeObject *PyCType_Type; - if (PyType_GetBaseByToken(Py_TYPE(type), &pyctype_type_spec, &PyCType_Type) < 0) { - return NULL; - } - if (PyCType_Type == NULL) { - PyErr_Format(PyExc_TypeError, "expected a ctypes type, got '%N'", type); + if (_PyType_GetBaseByToken_Borrow(Py_TYPE(type), &pyctype_type_spec, &PyCType_Type) < 0 || + PyCType_Type == NULL) { return NULL; } - StgInfo *info = PyObject_GetTypeData(type, PyCType_Type); - Py_DECREF(PyCType_Type); - return info; + return PyObject_GetTypeData(type, PyCType_Type); } // Initialize StgInfo on a newly created type diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index f208d2956e429e..ab955a0b824a2f 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -484,7 +484,7 @@ error:; /* Replace array elements at stginfo->ffi_type_pointer.elements. - Return -1 if error occured. + Return -1 if error occurred. */ int _replace_array_elements(ctypes_state *st, PyObject *layout_fields, diff --git a/Modules/_curses_panel.c b/Modules/_curses_panel.c index eecf7a1c8a1e56..6afac17c76a863 100644 --- a/Modules/_curses_panel.c +++ b/Modules/_curses_panel.c @@ -32,6 +32,8 @@ typedef struct { PyTypeObject *PyCursesPanel_Type; } _curses_panel_state; +typedef struct PyCursesPanelObject PyCursesPanelObject; + static inline _curses_panel_state * get_curses_panel_state(PyObject *module) { @@ -40,6 +42,25 @@ get_curses_panel_state(PyObject *module) return (_curses_panel_state *)state; } +static inline _curses_panel_state * +get_curses_panel_state_by_panel(PyCursesPanelObject *panel) +{ + /* + * Note: 'state' may be NULL if Py_TYPE(panel) is not a heap + * type associated with this module, but the compiler would + * have likely already complained with an "invalid pointer + * type" at compile-time. + * + * To make it more robust, all functions recovering a module's + * state from an object should expect to return NULL with an + * exception set (in contrast to functions recovering a module's + * state from a module itself). + */ + void *state = PyType_GetModuleState(Py_TYPE(panel)); + assert(state != NULL); + return (_curses_panel_state *)state; +} + static int _curses_panel_clear(PyObject *mod) { @@ -95,7 +116,7 @@ PyCursesCheckERR(_curses_panel_state *state, int code, const char *fname) /* Definition of the panel object and panel type */ -typedef struct { +typedef struct PyCursesPanelObject { PyObject_HEAD PANEL *pan; PyCursesWindowObject *wo; /* for reference counts */ @@ -262,8 +283,11 @@ static PyObject * PyCursesPanel_New(_curses_panel_state *state, PANEL *pan, PyCursesWindowObject *wo) { - PyCursesPanelObject *po = PyObject_New(PyCursesPanelObject, - state->PyCursesPanel_Type); + assert(state != NULL); + PyTypeObject *type = state->PyCursesPanel_Type; + assert(type != NULL); + assert(type->tp_alloc != NULL); + PyCursesPanelObject *po = (PyCursesPanelObject *)type->tp_alloc(type, 0); if (po == NULL) { return NULL; } @@ -278,27 +302,57 @@ PyCursesPanel_New(_curses_panel_state *state, PANEL *pan, return (PyObject *)po; } +static int +PyCursesPanel_Clear(PyObject *op) +{ + PyCursesPanelObject *self = _PyCursesPanelObject_CAST(op); + PyObject *extra = (PyObject *)panel_userptr(self->pan); + if (extra != NULL) { + Py_DECREF(extra); + if (set_panel_userptr(self->pan, NULL) == ERR) { + _curses_panel_state *state = get_curses_panel_state_by_panel(self); + PyErr_SetString(state->PyCursesError, + "set_panel_userptr() returned ERR"); + return -1; + } + } + // self->wo should not be cleared because an associated WINDOW may exist + return 0; +} + static void PyCursesPanel_Dealloc(PyObject *self) { - PyObject *tp, *obj; - PyCursesPanelObject *po = _PyCursesPanelObject_CAST(self); + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_UnTrack(self); - tp = (PyObject *) Py_TYPE(po); - obj = (PyObject *) panel_userptr(po->pan); - if (obj) { - (void)set_panel_userptr(po->pan, NULL); - Py_DECREF(obj); + PyCursesPanelObject *po = _PyCursesPanelObject_CAST(self); + if (PyCursesPanel_Clear(self) < 0) { + PyErr_FormatUnraisable("Exception ignored in PyCursesPanel_Dealloc()"); + } + if (del_panel(po->pan) == ERR && !PyErr_Occurred()) { + _curses_panel_state *state = get_curses_panel_state_by_panel(po); + PyErr_SetString(state->PyCursesError, "del_panel() returned ERR"); + PyErr_FormatUnraisable("Exception ignored in PyCursesPanel_Dealloc()"); } - (void)del_panel(po->pan); if (po->wo != NULL) { Py_DECREF(po->wo); remove_lop(po); } - PyObject_Free(po); + tp->tp_free(po); Py_DECREF(tp); } +static int +PyCursesPanel_Traverse(PyObject *op, visitproc visit, void *arg) +{ + PyCursesPanelObject *self = _PyCursesPanelObject_CAST(op); + Py_VISIT(Py_TYPE(op)); + Py_VISIT(panel_userptr(self->pan)); + Py_VISIT(self->wo); + return 0; +} + /* panel_above(NULL) returns the bottom panel in the stack. To get this behaviour we use curses.panel.bottom_panel(). */ /*[clinic input] @@ -520,7 +574,9 @@ static PyMethodDef PyCursesPanel_Methods[] = { /* -------------------------------------------------------*/ static PyType_Slot PyCursesPanel_Type_slots[] = { + {Py_tp_clear, PyCursesPanel_Clear}, {Py_tp_dealloc, PyCursesPanel_Dealloc}, + {Py_tp_traverse, PyCursesPanel_Traverse}, {Py_tp_methods, PyCursesPanel_Methods}, {0, 0}, }; @@ -528,7 +584,11 @@ static PyType_Slot PyCursesPanel_Type_slots[] = { static PyType_Spec PyCursesPanel_Type_spec = { .name = "_curses_panel.panel", .basicsize = sizeof(PyCursesPanelObject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, + .flags = ( + Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_DISALLOW_INSTANTIATION + | Py_TPFLAGS_HAVE_GC + ), .slots = PyCursesPanel_Type_slots }; diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index cd185bc2b02ea5..f4e7732b657d4e 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -1139,12 +1139,12 @@ _curses.window.attron attr: long / -Add attribute attr from the "background" set. +Add attribute attr to the "background" set. [clinic start generated code]*/ static PyObject * _curses_window_attron_impl(PyCursesWindowObject *self, long attr) -/*[clinic end generated code: output=7afea43b237fa870 input=5a88fba7b1524f32]*/ +/*[clinic end generated code: output=7afea43b237fa870 input=b57f824e1bf58326]*/ { return PyCursesCheckERR_ForWin(self, wattron(self->win, (attr_t)attr), "attron"); } @@ -1312,32 +1312,27 @@ int py_mvwdelch(WINDOW *w, int y, int x) /* chgat, added by Fabian Kreutz <fabian.kreutz at gmx.net> */ #ifdef HAVE_CURSES_WCHGAT -/*[-clinic input] -_curses.window.chgat - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - - n: int = -1 - Number of characters. - - attr: long - Attributes for characters. - / - -Set the attributes of characters. +PyDoc_STRVAR(_curses_window_chgat__doc__, +"chgat([y, x,] [n=-1,] attr)\n" +"Set the attributes of characters.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Number of characters.\n" +" attr\n" +" Attributes for characters.\n" +"\n" +"Set the attributes of num characters at the current cursor position, or at\n" +"position (y, x) if supplied. If no value of num is given or num = -1, the\n" +"attribute will be set on all the characters to the end of the line. This\n" +"function does not move the cursor. The changed line will be touched using\n" +"the touchline() method so that the contents will be redisplayed by the next\n" +"window refresh."); -Set the attributes of num characters at the current cursor position, or at -position (y, x) if supplied. If no value of num is given or num = -1, the -attribute will be set on all the characters to the end of the line. This -function does not move the cursor. The changed line will be touched using -the touchline() method so that the contents will be redisplayed by the next -window refresh. -[-clinic start generated code]*/ static PyObject * PyCursesWindow_ChgAt(PyObject *op, PyObject *args) { @@ -1363,19 +1358,20 @@ PyCursesWindow_ChgAt(PyObject *op, PyObject *args) attr = lattr; break; case 3: - if (!PyArg_ParseTuple(args,"iil;int,int,attr", &y, &x, &lattr)) + if (!PyArg_ParseTuple(args,"iil;y,x,attr", &y, &x, &lattr)) return NULL; attr = lattr; use_xy = TRUE; break; case 4: - if (!PyArg_ParseTuple(args,"iiil;int,int,n,attr", &y, &x, &num, &lattr)) + if (!PyArg_ParseTuple(args,"iiil;y,x,n,attr", &y, &x, &num, &lattr)) return NULL; attr = lattr; use_xy = TRUE; break; default: - PyErr_SetString(PyExc_TypeError, "chgat requires 1 to 4 arguments"); + PyErr_SetString(PyExc_TypeError, + "_curses.window.chgat requires 1 to 4 arguments"); return NULL; } @@ -1533,7 +1529,7 @@ _curses_window_getbkgd_impl(PyCursesWindowObject *self) } /*[clinic input] -_curses.window.getch -> int +_curses.window.getch [ y: int @@ -1550,10 +1546,10 @@ keypad keys and so on return numbers higher than 256. In no-delay mode, -1 is returned if there is no input, else getch() waits until a key is pressed. [clinic start generated code]*/ -static int +static PyObject * _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x) -/*[clinic end generated code: output=980aa6af0c0ca387 input=bb24ebfb379f991f]*/ +/*[clinic end generated code: output=e1639e87d545e676 input=73f350336b1ee8c8]*/ { int rtn; @@ -1566,7 +1562,17 @@ _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, } Py_END_ALLOW_THREADS - return rtn; + if (rtn == ERR) { + // We suppress ERR returned by wgetch() in nodelay mode + // after we handled possible interruption signals. + if (PyErr_CheckSignals()) { + return NULL; + } + // ERR is an implementation detail, so to be on the safe side, + // we forcibly set the return value to -1 as documented above. + rtn = -1; + } + return PyLong_FromLong(rtn); } /*[clinic input] @@ -1678,84 +1684,102 @@ _curses_window_get_wch_impl(PyCursesWindowObject *self, int group_right_1, } #endif -/*[-clinic input] -_curses.window.getstr - - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - n: int = 1023 - Maximal number of characters. - / +/* + * Helper function for parsing parameters from getstr() and instr(). + * This function is necessary because Argument Clinic does not know + * how to handle nested optional groups with default values inside. + * + * Return 1 on success and 0 on failure, similar to PyArg_ParseTuple(). + */ +static int +curses_clinic_parse_optional_xy_n(PyObject *args, + int *y, int *x, unsigned int *n, int *use_xy, + const char *qualname) +{ + switch (PyTuple_GET_SIZE(args)) { + case 0: { + *use_xy = 0; + return 1; + } + case 1: { + *use_xy = 0; + return PyArg_ParseTuple(args, "O&;n", + _PyLong_UnsignedInt_Converter, n); + } + case 2: { + *use_xy = 1; + return PyArg_ParseTuple(args, "ii;y,x", y, x); + } + case 3: { + *use_xy = 1; + return PyArg_ParseTuple(args, "iiO&;y,x,n", y, x, + _PyLong_UnsignedInt_Converter, n); + } + default: { + *use_xy = 0; + PyErr_Format(PyExc_TypeError, "%s requires 0 to 3 arguments", + qualname); + return 0; + } + } +} -Read a string from the user, with primitive line editing capacity. -[-clinic start generated code]*/ +PyDoc_STRVAR(_curses_window_getstr__doc__, +"getstr([[y, x,] n=2047])\n" +"Read a string from the user, with primitive line editing capacity.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Maximal number of characters."); static PyObject * -PyCursesWindow_GetStr(PyObject *op, PyObject *args) +PyCursesWindow_getstr(PyObject *op, PyObject *args) { PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); + int rtn, use_xy = 0, y = 0, x = 0; + unsigned int max_buf_size = 2048; + unsigned int n = max_buf_size - 1; + PyObject *res; - int x, y, n; - char rtn[1024]; /* This should be big enough.. I hope */ - int rtn2; + if (!curses_clinic_parse_optional_xy_n(args, &y, &x, &n, &use_xy, + "_curses.window.instr")) + { + return NULL; + } - switch (PyTuple_Size(args)) { - case 0: - Py_BEGIN_ALLOW_THREADS - rtn2 = wgetnstr(self->win,rtn, 1023); - Py_END_ALLOW_THREADS - break; - case 1: - if (!PyArg_ParseTuple(args,"i;n", &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } - Py_BEGIN_ALLOW_THREADS - rtn2 = wgetnstr(self->win, rtn, Py_MIN(n, 1023)); - Py_END_ALLOW_THREADS - break; - case 2: - if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - return NULL; + n = Py_MIN(n, max_buf_size - 1); + res = PyBytes_FromStringAndSize(NULL, n + 1); + if (res == NULL) { + return NULL; + } + char *buf = PyBytes_AS_STRING(res); + + if (use_xy) { Py_BEGIN_ALLOW_THREADS #ifdef STRICT_SYSV_CURSES - rtn2 = wmove(self->win,y,x)==ERR ? ERR : wgetnstr(self->win, rtn, 1023); + rtn = wmove(self->win, y, x) == ERR + ? ERR + : wgetnstr(self->win, buf, n); #else - rtn2 = mvwgetnstr(self->win,y,x,rtn, 1023); + rtn = mvwgetnstr(self->win, y, x, buf, n); #endif Py_END_ALLOW_THREADS - break; - case 3: - if (!PyArg_ParseTuple(args,"iii;y,x,n", &y, &x, &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } -#ifdef STRICT_SYSV_CURSES - Py_BEGIN_ALLOW_THREADS - rtn2 = wmove(self->win,y,x)==ERR ? ERR : - wgetnstr(self->win, rtn, Py_MIN(n, 1023)); - Py_END_ALLOW_THREADS -#else + } + else { Py_BEGIN_ALLOW_THREADS - rtn2 = mvwgetnstr(self->win, y, x, rtn, Py_MIN(n, 1023)); + rtn = wgetnstr(self->win, buf, n); Py_END_ALLOW_THREADS -#endif - break; - default: - PyErr_SetString(PyExc_TypeError, "getstr requires 0 to 3 arguments"); - return NULL; } - if (rtn2 == ERR) - rtn[0] = 0; - return PyBytes_FromString(rtn); + + if (rtn == ERR) { + Py_DECREF(res); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + _PyBytes_Resize(&res, strlen(buf)); // 'res' is set to NULL on failure + return res; } /*[clinic input] @@ -1880,69 +1904,57 @@ _curses_window_inch_impl(PyCursesWindowObject *self, int group_right_1, return rtn; } -/*[-clinic input] -_curses.window.instr - - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - n: int = 1023 - Maximal number of characters. - / - -Return a string of characters, extracted from the window. +PyDoc_STRVAR(_curses_window_instr__doc__, +"instr([y, x,] n=2047)\n" +"Return a string of characters, extracted from the window.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Maximal number of characters.\n" +"\n" +"Return a string of characters, extracted from the window starting at the\n" +"current cursor position, or at y, x if specified. Attributes are stripped\n" +"from the characters. If n is specified, instr() returns a string at most\n" +"n characters long (exclusive of the trailing NUL)."); -Return a string of characters, extracted from the window starting at the -current cursor position, or at y, x if specified. Attributes are stripped -from the characters. If n is specified, instr() returns a string at most -n characters long (exclusive of the trailing NUL). -[-clinic start generated code]*/ static PyObject * -PyCursesWindow_InStr(PyObject *op, PyObject *args) +PyCursesWindow_instr(PyObject *op, PyObject *args) { PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); + int rtn, use_xy = 0, y = 0, x = 0; + unsigned int max_buf_size = 2048; + unsigned int n = max_buf_size - 1; + PyObject *res; - int x, y, n; - char rtn[1024]; /* This should be big enough.. I hope */ - int rtn2; + if (!curses_clinic_parse_optional_xy_n(args, &y, &x, &n, &use_xy, + "_curses.window.instr")) + { + return NULL; + } - switch (PyTuple_Size(args)) { - case 0: - rtn2 = winnstr(self->win,rtn, 1023); - break; - case 1: - if (!PyArg_ParseTuple(args,"i;n", &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } - rtn2 = winnstr(self->win, rtn, Py_MIN(n, 1023)); - break; - case 2: - if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - return NULL; - rtn2 = mvwinnstr(self->win,y,x,rtn,1023); - break; - case 3: - if (!PyArg_ParseTuple(args, "iii;y,x,n", &y, &x, &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } - rtn2 = mvwinnstr(self->win, y, x, rtn, Py_MIN(n,1023)); - break; - default: - PyErr_SetString(PyExc_TypeError, "instr requires 0 or 3 arguments"); + n = Py_MIN(n, max_buf_size - 1); + res = PyBytes_FromStringAndSize(NULL, n + 1); + if (res == NULL) { return NULL; } - if (rtn2 == ERR) - rtn[0] = 0; - return PyBytes_FromString(rtn); + char *buf = PyBytes_AS_STRING(res); + + if (use_xy) { + rtn = mvwinnstr(self->win, y, x, buf, n); + } + else { + rtn = winnstr(self->win, buf, n); + } + + if (rtn == ERR) { + Py_DECREF(res); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + _PyBytes_Resize(&res, strlen(buf)); // 'res' is set to NULL on failure + return res; } /*[clinic input] @@ -2653,7 +2665,10 @@ static PyMethodDef PyCursesWindow_methods[] = { _CURSES_WINDOW_ATTRSET_METHODDEF _CURSES_WINDOW_BKGD_METHODDEF #ifdef HAVE_CURSES_WCHGAT - {"chgat", PyCursesWindow_ChgAt, METH_VARARGS}, + { + "chgat", PyCursesWindow_ChgAt, METH_VARARGS, + _curses_window_chgat__doc__ + }, #endif _CURSES_WINDOW_BKGDSET_METHODDEF _CURSES_WINDOW_BORDER_METHODDEF @@ -2676,7 +2691,10 @@ static PyMethodDef PyCursesWindow_methods[] = { _CURSES_WINDOW_GET_WCH_METHODDEF {"getmaxyx", PyCursesWindow_getmaxyx, METH_NOARGS}, {"getparyx", PyCursesWindow_getparyx, METH_NOARGS}, - {"getstr", PyCursesWindow_GetStr, METH_VARARGS}, + { + "getstr", PyCursesWindow_getstr, METH_VARARGS, + _curses_window_getstr__doc__ + }, {"getyx", PyCursesWindow_getyx, METH_NOARGS}, _CURSES_WINDOW_HLINE_METHODDEF {"idcok", PyCursesWindow_idcok, METH_VARARGS}, @@ -2690,7 +2708,10 @@ static PyMethodDef PyCursesWindow_methods[] = { {"insertln", PyCursesWindow_winsertln, METH_NOARGS}, _CURSES_WINDOW_INSNSTR_METHODDEF _CURSES_WINDOW_INSSTR_METHODDEF - {"instr", PyCursesWindow_InStr, METH_VARARGS}, + { + "instr", PyCursesWindow_instr, METH_VARARGS, + _curses_window_instr__doc__ + }, _CURSES_WINDOW_IS_LINETOUCHED_METHODDEF {"is_wintouched", PyCursesWindow_is_wintouched, METH_NOARGS}, {"keypad", PyCursesWindow_keypad, METH_VARARGS}, diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 9bba0e3354b26b..cb6e3c80224846 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -14,6 +14,8 @@ #include "pycore_object.h" // _PyObject_Init() #include "pycore_time.h" // _PyTime_ObjectToTime_t() #include "pycore_unicodeobject.h" // _PyUnicode_Copy() +#include "pycore_initconfig.h" // _PyStatus_OK() +#include "pycore_pyatomic_ft_wrappers.h" #include "datetime.h" @@ -124,10 +126,9 @@ get_module_state(PyObject *module) #define INTERP_KEY ((PyObject *)&_Py_ID(cached_datetime_module)) static PyObject * -get_current_module(PyInterpreterState *interp, int *p_reloading) +get_current_module(PyInterpreterState *interp) { PyObject *mod = NULL; - int reloading = 0; PyObject *dict = PyInterpreterState_GetDict(interp); if (dict == NULL) { @@ -138,7 +139,6 @@ get_current_module(PyInterpreterState *interp, int *p_reloading) goto error; } if (ref != NULL) { - reloading = 1; if (ref != Py_None) { (void)PyWeakref_GetRef(ref, &mod); if (mod == Py_None) { @@ -147,9 +147,6 @@ get_current_module(PyInterpreterState *interp, int *p_reloading) Py_DECREF(ref); } } - if (p_reloading != NULL) { - *p_reloading = reloading; - } return mod; error: @@ -163,7 +160,7 @@ static datetime_state * _get_current_state(PyObject **p_mod) { PyInterpreterState *interp = PyInterpreterState_Get(); - PyObject *mod = get_current_module(interp, NULL); + PyObject *mod = get_current_module(interp); if (mod == NULL) { assert(!PyErr_Occurred()); if (PyErr_Occurred()) { @@ -1757,6 +1754,24 @@ format_utcoffset(char *buf, size_t buflen, const char *sep, return 0; } +/* Check whether year with century should be normalized for strftime. */ +inline static int +normalize_century(void) +{ + static int cache = -1; + if (cache < 0) { + char year[5]; + struct tm date = { + .tm_year = -1801, + .tm_mon = 0, + .tm_mday = 1 + }; + cache = (strftime(year, sizeof(year), "%Y", &date) && + strcmp(year, "0099") != 0); + } + return cache; +} + static PyObject * make_somezreplacement(PyObject *object, char *sep, PyObject *tzinfoarg) { @@ -1928,10 +1943,9 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, } replacement = freplacement; } -#ifdef Py_NORMALIZE_CENTURY - else if (ch == 'Y' || ch == 'G' - || ch == 'F' || ch == 'C' - ) { + else if (normalize_century() + && (ch == 'Y' || ch == 'G' || ch == 'F' || ch == 'C')) + { /* 0-pad year with century as necessary */ PyObject *item = PySequence_GetItem(timetuple, 0); if (item == NULL) { @@ -1982,7 +1996,6 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, } continue; } -#endif else { /* percent followed by something else */ continue; @@ -2521,14 +2534,16 @@ static Py_hash_t delta_hash(PyObject *op) { PyDateTime_Delta *self = PyDelta_CAST(op); - if (self->hashcode == -1) { + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->hashcode); + if (hash == -1) { PyObject *temp = delta_getstate(self); if (temp != NULL) { - self->hashcode = PyObject_Hash(temp); + hash = PyObject_Hash(temp); + FT_ATOMIC_STORE_SSIZE_RELAXED(self->hashcode, hash); Py_DECREF(temp); } } - return self->hashcode; + return hash; } static PyObject * @@ -3848,12 +3863,14 @@ static Py_hash_t date_hash(PyObject *op) { PyDateTime_Date *self = PyDate_CAST(op); - if (self->hashcode == -1) { - self->hashcode = generic_hash( + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->hashcode); + if (hash == -1) { + hash = generic_hash( (unsigned char *)self->data, _PyDateTime_DATE_DATASIZE); + FT_ATOMIC_STORE_SSIZE_RELAXED(self->hashcode, hash); } - return self->hashcode; + return hash; } static PyObject * @@ -4476,7 +4493,7 @@ static PyTypeObject PyDateTime_TimeZoneType = { timezone_methods, /* tp_methods */ 0, /* tp_members */ 0, /* tp_getset */ - 0, /* tp_base; filled in PyInit__datetime */ + &PyDateTime_TZInfoType, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ @@ -4932,7 +4949,8 @@ static Py_hash_t time_hash(PyObject *op) { PyDateTime_Time *self = PyTime_CAST(op); - if (self->hashcode == -1) { + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->hashcode); + if (hash == -1) { PyObject *offset, *self0; if (TIME_GET_FOLD(self)) { self0 = new_time_ex2(TIME_GET_HOUR(self), @@ -4954,10 +4972,11 @@ time_hash(PyObject *op) return -1; /* Reduce this to a hash of another object. */ - if (offset == Py_None) - self->hashcode = generic_hash( + if (offset == Py_None) { + hash = generic_hash( (unsigned char *)self->data, _PyDateTime_TIME_DATASIZE); - else { + FT_ATOMIC_STORE_SSIZE_RELAXED(self->hashcode, hash); + } else { PyObject *temp1, *temp2; int seconds, microseconds; assert(HASTZINFO(self)); @@ -4976,12 +4995,13 @@ time_hash(PyObject *op) Py_DECREF(offset); return -1; } - self->hashcode = PyObject_Hash(temp2); + hash = PyObject_Hash(temp2); + FT_ATOMIC_STORE_SSIZE_RELAXED(self->hashcode, hash); Py_DECREF(temp2); } Py_DECREF(offset); } - return self->hashcode; + return hash; } /*[clinic input] @@ -6439,7 +6459,8 @@ static Py_hash_t datetime_hash(PyObject *op) { PyDateTime_DateTime *self = PyDateTime_CAST(op); - if (self->hashcode == -1) { + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(self->hashcode); + if (hash == -1) { PyObject *offset, *self0; if (DATE_GET_FOLD(self)) { self0 = new_datetime_ex2(GET_YEAR(self), @@ -6464,10 +6485,11 @@ datetime_hash(PyObject *op) return -1; /* Reduce this to a hash of another object. */ - if (offset == Py_None) - self->hashcode = generic_hash( + if (offset == Py_None) { + hash = generic_hash( (unsigned char *)self->data, _PyDateTime_DATETIME_DATASIZE); - else { + FT_ATOMIC_STORE_SSIZE_RELAXED(self->hashcode, hash); + } else { PyObject *temp1, *temp2; int days, seconds; @@ -6491,12 +6513,13 @@ datetime_hash(PyObject *op) Py_DECREF(offset); return -1; } - self->hashcode = PyObject_Hash(temp2); + hash = PyObject_Hash(temp2); + FT_ATOMIC_STORE_SSIZE_RELAXED(self->hashcode, hash); Py_DECREF(temp2); } Py_DECREF(offset); } - return self->hashcode; + return hash; } /*[clinic input] @@ -7131,8 +7154,7 @@ static PyTypeObject PyDateTime_DateTimeType = { datetime_methods, /* tp_methods */ 0, /* tp_members */ datetime_getset, /* tp_getset */ - 0, /* tp_base; filled in - PyInit__datetime */ + &PyDateTime_DateType, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ @@ -7313,29 +7335,82 @@ clear_state(datetime_state *st) } -static int -init_static_types(PyInterpreterState *interp, int reloading) +PyStatus +_PyDateTime_InitTypes(PyInterpreterState *interp) { - if (reloading) { - return 0; - } - - // `&...` is not a constant expression according to a strict reading - // of C standards. Fill tp_base at run-time rather than statically. - // See https://bugs.python.org/issue40777 - PyDateTime_TimeZoneType.tp_base = &PyDateTime_TZInfoType; - PyDateTime_DateTimeType.tp_base = &PyDateTime_DateType; - /* Bases classes must be initialized before subclasses, * so capi_types must have the types in the appropriate order. */ for (size_t i = 0; i < Py_ARRAY_LENGTH(capi_types); i++) { PyTypeObject *type = capi_types[i]; if (_PyStaticType_InitForExtension(interp, type) < 0) { - return -1; + return _PyStatus_ERR("could not initialize static types"); } } - return 0; +#define DATETIME_ADD_MACRO(dict, c, value_expr) \ + do { \ + assert(!PyErr_Occurred()); \ + PyObject *value = (value_expr); \ + if (value == NULL) { \ + goto error; \ + } \ + if (PyDict_SetItemString(dict, c, value) < 0) { \ + Py_DECREF(value); \ + goto error; \ + } \ + Py_DECREF(value); \ + } while(0) + + /* timedelta values */ + PyObject *d = _PyType_GetDict(&PyDateTime_DeltaType); + DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); + DATETIME_ADD_MACRO(d, "min", new_delta(-MAX_DELTA_DAYS, 0, 0, 0)); + DATETIME_ADD_MACRO(d, "max", + new_delta(MAX_DELTA_DAYS, 24*3600-1, 1000000-1, 0)); + + /* date values */ + d = _PyType_GetDict(&PyDateTime_DateType); + DATETIME_ADD_MACRO(d, "min", new_date(1, 1, 1)); + DATETIME_ADD_MACRO(d, "max", new_date(MAXYEAR, 12, 31)); + DATETIME_ADD_MACRO(d, "resolution", new_delta(1, 0, 0, 0)); + + /* time values */ + d = _PyType_GetDict(&PyDateTime_TimeType); + DATETIME_ADD_MACRO(d, "min", new_time(0, 0, 0, 0, Py_None, 0)); + DATETIME_ADD_MACRO(d, "max", new_time(23, 59, 59, 999999, Py_None, 0)); + DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); + + /* datetime values */ + d = _PyType_GetDict(&PyDateTime_DateTimeType); + DATETIME_ADD_MACRO(d, "min", + new_datetime(1, 1, 1, 0, 0, 0, 0, Py_None, 0)); + DATETIME_ADD_MACRO(d, "max", new_datetime(MAXYEAR, 12, 31, 23, 59, 59, + 999999, Py_None, 0)); + DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); + + /* timezone values */ + d = _PyType_GetDict(&PyDateTime_TimeZoneType); + if (PyDict_SetItemString(d, "utc", (PyObject *)&utc_timezone) < 0) { + goto error; + } + + /* bpo-37642: These attributes are rounded to the nearest minute for backwards + * compatibility, even though the constructor will accept a wider range of + * values. This may change in the future.*/ + + /* -23:59 */ + DATETIME_ADD_MACRO(d, "min", create_timezone_from_delta(-1, 60, 0, 1)); + + /* +23:59 */ + DATETIME_ADD_MACRO( + d, "max", create_timezone_from_delta(0, (23 * 60 + 59) * 60, 0, 0)); + +#undef DATETIME_ADD_MACRO + + return _PyStatus_OK(); + +error: + return _PyStatus_NO_MEMORY(); } @@ -7353,20 +7428,15 @@ _datetime_exec(PyObject *module) { int rc = -1; datetime_state *st = get_module_state(module); - int reloading = 0; PyInterpreterState *interp = PyInterpreterState_Get(); - PyObject *old_module = get_current_module(interp, &reloading); + PyObject *old_module = get_current_module(interp); if (PyErr_Occurred()) { assert(old_module == NULL); goto error; } /* We actually set the "current" module right before a successful return. */ - if (init_static_types(interp, reloading) < 0) { - goto error; - } - for (size_t i = 0; i < Py_ARRAY_LENGTH(capi_types); i++) { PyTypeObject *type = capi_types[i]; const char *name = _PyType_Name(type); @@ -7380,68 +7450,6 @@ _datetime_exec(PyObject *module) goto error; } -#define DATETIME_ADD_MACRO(dict, c, value_expr) \ - do { \ - assert(!PyErr_Occurred()); \ - PyObject *value = (value_expr); \ - if (value == NULL) { \ - goto error; \ - } \ - if (PyDict_SetItemString(dict, c, value) < 0) { \ - Py_DECREF(value); \ - goto error; \ - } \ - Py_DECREF(value); \ - } while(0) - - if (!reloading) { - /* timedelta values */ - PyObject *d = _PyType_GetDict(&PyDateTime_DeltaType); - DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); - DATETIME_ADD_MACRO(d, "min", new_delta(-MAX_DELTA_DAYS, 0, 0, 0)); - DATETIME_ADD_MACRO(d, "max", - new_delta(MAX_DELTA_DAYS, 24*3600-1, 1000000-1, 0)); - - /* date values */ - d = _PyType_GetDict(&PyDateTime_DateType); - DATETIME_ADD_MACRO(d, "min", new_date(1, 1, 1)); - DATETIME_ADD_MACRO(d, "max", new_date(MAXYEAR, 12, 31)); - DATETIME_ADD_MACRO(d, "resolution", new_delta(1, 0, 0, 0)); - - /* time values */ - d = _PyType_GetDict(&PyDateTime_TimeType); - DATETIME_ADD_MACRO(d, "min", new_time(0, 0, 0, 0, Py_None, 0)); - DATETIME_ADD_MACRO(d, "max", new_time(23, 59, 59, 999999, Py_None, 0)); - DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); - - /* datetime values */ - d = _PyType_GetDict(&PyDateTime_DateTimeType); - DATETIME_ADD_MACRO(d, "min", - new_datetime(1, 1, 1, 0, 0, 0, 0, Py_None, 0)); - DATETIME_ADD_MACRO(d, "max", new_datetime(MAXYEAR, 12, 31, 23, 59, 59, - 999999, Py_None, 0)); - DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); - - /* timezone values */ - d = _PyType_GetDict(&PyDateTime_TimeZoneType); - if (PyDict_SetItemString(d, "utc", (PyObject *)&utc_timezone) < 0) { - goto error; - } - - /* bpo-37642: These attributes are rounded to the nearest minute for backwards - * compatibility, even though the constructor will accept a wider range of - * values. This may change in the future.*/ - - /* -23:59 */ - DATETIME_ADD_MACRO(d, "min", create_timezone_from_delta(-1, 60, 0, 1)); - - /* +23:59 */ - DATETIME_ADD_MACRO( - d, "max", create_timezone_from_delta(0, (23 * 60 + 59) * 60, 0, 0)); - } - -#undef DATETIME_ADD_MACRO - /* Add module level attributes */ if (PyModule_AddIntMacro(module, MINYEAR) < 0) { goto error; diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index cc65cbd98d71dc..c393cde21f5491 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -69,6 +69,7 @@ typedef struct { #include "clinic/_dbmmodule.c.h" #define check_dbmobject_open(v, err) \ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED((v)) \ if ((v)->di_dbm == NULL) { \ PyErr_SetString(err, "DBM object has already been closed"); \ return NULL; \ @@ -116,7 +117,7 @@ dbm_dealloc(PyObject *self) } static Py_ssize_t -dbm_length(PyObject *self) +dbm_length_lock_held(PyObject *self) { dbmobject *dp = dbmobject_CAST(self); _dbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -138,8 +139,18 @@ dbm_length(PyObject *self) return dp->di_size; } +static Py_ssize_t +dbm_length(PyObject *self) +{ + Py_ssize_t result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_length_lock_held(self); + Py_END_CRITICAL_SECTION(); + return result; +} + static int -dbm_bool(PyObject *self) +dbm_bool_lock_held(PyObject *self) { dbmobject *dp = dbmobject_CAST(self); _dbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -170,8 +181,18 @@ dbm_bool(PyObject *self) return 1; } +static int +dbm_bool(PyObject *self) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_bool_lock_held(self); + Py_END_CRITICAL_SECTION(); + return result; +} + static PyObject * -dbm_subscript(PyObject *self, PyObject *key) +dbm_subscript_lock_held(PyObject *self, PyObject *key) { datum drec, krec; Py_ssize_t tmp_size; @@ -197,8 +218,18 @@ dbm_subscript(PyObject *self, PyObject *key) return PyBytes_FromStringAndSize(drec.dptr, drec.dsize); } +static PyObject * +dbm_subscript(PyObject *self, PyObject *key) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_subscript_lock_held(self, key); + Py_END_CRITICAL_SECTION(); + return result; +} + static int -dbm_ass_sub(PyObject *self, PyObject *v, PyObject *w) +dbm_ass_sub_lock_held(PyObject *self, PyObject *v, PyObject *w) { datum krec, drec; Py_ssize_t tmp_size; @@ -252,7 +283,18 @@ dbm_ass_sub(PyObject *self, PyObject *v, PyObject *w) return 0; } +static int +dbm_ass_sub(PyObject *self, PyObject *v, PyObject *w) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_ass_sub_lock_held(self, v, w); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _dbm.dbm.close Close the database. @@ -260,7 +302,7 @@ Close the database. static PyObject * _dbm_dbm_close_impl(dbmobject *self) -/*[clinic end generated code: output=c8dc5b6709600b86 input=046db72377d51be8]*/ +/*[clinic end generated code: output=c8dc5b6709600b86 input=4a94f79facbc28ca]*/ { if (self->di_dbm) { dbm_close(self->di_dbm); @@ -270,6 +312,7 @@ _dbm_dbm_close_impl(dbmobject *self) } /*[clinic input] +@critical_section _dbm.dbm.keys cls: defining_class @@ -279,7 +322,7 @@ Return a list of all keys in the database. static PyObject * _dbm_dbm_keys_impl(dbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=f2a593b3038e5996 input=d3706a28fc051097]*/ +/*[clinic end generated code: output=f2a593b3038e5996 input=6ddefeadf2a80156]*/ { PyObject *v, *item; datum key; @@ -310,7 +353,7 @@ _dbm_dbm_keys_impl(dbmobject *self, PyTypeObject *cls) } static int -dbm_contains(PyObject *self, PyObject *arg) +dbm_contains_lock_held(PyObject *self, PyObject *arg) { dbmobject *dp = dbmobject_CAST(self); datum key, val; @@ -343,7 +386,18 @@ dbm_contains(PyObject *self, PyObject *arg) return val.dptr != NULL; } +static int +dbm_contains(PyObject *self, PyObject *arg) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = dbm_contains_lock_held(self, arg); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _dbm.dbm.get cls: defining_class key: str(accept={str, robuffer}, zeroes=True) @@ -356,7 +410,7 @@ Return the value for key if present, otherwise default. static PyObject * _dbm_dbm_get_impl(dbmobject *self, PyTypeObject *cls, const char *key, Py_ssize_t key_length, PyObject *default_value) -/*[clinic end generated code: output=b4e55f8b6d482bc4 input=66b993b8349fa8c1]*/ +/*[clinic end generated code: output=b4e55f8b6d482bc4 input=1d88a22bb5e55202]*/ { datum dbm_key, val; _dbm_state *state = PyType_GetModuleState(cls); @@ -373,6 +427,7 @@ _dbm_dbm_get_impl(dbmobject *self, PyTypeObject *cls, const char *key, } /*[clinic input] +@critical_section _dbm.dbm.setdefault cls: defining_class key: str(accept={str, robuffer}, zeroes=True) @@ -387,7 +442,7 @@ If key is not in the database, it is inserted with default as the value. static PyObject * _dbm_dbm_setdefault_impl(dbmobject *self, PyTypeObject *cls, const char *key, Py_ssize_t key_length, PyObject *default_value) -/*[clinic end generated code: output=9c2f6ea6d0fb576c input=126a3ff15c5f8232]*/ +/*[clinic end generated code: output=9c2f6ea6d0fb576c input=c01510ef7571e13b]*/ { datum dbm_key, val; Py_ssize_t tmp_size; @@ -427,6 +482,7 @@ _dbm_dbm_setdefault_impl(dbmobject *self, PyTypeObject *cls, const char *key, } /*[clinic input] +@critical_section _dbm.dbm.clear cls: defining_class / @@ -436,7 +492,7 @@ Remove all items from the database. static PyObject * _dbm_dbm_clear_impl(dbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=8d126b9e1d01a434 input=43aa6ca1acb7f5f5]*/ +/*[clinic end generated code: output=8d126b9e1d01a434 input=a1aa5d99adfb9656]*/ { _dbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); @@ -467,8 +523,12 @@ dbm__enter__(PyObject *self, PyObject *Py_UNUSED(dummy)) static PyObject * dbm__exit__(PyObject *self, PyObject *Py_UNUSED(args)) { + PyObject *result; dbmobject *dp = dbmobject_CAST(self); - return _dbm_dbm_close_impl(dp); + Py_BEGIN_CRITICAL_SECTION(self); + result = _dbm_dbm_close_impl(dp); + Py_END_CRITICAL_SECTION(); + return result; } static PyMethodDef dbm_methods[] = { diff --git a/Modules/_decimal/docstrings.h b/Modules/_decimal/docstrings.h index 77017a92252cb8..9c2da5e35f919a 100644 --- a/Modules/_decimal/docstrings.h +++ b/Modules/_decimal/docstrings.h @@ -292,22 +292,26 @@ an infinity then Decimal('Infinity') is returned.\n\ PyDoc_STRVAR(doc_logical_and, "logical_and($self, /, other, context=None)\n--\n\n\ -Return the digit-wise 'and' of the two (logical) operands.\n\ +Applies an 'and' operation between self and other's digits.\n\n\ +Both self and other must be logical numbers.\n\ \n"); PyDoc_STRVAR(doc_logical_invert, "logical_invert($self, /, context=None)\n--\n\n\ -Return the digit-wise inversion of the (logical) operand.\n\ +Invert all its digits.\n\n\ +The self must be logical number.\n\ \n"); PyDoc_STRVAR(doc_logical_or, "logical_or($self, /, other, context=None)\n--\n\n\ -Return the digit-wise 'or' of the two (logical) operands.\n\ +Applies an 'or' operation between self and other's digits.\n\n\ +Both self and other must be logical numbers. \n\ \n"); PyDoc_STRVAR(doc_logical_xor, "logical_xor($self, /, other, context=None)\n--\n\n\ -Return the digit-wise 'exclusive or' of the two (logical) operands.\n\ +Applies an 'xor' operation between self and other's digits.\n\n\ +Both self and other must be logical numbers.\n\ \n"); PyDoc_STRVAR(doc_max, @@ -712,22 +716,90 @@ Return the exponent of the magnitude of the operand's MSD.\n\ PyDoc_STRVAR(doc_ctx_logical_and, "logical_and($self, x, y, /)\n--\n\n\ -Digit-wise and of x and y.\n\ +Applies the logical operation 'and' between each operand's digits.\n\n\ +The operands must be both logical numbers.\n\n\ + >>> ExtendedContext.logical_and(Decimal('0'), Decimal('0'))\n\ + Decimal('0')\n\ + >>> ExtendedContext.logical_and(Decimal('0'), Decimal('1'))\n\ + Decimal('0')\n\ + >>> ExtendedContext.logical_and(Decimal('1'), Decimal('0'))\n\ + Decimal('0')\n\ + >>> ExtendedContext.logical_and(Decimal('1'), Decimal('1'))\n\ + Decimal('1')\n\ + >>> ExtendedContext.logical_and(Decimal('1100'), Decimal('1010'))\n\ + Decimal('1000')\n\ + >>> ExtendedContext.logical_and(Decimal('1111'), Decimal('10'))\n\ + Decimal('10')\n\ + >>> ExtendedContext.logical_and(110, 1101)\n\ + Decimal('100')\n\ + >>> ExtendedContext.logical_and(Decimal(110), 1101)\n\ + Decimal('100')\n\ + >>> ExtendedContext.logical_and(110, Decimal(1101))\n\ + Decimal('100')\n\ \n"); PyDoc_STRVAR(doc_ctx_logical_invert, "logical_invert($self, x, /)\n--\n\n\ -Invert all digits of x.\n\ +Invert all the digits in the operand.\n\n\ +The operand must be a logical number.\n\n\ + >>> ExtendedContext.logical_invert(Decimal('0'))\n\ + Decimal('111111111')\n\ + >>> ExtendedContext.logical_invert(Decimal('1'))\n\ + Decimal('111111110')\n\ + >>> ExtendedContext.logical_invert(Decimal('111111111'))\n\ + Decimal('0')\n\ + >>> ExtendedContext.logical_invert(Decimal('101010101'))\n\ + Decimal('10101010')\n\ + >>> ExtendedContext.logical_invert(1101)\n\ + Decimal('111110010')\n\ \n"); PyDoc_STRVAR(doc_ctx_logical_or, "logical_or($self, x, y, /)\n--\n\n\ -Digit-wise or of x and y.\n\ +Applies the logical operation 'or' between each operand's digits.\n\n\ +The operands must be both logical numbers.\n\n\ + >>> ExtendedContext.logical_or(Decimal('0'), Decimal('0'))\n\ + Decimal('0')\n\ + >>> ExtendedContext.logical_or(Decimal('0'), Decimal('1'))\n\ + Decimal('1')\n\ + >>> ExtendedContext.logical_or(Decimal('1'), Decimal('0'))\n\ + Decimal('1')\n\ + >>> ExtendedContext.logical_or(Decimal('1'), Decimal('1'))\n\ + Decimal('1')\n\ + >>> ExtendedContext.logical_or(Decimal('1100'), Decimal('1010'))\n\ + Decimal('1110')\n\ + >>> ExtendedContext.logical_or(Decimal('1110'), Decimal('10'))\n\ + Decimal('1110')\n\ + >>> ExtendedContext.logical_or(110, 1101)\n\ + Decimal('1111')\n\ + >>> ExtendedContext.logical_or(Decimal(110), 1101)\n\ + Decimal('1111')\n\ + >>> ExtendedContext.logical_or(110, Decimal(1101))\n\ + Decimal('1111')\n\ \n"); PyDoc_STRVAR(doc_ctx_logical_xor, "logical_xor($self, x, y, /)\n--\n\n\ -Digit-wise xor of x and y.\n\ +Applies the logical operation 'xor' between each operand's digits.\n\n\ +The operands must be both logical numbers.\n\n\ + >>> ExtendedContext.logical_xor(Decimal('0'), Decimal('0'))\n\ + Decimal('0')\n\ + >>> ExtendedContext.logical_xor(Decimal('0'), Decimal('1'))\n\ + Decimal('1')\n\ + >>> ExtendedContext.logical_xor(Decimal('1'), Decimal('0'))\n\ + Decimal('1')\n\ + >>> ExtendedContext.logical_xor(Decimal('1'), Decimal('1'))\n\ + Decimal('0')\n\ + >>> ExtendedContext.logical_xor(Decimal('1100'), Decimal('1010'))\n\ + Decimal('110')\n\ + >>> ExtendedContext.logical_xor(Decimal('1111'), Decimal('10'))\n\ + Decimal('1101')\n\ + >>> ExtendedContext.logical_xor(110, 1101)\n\ + Decimal('1011')\n\ + >>> ExtendedContext.logical_xor(Decimal(110), 1101)\n\ + Decimal('1011')\n\ + >>> ExtendedContext.logical_xor(110, Decimal(1101))\n\ + Decimal('1011')\n\ \n"); PyDoc_STRVAR(doc_ctx_max, diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 8c3efa36353e24..3173b52afb31b6 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -17,6 +17,7 @@ #include "Python.h" #include "pycore_pyhash.h" // _Py_HashSecret +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> // offsetof() #include "expat.h" @@ -690,8 +691,7 @@ element_dealloc(PyObject *op) /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); /* element_gc_clear clears all references and deallocates extra */ @@ -811,6 +811,8 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) PyTypeObject *tp = Py_TYPE(self); elementtreestate *st = get_elementtree_state_by_type(tp); + // The deepcopy() helper takes care of incrementing the refcount + // of the object to copy so to avoid use-after-frees. tag = deepcopy(st, self->tag, memo); if (!tag) return NULL; @@ -845,11 +847,13 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra || !element->extra->length); if (self->extra) { - if (element_resize(element, self->extra->length) < 0) + Py_ssize_t expected_count = self->extra->length; + if (element_resize(element, expected_count) < 0) { + assert(!element->extra->length); goto error; + } - // TODO(picnixz): check for an evil child's __deepcopy__ on 'self' - for (i = 0; i < self->extra->length; i++) { + for (i = 0; self->extra && i < self->extra->length; i++) { PyObject* child = deepcopy(st, self->extra->children[i], memo); if (!child || !Element_Check(st, child)) { if (child) { @@ -859,11 +863,24 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) element->extra->length = i; goto error; } + if (self->extra && expected_count != self->extra->length) { + // 'self->extra' got mutated and 'element' may not have + // sufficient space to hold the next iteration's item. + expected_count = self->extra->length; + if (element_resize(element, expected_count) < 0) { + Py_DECREF(child); + element->extra->length = i; + goto error; + } + } element->extra->children[i] = child; } assert(!element->extra->length); - element->extra->length = self->extra->length; + // The original 'self->extra' may be gone at this point if deepcopy() + // has side-effects. However, 'i' is the number of copied items that + // we were able to successfully copy. + element->extra->length = i; } /* add object to memo dictionary (so deepcopy won't visit it again) */ @@ -895,7 +912,7 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) return Py_NewRef(object); } - if (Py_REFCNT(object) == 1) { + if (_PyObject_IsUniquelyReferenced(object)) { if (PyDict_CheckExact(object)) { PyObject *key, *value; Py_ssize_t pos = 0; @@ -906,13 +923,20 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) break; } } - if (simple) + if (simple) { return PyDict_Copy(object); + } /* Fall through to general case */ } else if (Element_CheckExact(st, object)) { - return _elementtree_Element___deepcopy___impl( + // The __deepcopy__() call may call arbitrary code even if the + // object to copy is a built-in XML element (one of its children + // any of its parents in its own __deepcopy__() implementation). + Py_INCREF(object); + PyObject *res = _elementtree_Element___deepcopy___impl( (ElementObject *)object, memo); + Py_DECREF(object); + return res; } } @@ -923,8 +947,11 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) return NULL; } + Py_INCREF(object); PyObject *args[2] = {object, memo}; - return PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL); + PyObject *res = PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL); + Py_DECREF(object); + return res; } @@ -2767,8 +2794,9 @@ treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) self->data = Py_NewRef(data); } else { /* more than one item; use a list to collect items */ - if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && - PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) { + if (PyBytes_CheckExact(self->data) + && _PyObject_IsUniquelyReferenced(self->data) + && PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) { /* XXX this code path unused in Python 3? */ /* expat often generates single character data sections; handle the most common case by resizing the existing string... */ @@ -4187,8 +4215,8 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self, (XML_ProcessingInstructionHandler) expat_pi_handler ); } else { - Py_DECREF(events_seq); PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name); + Py_DECREF(events_seq); return NULL; } } diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index e6c454faf4b16f..3420e7d8c37b74 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -7,6 +7,7 @@ #include "pycore_pyatomic_ft_wrappers.h" #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_tuple.h" // _PyTuple_ITEMS() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/_functoolsmodule.c.h" @@ -196,6 +197,19 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw) return NULL; } + /* keyword Placeholder prohibition */ + if (kw != NULL) { + PyObject *key, *val; + Py_ssize_t pos = 0; + while (PyDict_Next(kw, &pos, &key, &val)) { + if (val == phold) { + PyErr_SetString(PyExc_TypeError, + "Placeholder cannot be passed as a keyword argument"); + return NULL; + } + } + } + /* check wrapped function / object */ pto_args = pto_kw = NULL; int res = PyObject_TypeCheck(func, state->partial_type); @@ -284,7 +298,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw) if (kw == NULL) { pto->kw = PyDict_New(); } - else if (Py_REFCNT(kw) == 1) { + else if (_PyObject_IsUniquelyReferenced(kw)) { pto->kw = Py_NewRef(kw); } else { @@ -338,9 +352,7 @@ partial_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(self); - if (partialobject_CAST(self)->weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, partialobject_CAST(self)->weakreflist); (void)partial_clear(self); tp->tp_free(self); Py_DECREF(tp); @@ -687,7 +699,8 @@ partial_setstate(PyObject *self, PyObject *state) if (!PyArg_ParseTuple(state, "OOOO", &fn, &fnargs, &kw, &dict) || !PyCallable_Check(fn) || !PyTuple_Check(fnargs) || - (kw != Py_None && !PyDict_Check(kw))) + (kw != Py_None && !PyDict_Check(kw)) || + (dict != Py_None && !PyDict_Check(dict))) { PyErr_SetString(PyExc_TypeError, "invalid partial state"); return NULL; @@ -983,7 +996,7 @@ _functools_reduce_impl(PyObject *module, PyObject *func, PyObject *seq, for (;;) { PyObject *op2; - if (Py_REFCNT(args) > 1) { + if (!_PyObject_IsUniquelyReferenced(args)) { Py_DECREF(args); if ((args = PyTuple_New(2)) == NULL) goto Fail; @@ -1370,8 +1383,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self, this same key, then this setitem call will update the cache dict with this new link, leaving the old link as an orphan (i.e. not having a cache dict entry that refers to it). */ - if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, - hash) < 0) { + if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key, + (PyObject *)link, hash) < 0) { Py_DECREF(link); return NULL; } @@ -1440,8 +1453,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self, for successful insertion in the cache dict before adding the link to the linked list. Otherwise, the potentially reentrant __eq__ call could cause the then orphan link to be visited. */ - if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, - hash) < 0) { + if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key, + (PyObject *)link, hash) < 0) { /* Somehow the cache dict update failed. We no longer can restore the old link. Let the error propagate upward and leave the cache short one link. */ @@ -1608,9 +1621,7 @@ lru_cache_dealloc(PyObject *op) PyTypeObject *tp = Py_TYPE(obj); /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(obj); - if (obj->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, obj->weakreflist); (void)lru_cache_tp_clear(op); tp->tp_free(obj); @@ -1676,7 +1687,13 @@ _functools__lru_cache_wrapper_cache_clear_impl(PyObject *self) lru_list_elem *list = lru_cache_unlink_list(_self); FT_ATOMIC_STORE_SSIZE_RELAXED(_self->hits, 0); FT_ATOMIC_STORE_SSIZE_RELAXED(_self->misses, 0); - PyDict_Clear(_self->cache); + if (_self->wrapper == bounded_lru_cache_wrapper) { + /* The critical section on the lru cache itself protects the dictionary + for bounded_lru_cache instances. */ + _PyDict_Clear_LockHeld(_self->cache); + } else { + PyDict_Clear(_self->cache); + } lru_cache_clear_list(list); Py_RETURN_NONE; } diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index ab2ebdba9249bf..d9abab19719789 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -81,6 +81,7 @@ typedef struct { #include "clinic/_gdbmmodule.c.h" #define check_gdbmobject_open(v, err) \ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED((v)) \ if ((v)->di_dbm == NULL) { \ PyErr_SetString(err, "GDBM object has already been closed"); \ return NULL; \ @@ -142,7 +143,7 @@ gdbm_dealloc(PyObject *op) } static Py_ssize_t -gdbm_length(PyObject *op) +gdbm_length_lock_held(PyObject *op) { gdbmobject *dp = _gdbmobject_CAST(op); _gdbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -188,8 +189,18 @@ gdbm_length(PyObject *op) return dp->di_size; } +static Py_ssize_t +gdbm_length(PyObject *op) +{ + Py_ssize_t result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_length_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + static int -gdbm_bool(PyObject *op) +gdbm_bool_lock_held(PyObject *op) { gdbmobject *dp = _gdbmobject_CAST(op); _gdbm_state *state = PyType_GetModuleState(Py_TYPE(dp)); @@ -218,6 +229,16 @@ gdbm_bool(PyObject *op) return 1; } +static int +gdbm_bool(PyObject *op) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_bool_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + // Wrapper function for PyArg_Parse(o, "s#", &d.dptr, &d.size). // This function is needed to support PY_SSIZE_T_CLEAN. // Return 1 on success, same to PyArg_Parse(). @@ -240,7 +261,7 @@ parse_datum(PyObject *o, datum *d, const char *failmsg) } static PyObject * -gdbm_subscript(PyObject *op, PyObject *key) +gdbm_subscript_lock_held(PyObject *op, PyObject *key) { PyObject *v; datum drec, krec; @@ -265,6 +286,16 @@ gdbm_subscript(PyObject *op, PyObject *key) return v; } +static PyObject * +gdbm_subscript(PyObject *op, PyObject *key) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_subscript_lock_held(op, key); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] _gdbm.gdbm.get @@ -290,7 +321,7 @@ _gdbm_gdbm_get_impl(gdbmobject *self, PyObject *key, PyObject *default_value) } static int -gdbm_ass_sub(PyObject *op, PyObject *v, PyObject *w) +gdbm_ass_sub_lock_held(PyObject *op, PyObject *v, PyObject *w) { datum krec, drec; const char *failmsg = "gdbm mappings have bytes or string indices only"; @@ -335,7 +366,18 @@ gdbm_ass_sub(PyObject *op, PyObject *v, PyObject *w) return 0; } +static int +gdbm_ass_sub(PyObject *op, PyObject *v, PyObject *w) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(op); + result = gdbm_ass_sub_lock_held(op, v, w); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _gdbm.gdbm.setdefault key: object @@ -348,7 +390,7 @@ Get value for key, or set it to default and return default if not present. static PyObject * _gdbm_gdbm_setdefault_impl(gdbmobject *self, PyObject *key, PyObject *default_value) -/*[clinic end generated code: output=f3246e880509f142 input=0db46b69e9680171]*/ +/*[clinic end generated code: output=f3246e880509f142 input=854374cd81ab51b6]*/ { PyObject *res; @@ -363,6 +405,7 @@ _gdbm_gdbm_setdefault_impl(gdbmobject *self, PyObject *key, } /*[clinic input] +@critical_section _gdbm.gdbm.close Close the database. @@ -370,7 +413,7 @@ Close the database. static PyObject * _gdbm_gdbm_close_impl(gdbmobject *self) -/*[clinic end generated code: output=f5abb4d6bb9e52d5 input=0a203447379b45fd]*/ +/*[clinic end generated code: output=f5abb4d6bb9e52d5 input=56b604f4e77f533d]*/ { if (self->di_dbm) { gdbm_close(self->di_dbm); @@ -381,6 +424,7 @@ _gdbm_gdbm_close_impl(gdbmobject *self) /* XXX Should return a set or a set view */ /*[clinic input] +@critical_section _gdbm.gdbm.keys cls: defining_class @@ -390,7 +434,7 @@ Get a list of all keys in the database. static PyObject * _gdbm_gdbm_keys_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=c24b824e81404755 input=1428b7c79703d7d5]*/ +/*[clinic end generated code: output=c24b824e81404755 input=785988b1ea8f77e0]*/ { PyObject *v, *item; datum key, nextkey; @@ -432,7 +476,7 @@ _gdbm_gdbm_keys_impl(gdbmobject *self, PyTypeObject *cls) } static int -gdbm_contains(PyObject *self, PyObject *arg) +gdbm_contains_lock_held(PyObject *self, PyObject *arg) { gdbmobject *dp = (gdbmobject *)self; datum key; @@ -463,7 +507,18 @@ gdbm_contains(PyObject *self, PyObject *arg) return gdbm_exists(dp->di_dbm, key); } +static int +gdbm_contains(PyObject *self, PyObject *arg) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(self); + result = gdbm_contains_lock_held(self, arg); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] +@critical_section _gdbm.gdbm.firstkey cls: defining_class @@ -477,7 +532,7 @@ hash values, and won't be sorted by the key values. static PyObject * _gdbm_gdbm_firstkey_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=139275e9c8b60827 input=ed8782a029a5d299]*/ +/*[clinic end generated code: output=139275e9c8b60827 input=aad5a7c886c542f5]*/ { PyObject *v; datum key; @@ -497,6 +552,7 @@ _gdbm_gdbm_firstkey_impl(gdbmobject *self, PyTypeObject *cls) } /*[clinic input] +@critical_section _gdbm.gdbm.nextkey cls: defining_class @@ -517,7 +573,7 @@ to create a list in memory that contains them all: static PyObject * _gdbm_gdbm_nextkey_impl(gdbmobject *self, PyTypeObject *cls, const char *key, Py_ssize_t key_length) -/*[clinic end generated code: output=c81a69300ef41766 input=365e297bc0b3db48]*/ +/*[clinic end generated code: output=c81a69300ef41766 input=181f1130d5bfeb1e]*/ { PyObject *v; datum dbm_key, nextkey; @@ -539,6 +595,7 @@ _gdbm_gdbm_nextkey_impl(gdbmobject *self, PyTypeObject *cls, const char *key, } /*[clinic input] +@critical_section _gdbm.gdbm.reorganize cls: defining_class @@ -554,7 +611,7 @@ kept and reused as new (key,value) pairs are added. static PyObject * _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=d77c69e8e3dd644a input=e1359faeef844e46]*/ +/*[clinic end generated code: output=d77c69e8e3dd644a input=3e3ca0d2ea787861]*/ { _gdbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); @@ -573,6 +630,7 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls) } /*[clinic input] +@critical_section _gdbm.gdbm.sync cls: defining_class @@ -585,7 +643,7 @@ any unwritten data to be written to the disk. static PyObject * _gdbm_gdbm_sync_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=bb680a2035c3f592 input=3d749235f79b6f2a]*/ +/*[clinic end generated code: output=bb680a2035c3f592 input=6054385b071d238a]*/ { _gdbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); @@ -595,6 +653,7 @@ _gdbm_gdbm_sync_impl(gdbmobject *self, PyTypeObject *cls) } /*[clinic input] +@critical_section _gdbm.gdbm.clear cls: defining_class / @@ -604,7 +663,7 @@ Remove all items from the database. static PyObject * _gdbm_gdbm_clear_impl(gdbmobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=673577c573318661 input=34136d52fcdd4210]*/ +/*[clinic end generated code: output=673577c573318661 input=b17467adfe62f23d]*/ { _gdbm_state *state = PyType_GetModuleState(cls); assert(state != NULL); @@ -619,8 +678,10 @@ _gdbm_gdbm_clear_impl(gdbmobject *self, PyTypeObject *cls) } if (gdbm_delete(self->di_dbm, key) < 0) { PyErr_SetString(state->gdbm_error, "cannot delete item from database"); + free(key.dptr); return NULL; } + free(key.dptr); } Py_RETURN_NONE; } @@ -634,7 +695,11 @@ gdbm__enter__(PyObject *self, PyObject *args) static PyObject * gdbm__exit__(PyObject *self, PyObject *args) { - return _gdbm_gdbm_close_impl((gdbmobject *)self); + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(self); + result = _gdbm_gdbm_close_impl((gdbmobject *)self); + Py_END_CRITICAL_SECTION(); + return result; } static PyMethodDef gdbm_methods[] = { diff --git a/Modules/_hacl/Hacl_HMAC.h b/Modules/_hacl/Hacl_HMAC.h index 10ff15183f2834..7dca53c9254546 100644 --- a/Modules/_hacl/Hacl_HMAC.h +++ b/Modules/_hacl/Hacl_HMAC.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_HMAC_H -#define __Hacl_HMAC_H +#ifndef Hacl_HMAC_H +#define Hacl_HMAC_H #if defined(__cplusplus) extern "C" { @@ -220,5 +220,5 @@ Hacl_HMAC_compute_blake2b_32( } #endif -#define __Hacl_HMAC_H_DEFINED -#endif +#define Hacl_HMAC_H_DEFINED +#endif /* Hacl_HMAC_H */ diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.c b/Modules/_hacl/Hacl_Hash_Blake2b.c index 21ab2b88c799a6..a5b75d61798949 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b.c @@ -544,11 +544,9 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk1 - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk1 << 8U ^ + ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); tmp[1U] = p.node_offset; tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; tmp[3U] = 0ULL; @@ -860,14 +858,10 @@ static Hacl_Hash_Blake2b_state_t uint64_t x = r4; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ - ((uint64_t)pv.fanout - << 16U - ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ + ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1059,11 +1053,9 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params uint64_t x = r; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1200,8 +1192,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1265,8 +1256,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1296,8 +1286,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state10, @@ -1359,8 +1348,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1690,14 +1678,10 @@ Hacl_Hash_Blake2b_hash_with_key_and_params( uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ - ((uint64_t)params.fanout - << 16U - ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + (uint64_t)nn ^ + ((uint64_t)kk << 8U ^ + ((uint64_t)params.fanout << 16U ^ + ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); tmp[1U] = params.node_offset; tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; tmp[3U] = 0ULL; diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.h b/Modules/_hacl/Hacl_Hash_Blake2b.h index 3a73f358c98cc3..b07893ba339245 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b.h +++ b/Modules/_hacl/Hacl_Hash_Blake2b.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_Blake2b_H -#define __Hacl_Hash_Blake2b_H +#ifndef Hacl_Hash_Blake2b_H +#define Hacl_Hash_Blake2b_H #if defined(__cplusplus) extern "C" { @@ -220,5 +220,5 @@ Hacl_Hash_Blake2b_hash_with_key_and_params( } #endif -#define __Hacl_Hash_Blake2b_H_DEFINED -#endif +#define Hacl_Hash_Blake2b_H_DEFINED +#endif /* Hacl_Hash_Blake2b_H */ diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c index c4d9b4a689d28f..f955f1c6115b1e 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c @@ -274,11 +274,9 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk1 - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk1 << 8U ^ + ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); tmp[1U] = p.node_offset; tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; tmp[3U] = 0ULL; @@ -746,14 +744,10 @@ static Hacl_Hash_Blake2b_Simd256_state_t uint64_t x = r4; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ - ((uint64_t)pv.fanout - << 16U - ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ + ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -936,11 +930,9 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and uint64_t x = r; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1075,8 +1067,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1140,8 +1131,7 @@ Hacl_Hash_Blake2b_Simd256_update( nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1171,8 +1161,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state10, @@ -1234,8 +1223,7 @@ Hacl_Hash_Blake2b_Simd256_update( nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1578,14 +1566,10 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ - ((uint64_t)params.fanout - << 16U - ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + (uint64_t)nn ^ + ((uint64_t)kk << 8U ^ + ((uint64_t)params.fanout << 16U ^ + ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); tmp[1U] = params.node_offset; tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; tmp[3U] = 0ULL; diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h index 0271ab8e024e51..8be8f32db62b15 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_Blake2b_Simd256_H -#define __Hacl_Hash_Blake2b_Simd256_H +#ifndef Hacl_Hash_Blake2b_Simd256_H +#define Hacl_Hash_Blake2b_Simd256_H #if defined(__cplusplus) extern "C" { @@ -206,5 +206,5 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( } #endif -#define __Hacl_Hash_Blake2b_Simd256_H_DEFINED -#endif +#define Hacl_Hash_Blake2b_Simd256_H_DEFINED +#endif /* Hacl_Hash_Blake2b_Simd256_H */ diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.c b/Modules/_hacl/Hacl_Hash_Blake2s.c index 730ba135afb2fb..0d4fcc7f3951fc 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s.c @@ -543,13 +543,13 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)(uint8_t)nn - ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + (uint32_t)(uint8_t)nn ^ + ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); tmp[1U] = p.leaf_length; tmp[2U] = (uint32_t)p.node_offset; tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + (uint32_t)(p.node_offset >> 32U) ^ + ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -846,16 +846,14 @@ static Hacl_Hash_Blake2s_state_t uint32_t x = r4; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ - ((uint32_t)pv.key_length - << 8U - ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ + ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1042,13 +1040,13 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params uint32_t x = r; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1182,8 +1180,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1237,8 +1234,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1268,8 +1264,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state10, @@ -1321,8 +1316,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1639,16 +1633,14 @@ Hacl_Hash_Blake2s_hash_with_key_and_params( uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)params.digest_length - ^ - ((uint32_t)params.key_length - << 8U - ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + (uint32_t)params.digest_length ^ + ((uint32_t)params.key_length << 8U ^ + ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); tmp[1U] = params.leaf_length; tmp[2U] = (uint32_t)params.node_offset; tmp[3U] = - (uint32_t)(params.node_offset >> 32U) - ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + (uint32_t)(params.node_offset >> 32U) ^ + ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.h b/Modules/_hacl/Hacl_Hash_Blake2s.h index fbf8cff5cd1073..2582a3d34e34fc 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s.h +++ b/Modules/_hacl/Hacl_Hash_Blake2s.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_Blake2s_H -#define __Hacl_Hash_Blake2s_H +#ifndef Hacl_Hash_Blake2s_H +#define Hacl_Hash_Blake2s_H #if defined(__cplusplus) extern "C" { @@ -198,5 +198,5 @@ Hacl_Hash_Blake2s_hash_with_key_and_params( } #endif -#define __Hacl_Hash_Blake2s_H_DEFINED -#endif +#define Hacl_Hash_Blake2s_H_DEFINED +#endif /* Hacl_Hash_Blake2s_H */ diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c index 7e9cd79544f8f1..fa46be045f3441 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c @@ -271,13 +271,13 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)(uint8_t)nn - ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + (uint32_t)(uint8_t)nn ^ + ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); tmp[1U] = p.leaf_length; tmp[2U] = (uint32_t)p.node_offset; tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + (uint32_t)(p.node_offset >> 32U) ^ + ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -736,16 +736,14 @@ static Hacl_Hash_Blake2s_Simd128_state_t uint32_t x = r4; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ - ((uint32_t)pv.key_length - << 8U - ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ + ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -923,13 +921,13 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and uint32_t x = r; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1061,8 +1059,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1116,8 +1113,7 @@ Hacl_Hash_Blake2s_Simd128_update( Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1147,8 +1143,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state10, @@ -1200,8 +1195,7 @@ Hacl_Hash_Blake2s_Simd128_update( Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1531,16 +1525,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)params.digest_length - ^ - ((uint32_t)params.key_length - << 8U - ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + (uint32_t)params.digest_length ^ + ((uint32_t)params.key_length << 8U ^ + ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); tmp[1U] = params.leaf_length; tmp[2U] = (uint32_t)params.node_offset; tmp[3U] = - (uint32_t)(params.node_offset >> 32U) - ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + (uint32_t)(params.node_offset >> 32U) ^ + ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h index 56456e6fbb3df8..4b3b4e4fbb5630 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_Blake2s_Simd128_H -#define __Hacl_Hash_Blake2s_Simd128_H +#ifndef Hacl_Hash_Blake2s_Simd128_H +#define Hacl_Hash_Blake2s_Simd128_H #if defined(__cplusplus) extern "C" { @@ -206,5 +206,5 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( } #endif -#define __Hacl_Hash_Blake2s_Simd128_H_DEFINED -#endif +#define Hacl_Hash_Blake2s_Simd128_H_DEFINED +#endif /* Hacl_Hash_Blake2s_Simd128_H */ diff --git a/Modules/_hacl/Hacl_Hash_MD5.c b/Modules/_hacl/Hacl_Hash_MD5.c index 75ce8d2926e6e1..305c75483c06ea 100644 --- a/Modules/_hacl/Hacl_Hash_MD5.c +++ b/Modules/_hacl/Hacl_Hash_MD5.c @@ -66,11 +66,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti0 = _t[0U]; uint32_t v = - vb0 - + - ((va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) - << 7U - | (va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) >> 25U); + vb0 + + ((va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) << 7U | + (va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) >> 25U); abcd[0U] = v; uint32_t va0 = abcd[3U]; uint32_t vb1 = abcd[0U]; @@ -82,11 +80,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti1 = _t[1U]; uint32_t v0 = - vb1 - + - ((va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) - << 12U - | (va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) >> 20U); + vb1 + + ((va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) << 12U | + (va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) >> 20U); abcd[3U] = v0; uint32_t va1 = abcd[2U]; uint32_t vb2 = abcd[3U]; @@ -98,11 +94,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti2 = _t[2U]; uint32_t v1 = - vb2 - + - ((va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) - << 17U - | (va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) >> 15U); + vb2 + + ((va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) << 17U | + (va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) >> 15U); abcd[2U] = v1; uint32_t va2 = abcd[1U]; uint32_t vb3 = abcd[2U]; @@ -114,11 +108,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti3 = _t[3U]; uint32_t v2 = - vb3 - + - ((va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) - << 22U - | (va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) >> 10U); + vb3 + + ((va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) << 22U | + (va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) >> 10U); abcd[1U] = v2; uint32_t va3 = abcd[0U]; uint32_t vb4 = abcd[1U]; @@ -130,11 +122,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti4 = _t[4U]; uint32_t v3 = - vb4 - + - ((va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) - << 7U - | (va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) >> 25U); + vb4 + + ((va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) << 7U | + (va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) >> 25U); abcd[0U] = v3; uint32_t va4 = abcd[3U]; uint32_t vb5 = abcd[0U]; @@ -146,11 +136,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti5 = _t[5U]; uint32_t v4 = - vb5 - + - ((va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) - << 12U - | (va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) >> 20U); + vb5 + + ((va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) << 12U | + (va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) >> 20U); abcd[3U] = v4; uint32_t va5 = abcd[2U]; uint32_t vb6 = abcd[3U]; @@ -162,11 +150,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti6 = _t[6U]; uint32_t v5 = - vb6 - + - ((va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) - << 17U - | (va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) >> 15U); + vb6 + + ((va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) << 17U | + (va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) >> 15U); abcd[2U] = v5; uint32_t va6 = abcd[1U]; uint32_t vb7 = abcd[2U]; @@ -178,11 +164,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti7 = _t[7U]; uint32_t v6 = - vb7 - + - ((va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) - << 22U - | (va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) >> 10U); + vb7 + + ((va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) << 22U | + (va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) >> 10U); abcd[1U] = v6; uint32_t va7 = abcd[0U]; uint32_t vb8 = abcd[1U]; @@ -194,11 +178,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti8 = _t[8U]; uint32_t v7 = - vb8 - + - ((va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) - << 7U - | (va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) >> 25U); + vb8 + + ((va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) << 7U | + (va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) >> 25U); abcd[0U] = v7; uint32_t va8 = abcd[3U]; uint32_t vb9 = abcd[0U]; @@ -210,11 +192,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti9 = _t[9U]; uint32_t v8 = - vb9 - + - ((va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) - << 12U - | (va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) >> 20U); + vb9 + + ((va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) << 12U | + (va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) >> 20U); abcd[3U] = v8; uint32_t va9 = abcd[2U]; uint32_t vb10 = abcd[3U]; @@ -226,11 +206,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti10 = _t[10U]; uint32_t v9 = - vb10 - + - ((va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) - << 17U - | (va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) >> 15U); + vb10 + + ((va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) << 17U | + (va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) >> 15U); abcd[2U] = v9; uint32_t va10 = abcd[1U]; uint32_t vb11 = abcd[2U]; @@ -242,11 +220,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti11 = _t[11U]; uint32_t v10 = - vb11 - + - ((va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) - << 22U - | (va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) >> 10U); + vb11 + + ((va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) << 22U | + (va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) >> 10U); abcd[1U] = v10; uint32_t va11 = abcd[0U]; uint32_t vb12 = abcd[1U]; @@ -258,11 +234,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti12 = _t[12U]; uint32_t v11 = - vb12 - + - ((va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) - << 7U - | (va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) >> 25U); + vb12 + + ((va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) << 7U | + (va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) >> 25U); abcd[0U] = v11; uint32_t va12 = abcd[3U]; uint32_t vb13 = abcd[0U]; @@ -274,11 +248,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti13 = _t[13U]; uint32_t v12 = - vb13 - + - ((va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) - << 12U - | (va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) >> 20U); + vb13 + + ((va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) << 12U | + (va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) >> 20U); abcd[3U] = v12; uint32_t va13 = abcd[2U]; uint32_t vb14 = abcd[3U]; @@ -290,11 +262,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti14 = _t[14U]; uint32_t v13 = - vb14 - + - ((va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) - << 17U - | (va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) >> 15U); + vb14 + + ((va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) << 17U | + (va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) >> 15U); abcd[2U] = v13; uint32_t va14 = abcd[1U]; uint32_t vb15 = abcd[2U]; @@ -306,11 +276,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti15 = _t[15U]; uint32_t v14 = - vb15 - + - ((va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) - << 22U - | (va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) >> 10U); + vb15 + + ((va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) << 22U | + (va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) >> 10U); abcd[1U] = v14; uint32_t va15 = abcd[0U]; uint32_t vb16 = abcd[1U]; @@ -322,11 +290,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti16 = _t[16U]; uint32_t v15 = - vb16 - + - ((va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) - << 5U - | (va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) >> 27U); + vb16 + + ((va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) << 5U | + (va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) >> 27U); abcd[0U] = v15; uint32_t va16 = abcd[3U]; uint32_t vb17 = abcd[0U]; @@ -338,11 +304,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti17 = _t[17U]; uint32_t v16 = - vb17 - + - ((va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) - << 9U - | (va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) >> 23U); + vb17 + + ((va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) << 9U | + (va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) >> 23U); abcd[3U] = v16; uint32_t va17 = abcd[2U]; uint32_t vb18 = abcd[3U]; @@ -354,11 +318,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti18 = _t[18U]; uint32_t v17 = - vb18 - + - ((va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) - << 14U - | (va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) >> 18U); + vb18 + + ((va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) << 14U | + (va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) >> 18U); abcd[2U] = v17; uint32_t va18 = abcd[1U]; uint32_t vb19 = abcd[2U]; @@ -370,11 +332,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti19 = _t[19U]; uint32_t v18 = - vb19 - + - ((va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) - << 20U - | (va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) >> 12U); + vb19 + + ((va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) << 20U | + (va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) >> 12U); abcd[1U] = v18; uint32_t va19 = abcd[0U]; uint32_t vb20 = abcd[1U]; @@ -386,11 +346,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti20 = _t[20U]; uint32_t v19 = - vb20 - + - ((va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) - << 5U - | (va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) >> 27U); + vb20 + + ((va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) << 5U | + (va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) >> 27U); abcd[0U] = v19; uint32_t va20 = abcd[3U]; uint32_t vb21 = abcd[0U]; @@ -402,11 +360,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti21 = _t[21U]; uint32_t v20 = - vb21 - + - ((va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) - << 9U - | (va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) >> 23U); + vb21 + + ((va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) << 9U | + (va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) >> 23U); abcd[3U] = v20; uint32_t va21 = abcd[2U]; uint32_t vb22 = abcd[3U]; @@ -418,11 +374,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti22 = _t[22U]; uint32_t v21 = - vb22 - + - ((va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) - << 14U - | (va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) >> 18U); + vb22 + + ((va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) << 14U | + (va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) >> 18U); abcd[2U] = v21; uint32_t va22 = abcd[1U]; uint32_t vb23 = abcd[2U]; @@ -434,11 +388,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti23 = _t[23U]; uint32_t v22 = - vb23 - + - ((va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) - << 20U - | (va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) >> 12U); + vb23 + + ((va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) << 20U | + (va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) >> 12U); abcd[1U] = v22; uint32_t va23 = abcd[0U]; uint32_t vb24 = abcd[1U]; @@ -450,11 +402,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti24 = _t[24U]; uint32_t v23 = - vb24 - + - ((va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) - << 5U - | (va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) >> 27U); + vb24 + + ((va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) << 5U | + (va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) >> 27U); abcd[0U] = v23; uint32_t va24 = abcd[3U]; uint32_t vb25 = abcd[0U]; @@ -466,11 +416,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti25 = _t[25U]; uint32_t v24 = - vb25 - + - ((va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) - << 9U - | (va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) >> 23U); + vb25 + + ((va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) << 9U | + (va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) >> 23U); abcd[3U] = v24; uint32_t va25 = abcd[2U]; uint32_t vb26 = abcd[3U]; @@ -482,11 +430,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti26 = _t[26U]; uint32_t v25 = - vb26 - + - ((va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) - << 14U - | (va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) >> 18U); + vb26 + + ((va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) << 14U | + (va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) >> 18U); abcd[2U] = v25; uint32_t va26 = abcd[1U]; uint32_t vb27 = abcd[2U]; @@ -498,11 +444,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti27 = _t[27U]; uint32_t v26 = - vb27 - + - ((va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) - << 20U - | (va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) >> 12U); + vb27 + + ((va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) << 20U | + (va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) >> 12U); abcd[1U] = v26; uint32_t va27 = abcd[0U]; uint32_t vb28 = abcd[1U]; @@ -514,11 +458,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti28 = _t[28U]; uint32_t v27 = - vb28 - + - ((va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) - << 5U - | (va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) >> 27U); + vb28 + + ((va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) << 5U | + (va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) >> 27U); abcd[0U] = v27; uint32_t va28 = abcd[3U]; uint32_t vb29 = abcd[0U]; @@ -530,11 +472,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti29 = _t[29U]; uint32_t v28 = - vb29 - + - ((va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) - << 9U - | (va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) >> 23U); + vb29 + + ((va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) << 9U | + (va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) >> 23U); abcd[3U] = v28; uint32_t va29 = abcd[2U]; uint32_t vb30 = abcd[3U]; @@ -546,11 +486,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti30 = _t[30U]; uint32_t v29 = - vb30 - + - ((va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) - << 14U - | (va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) >> 18U); + vb30 + + ((va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) << 14U | + (va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) >> 18U); abcd[2U] = v29; uint32_t va30 = abcd[1U]; uint32_t vb31 = abcd[2U]; @@ -562,11 +500,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti31 = _t[31U]; uint32_t v30 = - vb31 - + - ((va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) - << 20U - | (va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) >> 12U); + vb31 + + ((va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) << 20U | + (va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) >> 12U); abcd[1U] = v30; uint32_t va31 = abcd[0U]; uint32_t vb32 = abcd[1U]; @@ -578,11 +514,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti32 = _t[32U]; uint32_t v31 = - vb32 - + - ((va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) - << 4U - | (va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) >> 28U); + vb32 + + ((va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) << 4U | + (va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) >> 28U); abcd[0U] = v31; uint32_t va32 = abcd[3U]; uint32_t vb33 = abcd[0U]; @@ -594,11 +528,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti33 = _t[33U]; uint32_t v32 = - vb33 - + - ((va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) - << 11U - | (va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) >> 21U); + vb33 + + ((va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) << 11U | + (va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) >> 21U); abcd[3U] = v32; uint32_t va33 = abcd[2U]; uint32_t vb34 = abcd[3U]; @@ -610,11 +542,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti34 = _t[34U]; uint32_t v33 = - vb34 - + - ((va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) - << 16U - | (va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) >> 16U); + vb34 + + ((va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) << 16U | + (va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) >> 16U); abcd[2U] = v33; uint32_t va34 = abcd[1U]; uint32_t vb35 = abcd[2U]; @@ -626,11 +556,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti35 = _t[35U]; uint32_t v34 = - vb35 - + - ((va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) - << 23U - | (va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) >> 9U); + vb35 + + ((va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) << 23U | + (va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) >> 9U); abcd[1U] = v34; uint32_t va35 = abcd[0U]; uint32_t vb36 = abcd[1U]; @@ -642,11 +570,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti36 = _t[36U]; uint32_t v35 = - vb36 - + - ((va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) - << 4U - | (va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) >> 28U); + vb36 + + ((va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) << 4U | + (va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) >> 28U); abcd[0U] = v35; uint32_t va36 = abcd[3U]; uint32_t vb37 = abcd[0U]; @@ -658,11 +584,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti37 = _t[37U]; uint32_t v36 = - vb37 - + - ((va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) - << 11U - | (va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) >> 21U); + vb37 + + ((va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) << 11U | + (va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) >> 21U); abcd[3U] = v36; uint32_t va37 = abcd[2U]; uint32_t vb38 = abcd[3U]; @@ -674,11 +598,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti38 = _t[38U]; uint32_t v37 = - vb38 - + - ((va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) - << 16U - | (va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) >> 16U); + vb38 + + ((va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) << 16U | + (va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) >> 16U); abcd[2U] = v37; uint32_t va38 = abcd[1U]; uint32_t vb39 = abcd[2U]; @@ -690,11 +612,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti39 = _t[39U]; uint32_t v38 = - vb39 - + - ((va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) - << 23U - | (va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) >> 9U); + vb39 + + ((va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) << 23U | + (va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) >> 9U); abcd[1U] = v38; uint32_t va39 = abcd[0U]; uint32_t vb40 = abcd[1U]; @@ -706,11 +626,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti40 = _t[40U]; uint32_t v39 = - vb40 - + - ((va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) - << 4U - | (va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) >> 28U); + vb40 + + ((va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) << 4U | + (va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) >> 28U); abcd[0U] = v39; uint32_t va40 = abcd[3U]; uint32_t vb41 = abcd[0U]; @@ -722,11 +640,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti41 = _t[41U]; uint32_t v40 = - vb41 - + - ((va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) - << 11U - | (va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) >> 21U); + vb41 + + ((va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) << 11U | + (va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) >> 21U); abcd[3U] = v40; uint32_t va41 = abcd[2U]; uint32_t vb42 = abcd[3U]; @@ -738,11 +654,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti42 = _t[42U]; uint32_t v41 = - vb42 - + - ((va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) - << 16U - | (va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) >> 16U); + vb42 + + ((va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) << 16U | + (va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) >> 16U); abcd[2U] = v41; uint32_t va42 = abcd[1U]; uint32_t vb43 = abcd[2U]; @@ -754,11 +668,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti43 = _t[43U]; uint32_t v42 = - vb43 - + - ((va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) - << 23U - | (va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) >> 9U); + vb43 + + ((va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) << 23U | + (va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) >> 9U); abcd[1U] = v42; uint32_t va43 = abcd[0U]; uint32_t vb44 = abcd[1U]; @@ -770,11 +682,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti44 = _t[44U]; uint32_t v43 = - vb44 - + - ((va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) - << 4U - | (va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) >> 28U); + vb44 + + ((va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) << 4U | + (va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) >> 28U); abcd[0U] = v43; uint32_t va44 = abcd[3U]; uint32_t vb45 = abcd[0U]; @@ -786,11 +696,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti45 = _t[45U]; uint32_t v44 = - vb45 - + - ((va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) - << 11U - | (va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) >> 21U); + vb45 + + ((va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) << 11U | + (va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) >> 21U); abcd[3U] = v44; uint32_t va45 = abcd[2U]; uint32_t vb46 = abcd[3U]; @@ -802,11 +710,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti46 = _t[46U]; uint32_t v45 = - vb46 - + - ((va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) - << 16U - | (va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) >> 16U); + vb46 + + ((va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) << 16U | + (va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) >> 16U); abcd[2U] = v45; uint32_t va46 = abcd[1U]; uint32_t vb47 = abcd[2U]; @@ -818,11 +724,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti47 = _t[47U]; uint32_t v46 = - vb47 - + - ((va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) - << 23U - | (va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) >> 9U); + vb47 + + ((va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) << 23U | + (va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) >> 9U); abcd[1U] = v46; uint32_t va47 = abcd[0U]; uint32_t vb48 = abcd[1U]; @@ -834,11 +738,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti48 = _t[48U]; uint32_t v47 = - vb48 - + - ((va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) - << 6U - | (va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) >> 26U); + vb48 + + ((va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) << 6U | + (va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) >> 26U); abcd[0U] = v47; uint32_t va48 = abcd[3U]; uint32_t vb49 = abcd[0U]; @@ -850,11 +752,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti49 = _t[49U]; uint32_t v48 = - vb49 - + - ((va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) - << 10U - | (va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) >> 22U); + vb49 + + ((va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) << 10U | + (va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) >> 22U); abcd[3U] = v48; uint32_t va49 = abcd[2U]; uint32_t vb50 = abcd[3U]; @@ -866,11 +766,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti50 = _t[50U]; uint32_t v49 = - vb50 - + - ((va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) - << 15U - | (va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) >> 17U); + vb50 + + ((va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) << 15U | + (va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) >> 17U); abcd[2U] = v49; uint32_t va50 = abcd[1U]; uint32_t vb51 = abcd[2U]; @@ -882,11 +780,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti51 = _t[51U]; uint32_t v50 = - vb51 - + - ((va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) - << 21U - | (va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) >> 11U); + vb51 + + ((va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) << 21U | + (va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) >> 11U); abcd[1U] = v50; uint32_t va51 = abcd[0U]; uint32_t vb52 = abcd[1U]; @@ -898,11 +794,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti52 = _t[52U]; uint32_t v51 = - vb52 - + - ((va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) - << 6U - | (va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) >> 26U); + vb52 + + ((va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) << 6U | + (va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) >> 26U); abcd[0U] = v51; uint32_t va52 = abcd[3U]; uint32_t vb53 = abcd[0U]; @@ -914,11 +808,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti53 = _t[53U]; uint32_t v52 = - vb53 - + - ((va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) - << 10U - | (va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) >> 22U); + vb53 + + ((va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) << 10U | + (va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) >> 22U); abcd[3U] = v52; uint32_t va53 = abcd[2U]; uint32_t vb54 = abcd[3U]; @@ -930,11 +822,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti54 = _t[54U]; uint32_t v53 = - vb54 - + - ((va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) - << 15U - | (va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) >> 17U); + vb54 + + ((va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) << 15U | + (va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) >> 17U); abcd[2U] = v53; uint32_t va54 = abcd[1U]; uint32_t vb55 = abcd[2U]; @@ -946,11 +836,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti55 = _t[55U]; uint32_t v54 = - vb55 - + - ((va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) - << 21U - | (va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) >> 11U); + vb55 + + ((va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) << 21U | + (va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) >> 11U); abcd[1U] = v54; uint32_t va55 = abcd[0U]; uint32_t vb56 = abcd[1U]; @@ -962,11 +850,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti56 = _t[56U]; uint32_t v55 = - vb56 - + - ((va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) - << 6U - | (va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) >> 26U); + vb56 + + ((va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) << 6U | + (va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) >> 26U); abcd[0U] = v55; uint32_t va56 = abcd[3U]; uint32_t vb57 = abcd[0U]; @@ -978,11 +864,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti57 = _t[57U]; uint32_t v56 = - vb57 - + - ((va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) - << 10U - | (va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) >> 22U); + vb57 + + ((va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) << 10U | + (va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) >> 22U); abcd[3U] = v56; uint32_t va57 = abcd[2U]; uint32_t vb58 = abcd[3U]; @@ -994,11 +878,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti58 = _t[58U]; uint32_t v57 = - vb58 - + - ((va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) - << 15U - | (va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) >> 17U); + vb58 + + ((va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) << 15U | + (va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) >> 17U); abcd[2U] = v57; uint32_t va58 = abcd[1U]; uint32_t vb59 = abcd[2U]; @@ -1010,11 +892,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti59 = _t[59U]; uint32_t v58 = - vb59 - + - ((va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) - << 21U - | (va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) >> 11U); + vb59 + + ((va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) << 21U | + (va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) >> 11U); abcd[1U] = v58; uint32_t va59 = abcd[0U]; uint32_t vb60 = abcd[1U]; @@ -1026,11 +906,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti60 = _t[60U]; uint32_t v59 = - vb60 - + - ((va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) - << 6U - | (va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) >> 26U); + vb60 + + ((va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) << 6U | + (va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) >> 26U); abcd[0U] = v59; uint32_t va60 = abcd[3U]; uint32_t vb61 = abcd[0U]; @@ -1042,11 +920,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti61 = _t[61U]; uint32_t v60 = - vb61 - + - ((va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) - << 10U - | (va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) >> 22U); + vb61 + + ((va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) << 10U | + (va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) >> 22U); abcd[3U] = v60; uint32_t va61 = abcd[2U]; uint32_t vb62 = abcd[3U]; @@ -1058,11 +934,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti62 = _t[62U]; uint32_t v61 = - vb62 - + - ((va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) - << 15U - | (va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) >> 17U); + vb62 + + ((va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) << 15U | + (va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) >> 17U); abcd[2U] = v61; uint32_t va62 = abcd[1U]; uint32_t vb = abcd[2U]; @@ -1074,11 +948,8 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti = _t[63U]; uint32_t v62 = - vb - + - ((va62 + (vc ^ (vb | ~vd)) + xk62 + ti) - << 21U - | (va62 + (vc ^ (vb | ~vd)) + xk62 + ti) >> 11U); + vb + + ((va62 + (vc ^ (vb | ~vd)) + xk62 + ti) << 21U | (va62 + (vc ^ (vb | ~vd)) + xk62 + ti) >> 11U); abcd[1U] = v62; uint32_t a = abcd[0U]; uint32_t b = abcd[1U]; @@ -1282,8 +1153,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1328,8 +1198,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1359,8 +1228,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -1403,8 +1271,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_MD5.h b/Modules/_hacl/Hacl_Hash_MD5.h index 521c2addc50289..b220bbf6890ffa 100644 --- a/Modules/_hacl/Hacl_Hash_MD5.h +++ b/Modules/_hacl/Hacl_Hash_MD5.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_MD5_H -#define __Hacl_Hash_MD5_H +#ifndef Hacl_Hash_MD5_H +#define Hacl_Hash_MD5_H #if defined(__cplusplus) extern "C" { @@ -62,5 +62,5 @@ void Hacl_Hash_MD5_hash(uint8_t *output, uint8_t *input, uint32_t input_len); } #endif -#define __Hacl_Hash_MD5_H_DEFINED -#endif +#define Hacl_Hash_MD5_H_DEFINED +#endif /* Hacl_Hash_MD5_H */ diff --git a/Modules/_hacl/Hacl_Hash_SHA1.c b/Modules/_hacl/Hacl_Hash_SHA1.c index 508e447bf275da..97bd4f2204cf24 100644 --- a/Modules/_hacl/Hacl_Hash_SHA1.c +++ b/Modules/_hacl/Hacl_Hash_SHA1.c @@ -315,8 +315,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -361,8 +360,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -392,8 +390,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -436,8 +433,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA1.h b/Modules/_hacl/Hacl_Hash_SHA1.h index 63ac83f9dce018..7fc8c8cfd205d9 100644 --- a/Modules/_hacl/Hacl_Hash_SHA1.h +++ b/Modules/_hacl/Hacl_Hash_SHA1.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_SHA1_H -#define __Hacl_Hash_SHA1_H +#ifndef Hacl_Hash_SHA1_H +#define Hacl_Hash_SHA1_H #if defined(__cplusplus) extern "C" { @@ -62,5 +62,5 @@ void Hacl_Hash_SHA1_hash(uint8_t *output, uint8_t *input, uint32_t input_len); } #endif -#define __Hacl_Hash_SHA1_H_DEFINED -#endif +#define Hacl_Hash_SHA1_H_DEFINED +#endif /* Hacl_Hash_SHA1_H */ diff --git a/Modules/_hacl/Hacl_Hash_SHA2.c b/Modules/_hacl/Hacl_Hash_SHA2.c index d612bafa72cdc4..d2ee0c9ef51721 100644 --- a/Modules/_hacl/Hacl_Hash_SHA2.c +++ b/Modules/_hacl/Hacl_Hash_SHA2.c @@ -100,15 +100,14 @@ static inline void sha256_update(uint8_t *b, uint32_t *hash) uint32_t k_e_t = k_t; uint32_t t1 = - h02 - + ((e0 << 26U | e0 >> 6U) ^ ((e0 << 21U | e0 >> 11U) ^ (e0 << 7U | e0 >> 25U))) - + ((e0 & f0) ^ (~e0 & g0)) + h02 + ((e0 << 26U | e0 >> 6U) ^ ((e0 << 21U | e0 >> 11U) ^ (e0 << 7U | e0 >> 25U))) + + ((e0 & f0) ^ (~e0 & g0)) + k_e_t + ws_t; uint32_t t2 = - ((a0 << 30U | a0 >> 2U) ^ ((a0 << 19U | a0 >> 13U) ^ (a0 << 10U | a0 >> 22U))) - + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); + ((a0 << 30U | a0 >> 2U) ^ ((a0 << 19U | a0 >> 13U) ^ (a0 << 10U | a0 >> 22U))) + + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); uint32_t a1 = t1 + t2; uint32_t b1 = a0; uint32_t c1 = b0; @@ -301,15 +300,14 @@ static inline void sha512_update(uint8_t *b, uint64_t *hash) uint64_t k_e_t = k_t; uint64_t t1 = - h02 - + ((e0 << 50U | e0 >> 14U) ^ ((e0 << 46U | e0 >> 18U) ^ (e0 << 23U | e0 >> 41U))) - + ((e0 & f0) ^ (~e0 & g0)) + h02 + ((e0 << 50U | e0 >> 14U) ^ ((e0 << 46U | e0 >> 18U) ^ (e0 << 23U | e0 >> 41U))) + + ((e0 & f0) ^ (~e0 & g0)) + k_e_t + ws_t; uint64_t t2 = - ((a0 << 36U | a0 >> 28U) ^ ((a0 << 30U | a0 >> 34U) ^ (a0 << 25U | a0 >> 39U))) - + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); + ((a0 << 36U | a0 >> 28U) ^ ((a0 << 30U | a0 >> 34U) ^ (a0 << 25U | a0 >> 39U))) + + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); uint64_t a1 = t1 + t2; uint64_t b1 = a0; uint64_t c1 = b0; @@ -639,8 +637,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -685,8 +682,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -716,8 +712,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -760,8 +755,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1205,8 +1199,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, @@ -1251,8 +1244,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, @@ -1282,8 +1274,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state10, @@ -1326,8 +1317,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA2.h b/Modules/_hacl/Hacl_Hash_SHA2.h index a93138fb7ee7a7..dd168b8aa1a424 100644 --- a/Modules/_hacl/Hacl_Hash_SHA2.h +++ b/Modules/_hacl/Hacl_Hash_SHA2.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_SHA2_H -#define __Hacl_Hash_SHA2_H +#ifndef Hacl_Hash_SHA2_H +#define Hacl_Hash_SHA2_H #if defined(__cplusplus) extern "C" { @@ -199,5 +199,5 @@ void Hacl_Hash_SHA2_hash_384(uint8_t *output, uint8_t *input, uint32_t input_len } #endif -#define __Hacl_Hash_SHA2_H_DEFINED -#endif +#define Hacl_Hash_SHA2_H_DEFINED +#endif /* Hacl_Hash_SHA2_H */ diff --git a/Modules/_hacl/Hacl_Hash_SHA3.c b/Modules/_hacl/Hacl_Hash_SHA3.c index 87638df9549fbb..466d2b96c0cdfa 100644 --- a/Modules/_hacl/Hacl_Hash_SHA3.c +++ b/Modules/_hacl/Hacl_Hash_SHA3.c @@ -866,8 +866,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ((Hacl_Hash_SHA3_state_t){ .block_state = block_state1, .buf = buf, .total_len = total_len2 }); } else if (sz == 0U) @@ -910,8 +909,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state1, @@ -941,8 +939,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state10, @@ -972,10 +969,8 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint32_t ite; if ( - (uint64_t)(chunk_len - diff) - % (uint64_t)block_len(i) - == 0ULL - && (uint64_t)(chunk_len - diff) > 0ULL + (uint64_t)(chunk_len - diff) % (uint64_t)block_len(i) == 0ULL && + (uint64_t)(chunk_len - diff) > 0ULL ) { ite = block_len(i); @@ -994,8 +989,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state1, @@ -2422,9 +2416,7 @@ Hacl_Hash_SHA3_shake128_squeeze_nblocks( 5U, 1U, _C[i] = - state[i - + 0U] - ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); KRML_MAYBE_FOR5(i2, 0U, 5U, diff --git a/Modules/_hacl/Hacl_Hash_SHA3.h b/Modules/_hacl/Hacl_Hash_SHA3.h index 65ec99ee3a3ac9..df6ebe439e98b1 100644 --- a/Modules/_hacl/Hacl_Hash_SHA3.h +++ b/Modules/_hacl/Hacl_Hash_SHA3.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Hash_SHA3_H -#define __Hacl_Hash_SHA3_H +#ifndef Hacl_Hash_SHA3_H +#define Hacl_Hash_SHA3_H #if defined(__cplusplus) extern "C" { @@ -155,5 +155,5 @@ Hacl_Hash_SHA3_shake128_squeeze_nblocks( } #endif -#define __Hacl_Hash_SHA3_H_DEFINED -#endif +#define Hacl_Hash_SHA3_H_DEFINED +#endif /* Hacl_Hash_SHA3_H */ diff --git a/Modules/_hacl/Hacl_Streaming_HMAC.c b/Modules/_hacl/Hacl_Streaming_HMAC.c index d28b39792af576..f89c00ee1ae416 100644 --- a/Modules/_hacl/Hacl_Streaming_HMAC.c +++ b/Modules/_hacl/Hacl_Streaming_HMAC.c @@ -198,8 +198,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_MD5_a, { .case_MD5_a = s1 } }); + st[0U] = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_MD5_a, { .case_MD5_a = s1 } }); } if (st == NULL) { @@ -220,8 +219,8 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA1_a, { .case_SHA1_a = s1 } }); + st[0U] = + ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA1_a, { .case_SHA1_a = s1 } }); } if (st == NULL) { @@ -242,8 +241,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_224_a, @@ -270,8 +268,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_256_a, @@ -298,8 +295,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_384_a, @@ -326,8 +322,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_512_a, @@ -354,8 +349,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_224_a, @@ -382,8 +376,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_256_a, @@ -410,8 +403,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_384_a, @@ -438,8 +430,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_512_a, @@ -466,8 +457,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2S_a, @@ -495,8 +485,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2S_128_a, @@ -531,8 +520,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2B_a, @@ -560,8 +548,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2B_256_a, @@ -2059,8 +2046,8 @@ Hacl_Streaming_HMAC_update( Hacl_Streaming_HMAC_Definitions_index i1 = Hacl_Streaming_HMAC_index_of_state(block_state); if ( - (uint64_t)chunk_len - > max_input_len64(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - total_len + (uint64_t)chunk_len > + max_input_len64(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - total_len ) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -2068,9 +2055,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz; if ( - total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len > 0ULL ) { @@ -2079,8 +2064,8 @@ Hacl_Streaming_HMAC_update( else { sz = - (uint32_t)(total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (chunk_len <= block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - sz) { @@ -2091,9 +2076,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2102,14 +2085,13 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2127,9 +2109,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2138,8 +2118,8 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (!(sz1 == 0U)) { @@ -2153,8 +2133,8 @@ Hacl_Streaming_HMAC_update( uint32_t ite; if ( - (uint64_t)chunk_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) + (uint64_t)chunk_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && (uint64_t)chunk_len > 0ULL ) @@ -2164,8 +2144,8 @@ Hacl_Streaming_HMAC_update( else { ite = - (uint32_t)((uint64_t)chunk_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)((uint64_t)chunk_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint32_t n_blocks = (chunk_len - ite) / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); @@ -2178,8 +2158,7 @@ Hacl_Streaming_HMAC_update( update_multi(s11, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2200,9 +2179,8 @@ Hacl_Streaming_HMAC_update( uint32_t sz10; if ( - total_len10 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len10 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == + 0ULL && total_len10 > 0ULL ) { @@ -2211,14 +2189,13 @@ Hacl_Streaming_HMAC_update( else { sz10 = - (uint32_t)(total_len10 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len10 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state10, @@ -2233,9 +2210,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2244,8 +2219,8 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (!(sz1 == 0U)) { @@ -2259,8 +2234,8 @@ Hacl_Streaming_HMAC_update( uint32_t ite; if ( - (uint64_t)(chunk_len - diff) - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) + (uint64_t)(chunk_len - diff) % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && (uint64_t)(chunk_len - diff) > 0ULL ) @@ -2270,13 +2245,12 @@ Hacl_Streaming_HMAC_update( else { ite = - (uint32_t)((uint64_t)(chunk_len - diff) - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)((uint64_t)(chunk_len - diff) % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint32_t n_blocks = - (chunk_len - diff - ite) - / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); + (chunk_len - diff - ite) / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); uint32_t data1_len = n_blocks * block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); uint32_t data2_len = chunk_len - diff - data1_len; @@ -2286,8 +2260,7 @@ Hacl_Streaming_HMAC_update( update_multi(s11, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2324,9 +2297,7 @@ Hacl_Streaming_HMAC_digest( uint32_t r; if ( - total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len > 0ULL ) { @@ -2335,8 +2306,8 @@ Hacl_Streaming_HMAC_digest( else { r = - (uint32_t)(total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf_1 = buf_; Hacl_Agile_Hash_state_s *s110 = malloc_(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)); @@ -2404,9 +2375,13 @@ Hacl_Streaming_HMAC_digest( Hacl_Agile_Hash_state_s *s112 = tmp_block_state1.snd; update_multi(s112, prev_len, buf_multi, 0U); uint64_t prev_len_last = total_len - (uint64_t)r; - Hacl_Agile_Hash_state_s *s11 = tmp_block_state1.snd; - update_last(s11, prev_len_last, buf_last, r); + Hacl_Agile_Hash_state_s *s113 = tmp_block_state1.snd; + update_last(s113, prev_len_last, buf_last, r); finish0(tmp_block_state1, output); + Hacl_Agile_Hash_state_s *s210 = tmp_block_state1.thd; + Hacl_Agile_Hash_state_s *s11 = tmp_block_state1.snd; + free_(s11); + free_(s210); return Hacl_Streaming_Types_Success; } KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", diff --git a/Modules/_hacl/Hacl_Streaming_HMAC.h b/Modules/_hacl/Hacl_Streaming_HMAC.h index a0806c02d0bb9f..6c302b1f92d6a3 100644 --- a/Modules/_hacl/Hacl_Streaming_HMAC.h +++ b/Modules/_hacl/Hacl_Streaming_HMAC.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Streaming_HMAC_H -#define __Hacl_Streaming_HMAC_H +#ifndef Hacl_Streaming_HMAC_H +#define Hacl_Streaming_HMAC_H #if defined(__cplusplus) extern "C" { @@ -130,5 +130,5 @@ Hacl_Streaming_HMAC_agile_state } #endif -#define __Hacl_Streaming_HMAC_H_DEFINED -#endif +#define Hacl_Streaming_HMAC_H_DEFINED +#endif /* Hacl_Streaming_HMAC_H */ diff --git a/Modules/_hacl/Hacl_Streaming_Types.h b/Modules/_hacl/Hacl_Streaming_Types.h index 5c497750793720..cce25eb7946750 100644 --- a/Modules/_hacl/Hacl_Streaming_Types.h +++ b/Modules/_hacl/Hacl_Streaming_Types.h @@ -23,8 +23,8 @@ */ -#ifndef __Hacl_Streaming_Types_H -#define __Hacl_Streaming_Types_H +#ifndef Hacl_Streaming_Types_H +#define Hacl_Streaming_Types_H #if defined(__cplusplus) extern "C" { @@ -68,5 +68,5 @@ typedef struct Hacl_Streaming_MD_state_64_s Hacl_Streaming_MD_state_64; } #endif -#define __Hacl_Streaming_Types_H_DEFINED -#endif +#define Hacl_Streaming_Types_H_DEFINED +#endif /* Hacl_Streaming_Types_H */ diff --git a/Modules/_hacl/Lib_Memzero0.c b/Modules/_hacl/Lib_Memzero0.c index 28abd1aa4e2d54..f94e0e2254a912 100644 --- a/Modules/_hacl/Lib_Memzero0.c +++ b/Modules/_hacl/Lib_Memzero0.c @@ -11,18 +11,18 @@ #if defined(__APPLE__) && defined(__MACH__) #include <AvailabilityMacros.h> // memset_s is available from macOS 10.9, iOS 7, watchOS 2, and on all tvOS and visionOS versions. -# if (defined(MAC_OS_X_VERSION_MIN_REQUIRED) && (MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9)) -# define APPLE_HAS_MEMSET_S 1 -# elif (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && (__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0)) -# define APPLE_HAS_MEMSET_S 1 +# if (defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_X_VERSION_10_9) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9)) +# define APPLE_HAS_MEMSET_S +# elif (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && defined(__IPHONE_7_0) && (__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0)) +# define APPLE_HAS_MEMSET_S # elif (defined(TARGET_OS_TV) && TARGET_OS_TV) -# define APPLE_HAS_MEMSET_S 1 -# elif (defined(__WATCH_OS_VERSION_MIN_REQUIRED) && (__WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_2_0)) -# define APPLE_HAS_MEMSET_S 1 +# define APPLE_HAS_MEMSET_S +# elif (defined(__WATCH_OS_VERSION_MIN_REQUIRED) && defined(__WATCHOS_2_0) && (__WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_2_0)) +# define APPLE_HAS_MEMSET_S # elif (defined(TARGET_OS_VISION) && TARGET_OS_VISION) -# define APPLE_HAS_MEMSET_S 1 +# define APPLE_HAS_MEMSET_S # else -# define APPLE_HAS_MEMSET_S 0 +# undef APPLE_HAS_MEMSET_S # endif #endif @@ -55,7 +55,7 @@ void Lib_Memzero0_memzero0(void *dst, uint64_t len) { #ifdef _WIN32 SecureZeroMemory(dst, len_); - #elif defined(__APPLE__) && defined(__MACH__) && APPLE_HAS_MEMSET_S + #elif defined(__APPLE__) && defined(__MACH__) && defined(APPLE_HAS_MEMSET_S) memset_s(dst, len_, 0, len_); #elif (defined(__linux__) && !defined(LINUX_NO_EXPLICIT_BZERO)) || defined(__FreeBSD__) || defined(__OpenBSD__) explicit_bzero(dst, len_); diff --git a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h index d4a90220beafb5..7aa2a6d83f9c4b 100644 --- a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h +++ b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h @@ -4,8 +4,8 @@ */ -#ifndef __FStar_UInt128_Verified_H -#define __FStar_UInt128_Verified_H +#ifndef FStar_UInt128_Verified_H +#define FStar_UInt128_Verified_H #include "FStar_UInt_8_16_32_64.h" #include <inttypes.h> @@ -257,11 +257,11 @@ FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { FStar_UInt128_uint128 lit; lit.low = - (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) - | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | + (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); lit.high = - (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) - | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | + (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); return lit; } @@ -294,14 +294,12 @@ static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y) { FStar_UInt128_uint128 lit; lit.low = - FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) - * (uint64_t)y - + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); lit.high = - ((x >> FStar_UInt128_u32_32) - * (uint64_t)y - + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) + ((x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32; return lit; } @@ -315,32 +313,23 @@ static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t { FStar_UInt128_uint128 lit; lit.low = - FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) - * (y >> FStar_UInt128_u32_32) - + - FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); lit.high = - (x >> FStar_UInt128_u32_32) - * (y >> FStar_UInt128_u32_32) - + - (((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) + (x >> FStar_UInt128_u32_32) * (y >> FStar_UInt128_u32_32) + + (((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32) + - ((FStar_UInt128_u64_mod_32(x) - * (y >> FStar_UInt128_u32_32) - + - FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) + ((FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) >> FStar_UInt128_u32_32); return lit; } -#define __FStar_UInt128_Verified_H_DEFINED -#endif +#define FStar_UInt128_Verified_H_DEFINED +#endif /* FStar_UInt128_Verified_H */ diff --git a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h index 00be80836574af..2720348bbb79e6 100644 --- a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h +++ b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h @@ -4,8 +4,8 @@ */ -#ifndef __FStar_UInt_8_16_32_64_H -#define __FStar_UInt_8_16_32_64_H +#ifndef FStar_UInt_8_16_32_64_H +#define FStar_UInt_8_16_32_64_H #include <inttypes.h> #include <stdbool.h> @@ -30,6 +30,8 @@ extern uint64_t FStar_UInt64_zero; extern uint64_t FStar_UInt64_one; +extern bool FStar_UInt64_ne(uint64_t a, uint64_t b); + extern uint64_t FStar_UInt64_minus(uint64_t a); extern uint32_t FStar_UInt64_n_minus_one; @@ -80,6 +82,8 @@ extern uint32_t FStar_UInt32_zero; extern uint32_t FStar_UInt32_one; +extern bool FStar_UInt32_ne(uint32_t a, uint32_t b); + extern uint32_t FStar_UInt32_minus(uint32_t a); extern uint32_t FStar_UInt32_n_minus_one; @@ -130,6 +134,8 @@ extern uint16_t FStar_UInt16_zero; extern uint16_t FStar_UInt16_one; +extern bool FStar_UInt16_ne(uint16_t a, uint16_t b); + extern uint16_t FStar_UInt16_minus(uint16_t a); extern uint32_t FStar_UInt16_n_minus_one; @@ -180,6 +186,8 @@ extern uint8_t FStar_UInt8_zero; extern uint8_t FStar_UInt8_one; +extern bool FStar_UInt8_ne(uint8_t a, uint8_t b); + extern uint8_t FStar_UInt8_minus(uint8_t a); extern uint32_t FStar_UInt8_n_minus_one; @@ -217,5 +225,5 @@ extern uint8_t FStar_UInt8_of_string(Prims_string uu___); typedef uint8_t FStar_UInt8_byte; -#define __FStar_UInt_8_16_32_64_H_DEFINED -#endif +#define FStar_UInt_8_16_32_64_H_DEFINED +#endif /* FStar_UInt_8_16_32_64_H */ diff --git a/Modules/_hacl/include/krml/internal/target.h b/Modules/_hacl/include/krml/internal/target.h index c592214634a3bd..73555ab5ca19a4 100644 --- a/Modules/_hacl/include/krml/internal/target.h +++ b/Modules/_hacl/include/krml/internal/target.h @@ -1,8 +1,8 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. Licensed under the Apache 2.0 and MIT Licenses. */ -#ifndef __KRML_TARGET_H -#define __KRML_TARGET_H +#ifndef KRML_HEADER_TARGET_H +#define KRML_HEADER_TARGET_H #include <assert.h> #include <inttypes.h> @@ -12,6 +12,9 @@ #include <stdio.h> #include <stdlib.h> +typedef float float32_t; +typedef double float64_t; + /* Since KaRaMeL emits the inline keyword unconditionally, we follow the * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this * __inline__ to ensure the code compiles with -std=c90 and earlier. */ @@ -425,4 +428,4 @@ inline static int32_t krml_time(void) { #else # define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) #endif -#endif +#endif /* KRML_HEADER_TARGET_H */ diff --git a/Modules/_hacl/include/krml/internal/types.h b/Modules/_hacl/include/krml/internal/types.h index 2280dfad48db1e..d96ed19fcca5ac 100644 --- a/Modules/_hacl/include/krml/internal/types.h +++ b/Modules/_hacl/include/krml/internal/types.h @@ -92,7 +92,7 @@ typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; /* Avoid a circular loop: if this header is included via FStar_UInt8_16_32_64, * then don't bring the uint128 definitions into scope. */ -#ifndef __FStar_UInt_8_16_32_64_H +#ifndef FStar_UInt_8_16_32_64_H #if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) #include "fstar_uint128_msvc.h" diff --git a/Modules/_hacl/include/krml/lowstar_endianness.h b/Modules/_hacl/include/krml/lowstar_endianness.h index af6b882cf259cc..5f706fa51aad10 100644 --- a/Modules/_hacl/include/krml/lowstar_endianness.h +++ b/Modules/_hacl/include/krml/lowstar_endianness.h @@ -1,8 +1,8 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. Licensed under the Apache 2.0 and MIT Licenses. */ -#ifndef __LOWSTAR_ENDIANNESS_H -#define __LOWSTAR_ENDIANNESS_H +#ifndef KRML_HEADER_LOWSTAR_ENDIANNESS_H +#define KRML_HEADER_LOWSTAR_ENDIANNESS_H #include <string.h> #include <inttypes.h> @@ -228,4 +228,4 @@ inline static void store64(uint8_t *b, uint64_t i) { #define load128_be0 load128_be #define store128_be0 store128_be -#endif +#endif /* KRML_HEADER_LOWSTAR_ENDIANNESS_H */ diff --git a/Modules/_hacl/internal/Hacl_HMAC.h b/Modules/_hacl/internal/Hacl_HMAC.h index ad29d50760c3aa..ef9f25f5d420d3 100644 --- a/Modules/_hacl/internal/Hacl_HMAC.h +++ b/Modules/_hacl/internal/Hacl_HMAC.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_HMAC_H -#define __internal_Hacl_HMAC_H +#ifndef internal_Hacl_HMAC_H +#define internal_Hacl_HMAC_H #if defined(__cplusplus) extern "C" { @@ -48,5 +48,5 @@ K___uint32_t_uint32_t; } #endif -#define __internal_Hacl_HMAC_H_DEFINED -#endif +#define internal_Hacl_HMAC_H_DEFINED +#endif /* internal_Hacl_HMAC_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2b.h b/Modules/_hacl/internal/Hacl_Hash_Blake2b.h index e74c320e073f4b..7ca8e10e34cbbc 100644 --- a/Modules/_hacl/internal/Hacl_Hash_Blake2b.h +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2b.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_Blake2b_H -#define __internal_Hacl_Hash_Blake2b_H +#ifndef internal_Hacl_Hash_Blake2b_H +#define internal_Hacl_Hash_Blake2b_H #if defined(__cplusplus) extern "C" { @@ -91,5 +91,5 @@ Hacl_Hash_Blake2b_state_t; } #endif -#define __internal_Hacl_Hash_Blake2b_H_DEFINED -#endif +#define internal_Hacl_Hash_Blake2b_H_DEFINED +#endif /* internal_Hacl_Hash_Blake2b_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h b/Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h index 27633f22b24f0d..6507d287922eec 100644 --- a/Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_Blake2b_Simd256_H -#define __internal_Hacl_Hash_Blake2b_Simd256_H +#ifndef internal_Hacl_Hash_Blake2b_Simd256_H +#define internal_Hacl_Hash_Blake2b_Simd256_H #if defined(__cplusplus) extern "C" { @@ -134,5 +134,5 @@ Hacl_Hash_Blake2b_Simd256_state_t; } #endif -#define __internal_Hacl_Hash_Blake2b_Simd256_H_DEFINED -#endif +#define internal_Hacl_Hash_Blake2b_Simd256_H_DEFINED +#endif /* internal_Hacl_Hash_Blake2b_Simd256_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2s.h b/Modules/_hacl/internal/Hacl_Hash_Blake2s.h index 0c5781df8cea81..5fa5bb34efc0e0 100644 --- a/Modules/_hacl/internal/Hacl_Hash_Blake2s.h +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2s.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_Blake2s_H -#define __internal_Hacl_Hash_Blake2s_H +#ifndef internal_Hacl_Hash_Blake2s_H +#define internal_Hacl_Hash_Blake2s_H #if defined(__cplusplus) extern "C" { @@ -90,5 +90,5 @@ Hacl_Hash_Blake2s_state_t; } #endif -#define __internal_Hacl_Hash_Blake2s_H_DEFINED -#endif +#define internal_Hacl_Hash_Blake2s_H_DEFINED +#endif /* internal_Hacl_Hash_Blake2s_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h b/Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h index 0f5db552d6cfed..3220cddac30288 100644 --- a/Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_Blake2s_Simd128_H -#define __internal_Hacl_Hash_Blake2s_Simd128_H +#ifndef internal_Hacl_Hash_Blake2s_Simd128_H +#define internal_Hacl_Hash_Blake2s_Simd128_H #if defined(__cplusplus) extern "C" { @@ -134,5 +134,5 @@ Hacl_Hash_Blake2s_Simd128_state_t; } #endif -#define __internal_Hacl_Hash_Blake2s_Simd128_H_DEFINED -#endif +#define internal_Hacl_Hash_Blake2s_Simd128_H_DEFINED +#endif /* internal_Hacl_Hash_Blake2s_Simd128_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_MD5.h b/Modules/_hacl/internal/Hacl_Hash_MD5.h index 7fe71a49c6df85..d8761de99f5e2a 100644 --- a/Modules/_hacl/internal/Hacl_Hash_MD5.h +++ b/Modules/_hacl/internal/Hacl_Hash_MD5.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_MD5_H -#define __internal_Hacl_Hash_MD5_H +#ifndef internal_Hacl_Hash_MD5_H +#define internal_Hacl_Hash_MD5_H #if defined(__cplusplus) extern "C" { @@ -53,5 +53,5 @@ void Hacl_Hash_MD5_hash_oneshot(uint8_t *output, uint8_t *input, uint32_t input_ } #endif -#define __internal_Hacl_Hash_MD5_H_DEFINED -#endif +#define internal_Hacl_Hash_MD5_H_DEFINED +#endif /* internal_Hacl_Hash_MD5_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_SHA1.h b/Modules/_hacl/internal/Hacl_Hash_SHA1.h index ed53be559fe465..0e9f1c911f6165 100644 --- a/Modules/_hacl/internal/Hacl_Hash_SHA1.h +++ b/Modules/_hacl/internal/Hacl_Hash_SHA1.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_SHA1_H -#define __internal_Hacl_Hash_SHA1_H +#ifndef internal_Hacl_Hash_SHA1_H +#define internal_Hacl_Hash_SHA1_H #if defined(__cplusplus) extern "C" { @@ -52,5 +52,5 @@ void Hacl_Hash_SHA1_hash_oneshot(uint8_t *output, uint8_t *input, uint32_t input } #endif -#define __internal_Hacl_Hash_SHA1_H_DEFINED -#endif +#define internal_Hacl_Hash_SHA1_H_DEFINED +#endif /* internal_Hacl_Hash_SHA1_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_SHA2.h b/Modules/_hacl/internal/Hacl_Hash_SHA2.h index 98498ee9376996..e6750207980aef 100644 --- a/Modules/_hacl/internal/Hacl_Hash_SHA2.h +++ b/Modules/_hacl/internal/Hacl_Hash_SHA2.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_SHA2_H -#define __internal_Hacl_Hash_SHA2_H +#ifndef internal_Hacl_Hash_SHA2_H +#define internal_Hacl_Hash_SHA2_H #if defined(__cplusplus) extern "C" { @@ -161,5 +161,5 @@ void Hacl_Hash_SHA2_sha384_finish(uint64_t *st, uint8_t *h); } #endif -#define __internal_Hacl_Hash_SHA2_H_DEFINED -#endif +#define internal_Hacl_Hash_SHA2_H_DEFINED +#endif /* internal_Hacl_Hash_SHA2_H */ diff --git a/Modules/_hacl/internal/Hacl_Hash_SHA3.h b/Modules/_hacl/internal/Hacl_Hash_SHA3.h index e653c73b1d03f7..c08a4e7858bbab 100644 --- a/Modules/_hacl/internal/Hacl_Hash_SHA3.h +++ b/Modules/_hacl/internal/Hacl_Hash_SHA3.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Hash_SHA3_H -#define __internal_Hacl_Hash_SHA3_H +#ifndef internal_Hacl_Hash_SHA3_H +#define internal_Hacl_Hash_SHA3_H #if defined(__cplusplus) extern "C" { @@ -81,5 +81,5 @@ Hacl_Hash_SHA3_state_t; } #endif -#define __internal_Hacl_Hash_SHA3_H_DEFINED -#endif +#define internal_Hacl_Hash_SHA3_H_DEFINED +#endif /* internal_Hacl_Hash_SHA3_H */ diff --git a/Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h b/Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h index fb3a045cd5c608..2726cc484780e8 100644 --- a/Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h +++ b/Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Impl_Blake2_Constants_H -#define __internal_Hacl_Impl_Blake2_Constants_H +#ifndef internal_Hacl_Impl_Blake2_Constants_H +#define internal_Hacl_Impl_Blake2_Constants_H #if defined(__cplusplus) extern "C" { @@ -69,5 +69,5 @@ Hacl_Hash_Blake2b_ivTable_B[8U] = } #endif -#define __internal_Hacl_Impl_Blake2_Constants_H_DEFINED -#endif +#define internal_Hacl_Impl_Blake2_Constants_H_DEFINED +#endif /* internal_Hacl_Impl_Blake2_Constants_H */ diff --git a/Modules/_hacl/internal/Hacl_Streaming_HMAC.h b/Modules/_hacl/internal/Hacl_Streaming_HMAC.h index acc4f3996026ee..fb44f707463e04 100644 --- a/Modules/_hacl/internal/Hacl_Streaming_HMAC.h +++ b/Modules/_hacl/internal/Hacl_Streaming_HMAC.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Streaming_HMAC_H -#define __internal_Hacl_Streaming_HMAC_H +#ifndef internal_Hacl_Streaming_HMAC_H +#define internal_Hacl_Streaming_HMAC_H #if defined(__cplusplus) extern "C" { @@ -90,5 +90,5 @@ Hacl_Streaming_HMAC_agile_state; } #endif -#define __internal_Hacl_Streaming_HMAC_H_DEFINED -#endif +#define internal_Hacl_Streaming_HMAC_H_DEFINED +#endif /* internal_Hacl_Streaming_HMAC_H */ diff --git a/Modules/_hacl/internal/Hacl_Streaming_Types.h b/Modules/_hacl/internal/Hacl_Streaming_Types.h index fed3cbd425917c..d6c4fc0d6255f7 100644 --- a/Modules/_hacl/internal/Hacl_Streaming_Types.h +++ b/Modules/_hacl/internal/Hacl_Streaming_Types.h @@ -23,8 +23,8 @@ */ -#ifndef __internal_Hacl_Streaming_Types_H -#define __internal_Hacl_Streaming_Types_H +#ifndef internal_Hacl_Streaming_Types_H +#define internal_Hacl_Streaming_Types_H #if defined(__cplusplus) extern "C" { @@ -83,5 +83,5 @@ Hacl_Streaming_MD_state_64; } #endif -#define __internal_Hacl_Streaming_Types_H_DEFINED -#endif +#define internal_Hacl_Streaming_Types_H_DEFINED +#endif /* internal_Hacl_Streaming_Types_H */ diff --git a/Modules/_hacl/python_hacl_namespaces.h b/Modules/_hacl/python_hacl_namespaces.h index 1c2f7fea5c837a..d0b4500395e7c3 100644 --- a/Modules/_hacl/python_hacl_namespaces.h +++ b/Modules/_hacl/python_hacl_namespaces.h @@ -2,95 +2,43 @@ #define _PYTHON_HACL_NAMESPACES_H /* - * C's excuse for namespaces: Use globally unique names to avoid linkage - * conflicts with builds linking or dynamically loading other code potentially - * using HACL* libraries. + * Use globally unique names to avoid linkage conflicts with builds linking + * or dynamically loading other code potentially using HACL* libraries. * - * Something like this to generate new entries for the list: - * - * nm *.o | grep Hacl | cut -c 20- | sort | uniq | grep -v _Py_LibHacl_ | egrep ^_ | sed 's/_\(.*\)/#define \1 _Py_LibHacl_\1/g' - */ + * Assuming that the current working directory is Modules/_hacl, + * use the following command to generate a list of candidates: -#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0 + nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \ + | sed -e 's/^_Py_LibHacl_//g' \ + | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' \ + | sort -u -#define Hacl_Hash_SHA2_state_sha2_224_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224_s -#define Hacl_Hash_SHA2_state_sha2_224 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224 -#define Hacl_Hash_SHA2_state_sha2_256 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_256 -#define Hacl_Hash_SHA2_state_sha2_384_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384_s -#define Hacl_Hash_SHA2_state_sha2_384 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384 -#define Hacl_Hash_SHA2_state_sha2_512 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_512 -#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256 -#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224 -#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512 -#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384 -#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256 -#define Hacl_Hash_SHA2_copy_224 _Py_LibHacl_Hacl_Hash_SHA2_copy_224 -#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512 -#define Hacl_Hash_SHA2_copy_384 _Py_LibHacl_Hacl_Hash_SHA2_copy_384 -#define Hacl_Hash_SHA2_init_256 _Py_LibHacl_Hacl_Hash_SHA2_init_256 -#define Hacl_Hash_SHA2_init_224 _Py_LibHacl_Hacl_Hash_SHA2_init_224 -#define Hacl_Hash_SHA2_init_512 _Py_LibHacl_Hacl_Hash_SHA2_init_512 -#define Hacl_Hash_SHA2_init_384 _Py_LibHacl_Hacl_Hash_SHA2_init_384 -#define Hacl_SHA2_Scalar32_sha512_init _Py_LibHacl_Hacl_SHA2_Scalar32_sha512_init -#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256 -#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224 -#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512 -#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384 -#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256 -#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224 -#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512 -#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384 -#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256 -#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224 -#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512 -#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384 -#define Hacl_Hash_SHA2_sha256 _Py_LibHacl_Hacl_Hash_SHA2_sha256 -#define Hacl_Hash_SHA2_sha224 _Py_LibHacl_Hacl_Hash_SHA2_sha224 -#define Hacl_Hash_SHA2_sha512 _Py_LibHacl_Hacl_Hash_SHA2_sha512 -#define Hacl_Hash_SHA2_sha384 _Py_LibHacl_Hacl_Hash_SHA2_sha384 + * Compare the entries to add as follows: -#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc -#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init -#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update -#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest -#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free -#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy -#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash - -#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc -#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init -#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update -#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest -#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free -#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy -#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash - -#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3 -#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3 -#define Hacl_Impl_SHA3_absorb_inner _Py_LibHacl_Hacl_Impl_SHA3_absorb_inner -#define Hacl_Impl_SHA3_keccak _Py_LibHacl_Hacl_Impl_SHA3_keccak -#define Hacl_Impl_SHA3_loadState _Py_LibHacl_Hacl_Impl_SHA3_loadState -#define Hacl_Impl_SHA3_squeeze _Py_LibHacl_Hacl_Impl_SHA3_squeeze -#define Hacl_Impl_SHA3_state_permute _Py_LibHacl_Hacl_Impl_SHA3_state_permute -#define Hacl_SHA3_sha3_224 _Py_LibHacl_Hacl_SHA3_sha3_224 -#define Hacl_SHA3_sha3_256 _Py_LibHacl_Hacl_SHA3_sha3_256 -#define Hacl_SHA3_sha3_384 _Py_LibHacl_Hacl_SHA3_sha3_384 -#define Hacl_SHA3_sha3_512 _Py_LibHacl_Hacl_SHA3_sha3_512 -#define Hacl_SHA3_shake128_hacl _Py_LibHacl_Hacl_SHA3_shake128_hacl -#define Hacl_SHA3_shake256_hacl _Py_LibHacl_Hacl_SHA3_shake256_hacl -#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len -#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy -#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest -#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free -#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg -#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len -#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake -#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_ -#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc -#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset -#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update -#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze + diff -y --suppress-common-lines \ + <(grep -P '^#define (?!_PY.+_H)' python_hacl_namespaces.h | sort -u) \ + <(nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \ + | sed -e 's/^_Py_LibHacl_//g' \ + | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' | sort -u) + */ +// --- Utils ------------------------------------------------------------------ +#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0 +// --- HASH-BLAKE-2b ---------------------------------------------------------- +#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy +#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest +#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish +#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free +#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key +#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params +#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info +#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init +#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc +#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key +#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key +#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset +#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key +#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params #define Hacl_Hash_Blake2b_Simd256_copy _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy #define Hacl_Hash_Blake2b_Simd256_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy_internal_state #define Hacl_Hash_Blake2b_Simd256_digest _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_digest @@ -104,7 +52,6 @@ #define Hacl_Hash_Blake2b_Simd256_malloc _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc #define Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key #define Hacl_Hash_Blake2b_Simd256_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key -#define Hacl_Hash_Blake2b_Simd256_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key0 #define Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key #define Hacl_Hash_Blake2b_Simd256_reset _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset #define Hacl_Hash_Blake2b_Simd256_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset_with_key @@ -115,23 +62,24 @@ #define Hacl_Hash_Blake2b_Simd256_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_last_no_inline #define Hacl_Hash_Blake2b_Simd256_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi #define Hacl_Hash_Blake2b_Simd256_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi_no_inline -#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy -#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest -#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish -#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free -#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key -#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params -#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info -#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init -#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc -#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key -#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key -#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset -#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key -#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params #define Hacl_Hash_Blake2b_update _Py_LibHacl_Hacl_Hash_Blake2b_update #define Hacl_Hash_Blake2b_update_last _Py_LibHacl_Hacl_Hash_Blake2b_update_last #define Hacl_Hash_Blake2b_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_update_multi +// --- HASH-BLAKE-2s ---------------------------------------------------------- +#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy +#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest +#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish +#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free +#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key +#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params +#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info +#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init +#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc +#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key +#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key +#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset +#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key +#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params #define Hacl_Hash_Blake2s_Simd128_copy _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy #define Hacl_Hash_Blake2s_Simd128_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy_internal_state #define Hacl_Hash_Blake2s_Simd128_digest _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_digest @@ -145,7 +93,6 @@ #define Hacl_Hash_Blake2s_Simd128_malloc _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc #define Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key #define Hacl_Hash_Blake2s_Simd128_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key -#define Hacl_Hash_Blake2s_Simd128_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key0 #define Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key #define Hacl_Hash_Blake2s_Simd128_reset _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset #define Hacl_Hash_Blake2s_Simd128_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset_with_key @@ -156,37 +103,54 @@ #define Hacl_Hash_Blake2s_Simd128_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_last_no_inline #define Hacl_Hash_Blake2s_Simd128_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi #define Hacl_Hash_Blake2s_Simd128_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi_no_inline -#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy -#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest -#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish -#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free -#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key -#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params -#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info -#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init -#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc -#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key -#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key -#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset -#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key -#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params #define Hacl_Hash_Blake2s_update _Py_LibHacl_Hacl_Hash_Blake2s_update #define Hacl_Hash_Blake2s_update_last _Py_LibHacl_Hacl_Hash_Blake2s_update_last #define Hacl_Hash_Blake2s_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_update_multi +// --- HASH-MD5 --------------------------------------------------------------- +#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy +#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest #define Hacl_Hash_MD5_finish _Py_LibHacl_Hacl_Hash_MD5_finish +#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free +#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash #define Hacl_Hash_MD5_hash_oneshot _Py_LibHacl_Hacl_Hash_MD5_hash_oneshot +#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init +#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc #define Hacl_Hash_MD5_reset _Py_LibHacl_Hacl_Hash_MD5_reset +#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update #define Hacl_Hash_MD5_update_last _Py_LibHacl_Hacl_Hash_MD5_update_last #define Hacl_Hash_MD5_update_multi _Py_LibHacl_Hacl_Hash_MD5_update_multi +// --- HASH-SHA-1 ------------------------------------------------------------- +#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy +#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest #define Hacl_Hash_SHA1_finish _Py_LibHacl_Hacl_Hash_SHA1_finish +#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free +#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash #define Hacl_Hash_SHA1_hash_oneshot _Py_LibHacl_Hacl_Hash_SHA1_hash_oneshot +#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init +#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc #define Hacl_Hash_SHA1_reset _Py_LibHacl_Hacl_Hash_SHA1_reset +#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update #define Hacl_Hash_SHA1_update_last _Py_LibHacl_Hacl_Hash_SHA1_update_last #define Hacl_Hash_SHA1_update_multi _Py_LibHacl_Hacl_Hash_SHA1_update_multi +// --- HASH-SHA-2 ------------------------------------------------------------- +#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256 +#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512 +#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224 +#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256 +#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384 +#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512 +#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224 +#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256 +#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384 +#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512 #define Hacl_Hash_SHA2_hash_224 _Py_LibHacl_Hacl_Hash_SHA2_hash_224 #define Hacl_Hash_SHA2_hash_256 _Py_LibHacl_Hacl_Hash_SHA2_hash_256 #define Hacl_Hash_SHA2_hash_384 _Py_LibHacl_Hacl_Hash_SHA2_hash_384 #define Hacl_Hash_SHA2_hash_512 _Py_LibHacl_Hacl_Hash_SHA2_hash_512 +#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224 +#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256 +#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384 +#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512 #define Hacl_Hash_SHA2_reset_224 _Py_LibHacl_Hacl_Hash_SHA2_reset_224 #define Hacl_Hash_SHA2_reset_256 _Py_LibHacl_Hacl_Hash_SHA2_reset_256 #define Hacl_Hash_SHA2_reset_384 _Py_LibHacl_Hacl_Hash_SHA2_reset_384 @@ -207,10 +171,25 @@ #define Hacl_Hash_SHA2_sha512_init _Py_LibHacl_Hacl_Hash_SHA2_sha512_init #define Hacl_Hash_SHA2_sha512_update_last _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_last #define Hacl_Hash_SHA2_sha512_update_nblocks _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_nblocks +#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224 +#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256 +#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384 +#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512 +// --- HASH-SHA-3 ------------------------------------------------------------- #define Hacl_Hash_SHA3_absorb_inner_32 _Py_LibHacl_Hacl_Hash_SHA3_absorb_inner_32 +#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len +#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy +#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest +#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free +#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg +#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len +#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_ +#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake #define Hacl_Hash_SHA3_keccak_piln _Py_LibHacl_Hacl_Hash_SHA3_keccak_piln #define Hacl_Hash_SHA3_keccak_rndc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rndc #define Hacl_Hash_SHA3_keccak_rotc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rotc +#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc +#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset #define Hacl_Hash_SHA3_sha3_224 _Py_LibHacl_Hacl_Hash_SHA3_sha3_224 #define Hacl_Hash_SHA3_sha3_256 _Py_LibHacl_Hacl_Hash_SHA3_sha3_256 #define Hacl_Hash_SHA3_sha3_384 _Py_LibHacl_Hacl_Hash_SHA3_sha3_384 @@ -220,37 +199,39 @@ #define Hacl_Hash_SHA3_shake128_absorb_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_absorb_nblocks #define Hacl_Hash_SHA3_shake128_squeeze_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_squeeze_nblocks #define Hacl_Hash_SHA3_shake256 _Py_LibHacl_Hacl_Hash_SHA3_shake256 +#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze #define Hacl_Hash_SHA3_state_free _Py_LibHacl_Hacl_Hash_SHA3_state_free #define Hacl_Hash_SHA3_state_malloc _Py_LibHacl_Hacl_Hash_SHA3_state_malloc - -// Streaming HMAC +#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update +#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3 +#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3 +// --- STREAMING-MAC ---------------------------------------------------------- +#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy +#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest +#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free +#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl #define Hacl_Streaming_HMAC_index_of_state _Py_LibHacl_Hacl_Streaming_HMAC_index_of_state #define Hacl_Streaming_HMAC_malloc_ _Py_LibHacl_Hacl_Streaming_HMAC_malloc_ -#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl #define Hacl_Streaming_HMAC_reset _Py_LibHacl_Hacl_Streaming_HMAC_reset -#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update -#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest -#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy -#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free #define Hacl_Streaming_HMAC_s1 _Py_LibHacl_Hacl_Streaming_HMAC_s1 #define Hacl_Streaming_HMAC_s2 _Py_LibHacl_Hacl_Streaming_HMAC_s2 - -// HMAC-MD5 +#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update +// --- HMAC-MD5 --------------------------------------------------------------- #define Hacl_HMAC_compute_md5 _Py_LibHacl_Hacl_HMAC_compute_md5 -// HMAC-SHA-1 +// --- HMAC-SHA-1 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha1 _Py_LibHacl_Hacl_HMAC_compute_sha1 -// HMAC-SHA-2 +// --- HMAC-SHA-2 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha2_224 _Py_LibHacl_Hacl_HMAC_compute_sha2_224 #define Hacl_HMAC_compute_sha2_256 _Py_LibHacl_Hacl_HMAC_compute_sha2_256 #define Hacl_HMAC_compute_sha2_384 _Py_LibHacl_Hacl_HMAC_compute_sha2_384 #define Hacl_HMAC_compute_sha2_512 _Py_LibHacl_Hacl_HMAC_compute_sha2_512 -// HMAC-SHA-3 +// --- HMAC-SHA-3 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha3_224 _Py_LibHacl_Hacl_HMAC_compute_sha3_224 #define Hacl_HMAC_compute_sha3_256 _Py_LibHacl_Hacl_HMAC_compute_sha3_256 #define Hacl_HMAC_compute_sha3_384 _Py_LibHacl_Hacl_HMAC_compute_sha3_384 #define Hacl_HMAC_compute_sha3_512 _Py_LibHacl_Hacl_HMAC_compute_sha3_512 -// HMAC-BLAKE -#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32 +// --- HMAC-BLAKE-2 ----------------------------------------------------------- #define Hacl_HMAC_compute_blake2b_32 _Py_LibHacl_Hacl_HMAC_compute_blake2b_32 +#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32 #endif // _PYTHON_HACL_NAMESPACES_H diff --git a/Modules/_hacl/refresh.sh b/Modules/_hacl/refresh.sh index d91650b44bb4e7..72ceb27b7f70e8 100755 --- a/Modules/_hacl/refresh.sh +++ b/Modules/_hacl/refresh.sh @@ -22,7 +22,7 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. -expected_hacl_star_rev=7720f6d4fc0468a99d5ea6120976bcc271e42727 +expected_hacl_star_rev=8ba599b2f6c9701b3dc961db895b0856a2210f76 hacl_dir="$(realpath "$1")" cd "$(dirname "$0")" diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 48eed5eac975ed..c8a76e14990751 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -943,9 +943,9 @@ static PyType_Spec EVPXOFtype_spec = { #endif -static PyObject* -py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, - int usedforsecurity) +static PyObject * +_hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, + int usedforsecurity) { Py_buffer view = { 0 }; PY_EVP_MD *digest = NULL; @@ -1017,16 +1017,25 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, return (PyObject *)self; } +#define CALL_HASHLIB_NEW(MODULE, NAME, DATA, STRING, USEDFORSECURITY) \ + do { \ + PyObject *data_obj; \ + if (_Py_hashlib_data_argument(&data_obj, DATA, STRING) < 0) { \ + return NULL; \ + } \ + return _hashlib_HASH(MODULE, NAME, data_obj, USEDFORSECURITY); \ + } while (0) /* The module-level function: new() */ /*[clinic input] -_hashlib.new as EVP_new +_hashlib.new - name as name_obj: object - string as data_obj: object(c_default="NULL") = b'' + name: str + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new hash object using the named algorithm. @@ -1037,136 +1046,137 @@ The MD5 and SHA1 algorithms are always supported. [clinic start generated code]*/ static PyObject * -EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=ddd5053f92dffe90 input=c24554d0337be1b0]*/ +_hashlib_new_impl(PyObject *module, const char *name, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=c01feb4ad6a6303d input=f5ec9bf1fa749d07]*/ { - char *name; - if (!PyArg_Parse(name_obj, "s", &name)) { - PyErr_SetString(PyExc_TypeError, "name must be a string"); - return NULL; - } - return py_evp_fromname(module, name, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, name, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_md5 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a md5 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_md5_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=87b0186440a44f8c input=990e36d5e689b16e]*/ +_hashlib_openssl_md5_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=ca8cf184d90f7432 input=e7c0adbd6a867db1]*/ { - return py_evp_fromname(module, Py_hash_md5, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_md5, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha1 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha1 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=6813024cf690670d input=948f2f4b6deabc10]*/ +_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=1736fb7b310d64be input=f7e5bb1711e952d8]*/ { - return py_evp_fromname(module, Py_hash_sha1, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha1, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha224 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha224 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=a2dfe7cc4eb14ebb input=f9272821fadca505]*/ +_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=0d6ff57be5e5c140 input=3820fff7ed3a53b8]*/ { - return py_evp_fromname(module, Py_hash_sha224, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha224, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha256 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=1f874a34870f0a68 input=549fad9d2930d4c5]*/ +_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=412ea7111555b6e7 input=9a2f115cf1f7e0eb]*/ { - return py_evp_fromname(module, Py_hash_sha256, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha256, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha384 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha384 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=58529eff9ca457b2 input=48601a6e3bf14ad7]*/ +_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=2e0dc395b59ed726 input=1ea48f6f01e77cfb]*/ { - return py_evp_fromname(module, Py_hash_sha384, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha384, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha512 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha512 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=2c744c9e4a40d5f6 input=c5c46a2a817aa98f]*/ +_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=4bdd760388dbfc0f input=3cf56903e07d1f5c]*/ { - return py_evp_fromname(module, Py_hash_sha512, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha512, data, string, usedforsecurity); } @@ -1175,77 +1185,81 @@ _hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, /*[clinic input] _hashlib.openssl_sha3_224 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-224 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=144641c1d144b974 input=e3a01b2888916157]*/ +_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=6d8dc2a924f3ba35 input=7f14f16a9f6a3158]*/ { - return py_evp_fromname(module, Py_hash_sha3_224, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_224, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-256 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=c61f1ab772d06668 input=e2908126c1b6deed]*/ +_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=9e520f537b3a4622 input=7987150939d5e352]*/ { - return py_evp_fromname(module, Py_hash_sha3_256, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_256, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_384 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-384 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=f68e4846858cf0ee input=ec0edf5c792f8252]*/ +_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=d239ba0463fd6138 input=fc943401f67e3b81]*/ { - return py_evp_fromname(module, Py_hash_sha3_384, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_384, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_512 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-512 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=2eede478c159354a input=64e2cc0c094d56f4]*/ +_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=17662f21038c2278 input=6601ddd2c6c1516d]*/ { - return py_evp_fromname(module, Py_hash_sha3_512, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_512, data, string, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHA3 */ @@ -1253,42 +1267,46 @@ _hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, /*[clinic input] _hashlib.openssl_shake_128 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a shake-128 variable hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=bc49cdd8ada1fa97 input=6c9d67440eb33ec8]*/ +_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=4e6afed8d18980ad input=373c3f1c93d87b37]*/ { - return py_evp_fromname(module, Py_hash_shake_128, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_shake_128, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_shake_256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a shake-256 variable hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=358d213be8852df7 input=479cbe9fefd4a9f8]*/ +_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=62481bce4a77d16c input=101c139ea2ddfcbf]*/ { - return py_evp_fromname(module, Py_hash_shake_256, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_shake_256, data, string, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHAKE */ +#undef CALL_HASHLIB_NEW + /*[clinic input] _hashlib.pbkdf2_hmac as pbkdf2_hmac @@ -2134,7 +2152,7 @@ _hashlib_compare_digest_impl(PyObject *module, PyObject *a, PyObject *b) /* List of functions exported by this module */ static struct PyMethodDef EVP_functions[] = { - EVP_NEW_METHODDEF + _HASHLIB_NEW_METHODDEF PBKDF2_HMAC_METHODDEF _HASHLIB_SCRYPT_METHODDEF _HASHLIB_GET_FIPS_MODE_METHODDEF diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 095866eec7d75a..05d01acd77109b 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -11,7 +11,8 @@ annotated by François Pinard, and converted to C by Raymond Hettinger. #endif #include "Python.h" -#include "pycore_list.h" // _PyList_ITEMS() +#include "pycore_list.h" // _PyList_ITEMS(), _PyList_AppendTakeRef() +#include "pycore_pyatomic_ft_wrappers.h" #include "clinic/_heapqmodule.c.h" @@ -59,8 +60,8 @@ siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr = _PyList_ITEMS(heap); parent = arr[parentpos]; newitem = arr[pos]; - arr[parentpos] = newitem; - arr[pos] = parent; + FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent); pos = parentpos; } return 0; @@ -108,8 +109,8 @@ siftup(PyListObject *heap, Py_ssize_t pos) /* Move the smaller child up. */ tmp1 = arr[childpos]; tmp2 = arr[pos]; - arr[childpos] = tmp2; - arr[pos] = tmp1; + FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1); pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ @@ -117,6 +118,7 @@ siftup(PyListObject *heap, Py_ssize_t pos) } /*[clinic input] +@critical_section heap _heapq.heappush heap: object(subclass_of='&PyList_Type') @@ -128,13 +130,22 @@ Push item onto heap, maintaining the heap invariant. static PyObject * _heapq_heappush_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=912c094f47663935 input=7c69611f3698aceb]*/ +/*[clinic end generated code: output=912c094f47663935 input=f7a4f03ef8d52e67]*/ { - if (PyList_Append(heap, item)) + if (item == NULL) { + PyErr_BadInternalCall(); return NULL; + } + + // In a free-threaded build, the heap is locked at this point. + // Therefore, calling _PyList_AppendTakeRef() is safe and no overhead. + if (_PyList_AppendTakeRef((PyListObject *)heap, Py_NewRef(item))) { + return NULL; + } - if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) + if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) { return NULL; + } Py_RETURN_NONE; } @@ -162,8 +173,9 @@ heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) if (!n) return lastelt; returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, lastelt); - if (siftup_func((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], lastelt); + if (siftup_func(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -171,6 +183,7 @@ heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) } /*[clinic input] +@critical_section heap _heapq.heappop heap: object(subclass_of='&PyList_Type') @@ -181,7 +194,7 @@ Pop the smallest item off the heap, maintaining the heap invariant. static PyObject * _heapq_heappop_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=96dfe82d37d9af76 input=91487987a583c856]*/ +/*[clinic end generated code: output=96dfe82d37d9af76 input=ed396461b153dd51]*/ { return heappop_internal(heap, siftup); } @@ -197,8 +210,9 @@ heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObjec } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup_func((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup_func(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -207,6 +221,7 @@ heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObjec /*[clinic input] +@critical_section heap _heapq.heapreplace heap: object(subclass_of='&PyList_Type') @@ -226,12 +241,13 @@ this routine unless written as part of a conditional replacement: static PyObject * _heapq_heapreplace_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=82ea55be8fbe24b4 input=719202ac02ba10c8]*/ +/*[clinic end generated code: output=82ea55be8fbe24b4 input=9be1678b817ef1a9]*/ { return heapreplace_internal(heap, item, siftup); } /*[clinic input] +@critical_section heap _heapq.heappushpop heap: object(subclass_of='&PyList_Type') @@ -246,7 +262,7 @@ a separate call to heappop(). static PyObject * _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=67231dc98ed5774f input=5dc701f1eb4a4aa7]*/ +/*[clinic end generated code: output=67231dc98ed5774f input=db05c81b1dd92c44]*/ { PyObject *returnitem; int cmp; @@ -271,8 +287,9 @@ _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -371,6 +388,7 @@ heapify_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) } /*[clinic input] +@critical_section heap _heapq.heapify heap: object(subclass_of='&PyList_Type') @@ -381,7 +399,7 @@ Transform list into a heap, in-place, in O(len(heap)) time. static PyObject * _heapq_heapify_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=e63a636fcf83d6d0 input=53bb7a2166febb73]*/ +/*[clinic end generated code: output=e63a636fcf83d6d0 input=aaaaa028b9b6af08]*/ { return heapify_internal(heap, siftup); } @@ -423,8 +441,8 @@ siftdown_max(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr = _PyList_ITEMS(heap); parent = arr[parentpos]; newitem = arr[pos]; - arr[parentpos] = newitem; - arr[pos] = parent; + FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent); pos = parentpos; } return 0; @@ -445,11 +463,11 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) return -1; } - /* Bubble up the smaller child until hitting a leaf. */ + /* Bubble up the larger child until hitting a leaf. */ arr = _PyList_ITEMS(heap); limit = endpos >> 1; /* smallest pos that has no child */ while (pos < limit) { - /* Set childpos to index of smaller child. */ + /* Set childpos to index of larger child. */ childpos = 2*pos + 1; /* leftmost child position */ if (childpos + 1 < endpos) { PyObject* a = arr[childpos + 1]; @@ -469,11 +487,11 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) return -1; } } - /* Move the smaller child up. */ + /* Move the larger child up. */ tmp1 = arr[childpos]; tmp2 = arr[pos]; - arr[childpos] = tmp2; - arr[pos] = tmp1; + FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1); pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ @@ -481,6 +499,7 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) } /*[clinic input] +@critical_section heap _heapq.heappush_max heap: object(subclass_of='&PyList_Type') @@ -492,9 +511,16 @@ Push item onto max heap, maintaining the heap invariant. static PyObject * _heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=c869d5f9deb08277 input=4743d7db137b6e2b]*/ +/*[clinic end generated code: output=c869d5f9deb08277 input=c437e3d1ff8dcb70]*/ { - if (PyList_Append(heap, item)) { + if (item == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + // In a free-threaded build, the heap is locked at this point. + // Therefore, calling _PyList_AppendTakeRef() is safe and no overhead. + if (_PyList_AppendTakeRef((PyListObject *)heap, Py_NewRef(item))) { return NULL; } @@ -506,6 +532,7 @@ _heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item) } /*[clinic input] +@critical_section heap _heapq.heappop_max heap: object(subclass_of='&PyList_Type') @@ -516,12 +543,13 @@ Maxheap variant of heappop. static PyObject * _heapq_heappop_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=2f051195ab404b77 input=e62b14016a5a26de]*/ +/*[clinic end generated code: output=2f051195ab404b77 input=5d70c997798aec64]*/ { return heappop_internal(heap, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heapreplace_max heap: object(subclass_of='&PyList_Type') @@ -533,12 +561,13 @@ Maxheap variant of heapreplace. static PyObject * _heapq_heapreplace_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=8770778b5a9cbe9b input=21a3d28d757c881c]*/ +/*[clinic end generated code: output=8770778b5a9cbe9b input=fe70175356e4a649]*/ { return heapreplace_internal(heap, item, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heapify_max heap: object(subclass_of='&PyList_Type') @@ -549,12 +578,13 @@ Maxheap variant of heapify. static PyObject * _heapq_heapify_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=8401af3856529807 input=edda4255728c431e]*/ +/*[clinic end generated code: output=8401af3856529807 input=4eee63231e7d1573]*/ { return heapify_internal(heap, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heappushpop_max heap: object(subclass_of='&PyList_Type') @@ -569,7 +599,7 @@ a separate call to heappop_max(). static PyObject * _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=ff0019f0941aca0d input=525a843013cbd6c0]*/ +/*[clinic end generated code: output=ff0019f0941aca0d input=24d0defa6fd6df4a]*/ { PyObject *returnitem; int cmp; @@ -595,8 +625,9 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup_max((PyListObject *)heap, 0) < 0) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup_max(list, 0) < 0) { Py_DECREF(returnitem); return NULL; } diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index 172cebcaa4884f..ef9cf01ecbec5e 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -20,9 +20,11 @@ #endif #define REGISTERS_HEAP_TYPES +#define HAS_FALLBACK #define HAS_UNBOUND_ITEMS #include "_interpreters_common.h" #undef HAS_UNBOUND_ITEMS +#undef HAS_FALLBACK #undef REGISTERS_HEAP_TYPES @@ -218,6 +220,22 @@ wait_for_lock(PyThread_type_lock mutex, PY_TIMEOUT_T timeout) return 0; } +static int +ensure_highlevel_module_loaded(void) +{ + PyObject *highlevel = + PyImport_ImportModule("concurrent.interpreters._channels"); + if (highlevel == NULL) { + PyErr_Clear(); + highlevel = PyImport_ImportModule("test.support.channels"); + if (highlevel == NULL) { + return -1; + } + } + Py_DECREF(highlevel); + return 0; +} + /* module state *************************************************************/ @@ -252,10 +270,10 @@ _get_current_module_state(void) { PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } module_state *state = get_module_state(mod); Py_DECREF(mod); @@ -493,12 +511,12 @@ _waiting_release(_waiting_t *waiting, int received) assert(!waiting->received); waiting->status = WAITING_RELEASING; - PyThread_release_lock(waiting->mutex); if (waiting->received != received) { assert(received == 1); waiting->received = received; } waiting->status = WAITING_RELEASED; + PyThread_release_lock(waiting->mutex); } static void @@ -523,7 +541,7 @@ typedef struct _channelitem { int64_t interpid; _PyXIData_t *data; _waiting_t *waiting; - int unboundop; + unboundop_t unboundop; struct _channelitem *next; } _channelitem; @@ -536,7 +554,7 @@ _channelitem_ID(_channelitem *item) static void _channelitem_init(_channelitem *item, int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { if (interpid < 0) { interpid = _get_interpid(data); @@ -562,7 +580,7 @@ _channelitem_clear_data(_channelitem *item, int removed) { if (item->data != NULL) { // It was allocated in channel_send(). - (void)_release_xid_data(item->data, XID_IGNORE_EXC & XID_FREE); + (void)_release_xid_data(item->data, XID_IGNORE_EXC | XID_FREE); item->data = NULL; } @@ -583,7 +601,7 @@ _channelitem_clear(_channelitem *item) static _channelitem * _channelitem_new(int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { _channelitem *item = GLOBAL_MALLOC(_channelitem); if (item == NULL) { @@ -694,7 +712,7 @@ _channelqueue_free(_channelqueue *queue) static int _channelqueue_put(_channelqueue *queue, int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { _channelitem *item = _channelitem_new(interpid, data, waiting, unboundop); if (item == NULL) { @@ -798,7 +816,7 @@ _channelqueue_remove(_channelqueue *queue, _channelitem_id_t itemid, } queue->count -= 1; - int unboundop; + unboundop_t unboundop; _channelitem_popped(item, p_data, p_waiting, &unboundop); } @@ -1083,16 +1101,18 @@ typedef struct _channel { PyThread_type_lock mutex; _channelqueue *queue; _channelends *ends; - struct { - int unboundop; + struct _channeldefaults { + unboundop_t unboundop; + xidata_fallback_t fallback; } defaults; int open; struct _channel_closing *closing; } _channel_state; static _channel_state * -_channel_new(PyThread_type_lock mutex, int unboundop) +_channel_new(PyThread_type_lock mutex, struct _channeldefaults defaults) { + assert(check_unbound(defaults.unboundop)); _channel_state *chan = GLOBAL_MALLOC(_channel_state); if (chan == NULL) { return NULL; @@ -1109,7 +1129,7 @@ _channel_new(PyThread_type_lock mutex, int unboundop) GLOBAL_FREE(chan); return NULL; } - chan->defaults.unboundop = unboundop; + chan->defaults = defaults; chan->open = 1; chan->closing = NULL; return chan; @@ -1130,7 +1150,7 @@ _channel_free(_channel_state *chan) static int _channel_add(_channel_state *chan, int64_t interpid, - _PyXIData_t *data, _waiting_t *waiting, int unboundop) + _PyXIData_t *data, _waiting_t *waiting, unboundop_t unboundop) { int res = -1; PyThread_acquire_lock(chan->mutex, WAIT_LOCK); @@ -1611,7 +1631,7 @@ _channels_release_cid_object(_channels *channels, int64_t cid) struct channel_id_and_info { int64_t id; - int unboundop; + struct _channeldefaults defaults; }; static struct channel_id_and_info * @@ -1628,7 +1648,7 @@ _channels_list_all(_channels *channels, int64_t *count) for (int64_t i=0; ref != NULL; ref = ref->next, i++) { ids[i] = (struct channel_id_and_info){ .id = ref->cid, - .unboundop = ref->chan->defaults.unboundop, + .defaults = ref->chan->defaults, }; } *count = channels->numopen; @@ -1714,13 +1734,13 @@ _channel_finish_closing(_channel_state *chan) { // Create a new channel. static int64_t -channel_create(_channels *channels, int unboundop) +channel_create(_channels *channels, struct _channeldefaults defaults) { PyThread_type_lock mutex = PyThread_allocate_lock(); if (mutex == NULL) { return ERR_CHANNEL_MUTEX_INIT; } - _channel_state *chan = _channel_new(mutex, unboundop); + _channel_state *chan = _channel_new(mutex, defaults); if (chan == NULL) { PyThread_free_lock(mutex); return -1; @@ -1752,7 +1772,7 @@ channel_destroy(_channels *channels, int64_t cid) // Optionally request to be notified when it is received. static int channel_send(_channels *channels, int64_t cid, PyObject *obj, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop, xidata_fallback_t fallback) { PyThreadState *tstate = _PyThreadState_GET(); PyInterpreterState *interp = tstate->interp; @@ -1779,7 +1799,7 @@ channel_send(_channels *channels, int64_t cid, PyObject *obj, PyThread_release_lock(mutex); return -1; } - if (_PyObject_GetXIData(tstate, obj, data) != 0) { + if (_PyObject_GetXIData(tstate, obj, fallback, data) != 0) { PyThread_release_lock(mutex); GLOBAL_FREE(data); return -1; @@ -1823,7 +1843,8 @@ channel_clear_sent(_channels *channels, int64_t cid, _waiting_t *waiting) // Like channel_send(), but strictly wait for the object to be received. static int channel_send_wait(_channels *channels, int64_t cid, PyObject *obj, - int unboundop, PY_TIMEOUT_T timeout) + unboundop_t unboundop, PY_TIMEOUT_T timeout, + xidata_fallback_t fallback) { // We use a stack variable here, so we must ensure that &waiting // is not held by any channel item at the point this function exits. @@ -1834,7 +1855,7 @@ channel_send_wait(_channels *channels, int64_t cid, PyObject *obj, } /* Queue up the object. */ - int res = channel_send(channels, cid, obj, &waiting, unboundop); + int res = channel_send(channels, cid, obj, &waiting, unboundop, fallback); if (res < 0) { assert(waiting.status == WAITING_NO_STATUS); goto finally; @@ -2005,6 +2026,20 @@ channel_is_associated(_channels *channels, int64_t cid, int64_t interpid, return (end != NULL && end->open); } +static int +channel_get_defaults(_channels *channels, int64_t cid, struct _channeldefaults *defaults) +{ + PyThread_type_lock mutex = NULL; + _channel_state *channel = NULL; + int err = _channels_lookup(channels, cid, &mutex, &channel); + if (err != 0) { + return err; + } + *defaults = channel->defaults; + PyThread_release_lock(mutex); + return 0; +} + static int _channel_get_count(_channels *channels, int64_t cid, Py_ssize_t *p_count) { @@ -2694,7 +2729,7 @@ add_channelid_type(PyObject *mod) Py_DECREF(cls); return NULL; } - if (ensure_xid_class(cls, _channelid_shared) < 0) { + if (ensure_xid_class(cls, GETDATA(_channelid_shared)) < 0) { Py_DECREF(cls); return NULL; } @@ -2723,15 +2758,9 @@ _get_current_channelend_type(int end) } if (cls == NULL) { // Force the module to be loaded, to register the type. - PyObject *highlevel = PyImport_ImportModule("interpreters.channels"); - if (highlevel == NULL) { - PyErr_Clear(); - highlevel = PyImport_ImportModule("test.support.interpreters.channels"); - if (highlevel == NULL) { - return NULL; - } + if (ensure_highlevel_module_loaded() < 0) { + return NULL; } - Py_DECREF(highlevel); if (end == CHANNEL_SEND) { cls = state->send_channel_type; } @@ -2797,12 +2826,12 @@ set_channelend_types(PyObject *mod, PyTypeObject *send, PyTypeObject *recv) // Add and register the types. state->send_channel_type = (PyTypeObject *)Py_NewRef(send); state->recv_channel_type = (PyTypeObject *)Py_NewRef(recv); - if (ensure_xid_class(send, _channelend_shared) < 0) { + if (ensure_xid_class(send, GETDATA(_channelend_shared)) < 0) { Py_CLEAR(state->send_channel_type); Py_CLEAR(state->recv_channel_type); return -1; } - if (ensure_xid_class(recv, _channelend_shared) < 0) { + if (ensure_xid_class(recv, GETDATA(_channelend_shared)) < 0) { (void)clear_xid_class(state->send_channel_type); Py_CLEAR(state->send_channel_type); Py_CLEAR(state->recv_channel_type); @@ -2881,20 +2910,27 @@ clear_interpreter(void *data) static PyObject * channelsmod_create(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"unboundop", NULL}; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "i:create", kwlist, - &unboundop)) + static char *kwlist[] = {"unboundop", "fallback", NULL}; + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ii:create", kwlist, + &unboundarg, &fallbackarg)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _channeldefaults defaults = {0}; + if (resolve_unboundop(unboundarg, UNBOUND_REPLACE, + &defaults.unboundop) < 0) + { + return NULL; + } + if (resolve_fallback(fallbackarg, _PyXIDATA_FULL_FALLBACK, + &defaults.fallback) < 0) + { return NULL; } - int64_t cid = channel_create(&_globals.channels, unboundop); + int64_t cid = channel_create(&_globals.channels, defaults); if (cid < 0) { (void)handle_channel_error(-1, self, cid); return NULL; @@ -2987,7 +3023,9 @@ channelsmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } assert(cidobj != NULL); - PyObject *item = Py_BuildValue("Oi", cidobj, cur->unboundop); + PyObject *item = Py_BuildValue("Oii", cidobj, + cur->defaults.unboundop, + cur->defaults.fallback); Py_DECREF(cidobj); if (item == NULL) { Py_SETREF(ids, NULL); @@ -3075,40 +3113,54 @@ receive end."); static PyObject * channelsmod_send(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"cid", "obj", "unboundop", "blocking", "timeout", - NULL}; + static char *kwlist[] = {"cid", "obj", "unboundop", "fallback", + "blocking", "timeout", NULL}; struct channel_id_converter_data cid_data = { .module = self, }; PyObject *obj; - int unboundop = UNBOUND_REPLACE; + int unboundarg = -1; + int fallbackarg = -1; int blocking = 1; PyObject *timeout_obj = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O|i$pO:channel_send", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O&O|ii$pO:channel_send", kwlist, channel_id_converter, &cid_data, &obj, - &unboundop, &blocking, &timeout_obj)) + &unboundarg, &fallbackarg, + &blocking, &timeout_obj)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); - return NULL; - } - int64_t cid = cid_data.cid; PY_TIMEOUT_T timeout; if (PyThread_ParseTimeoutArg(timeout_obj, blocking, &timeout) < 0) { return NULL; } + struct _channeldefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = channel_get_defaults(&_globals.channels, cid, &defaults); + if (handle_channel_error(err, self, cid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { + return NULL; + } /* Queue up the object. */ int err = 0; if (blocking) { - err = channel_send_wait(&_globals.channels, cid, obj, unboundop, timeout); + err = channel_send_wait( + &_globals.channels, cid, obj, unboundop, timeout, fallback); } else { - err = channel_send(&_globals.channels, cid, obj, NULL, unboundop); + err = channel_send( + &_globals.channels, cid, obj, NULL, unboundop, fallback); } if (handle_channel_error(err, self, cid)) { return NULL; @@ -3126,32 +3178,44 @@ By default this waits for the object to be received."); static PyObject * channelsmod_send_buffer(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"cid", "obj", "unboundop", "blocking", "timeout", - NULL}; + static char *kwlist[] = {"cid", "obj", "unboundop", "fallback", + "blocking", "timeout", NULL}; struct channel_id_converter_data cid_data = { .module = self, }; PyObject *obj; - int unboundop = UNBOUND_REPLACE; - int blocking = 1; + int unboundarg = -1; + int fallbackarg = -1; + int blocking = -1; PyObject *timeout_obj = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O&O|i$pO:channel_send_buffer", kwlist, + "O&O|ii$pO:channel_send_buffer", kwlist, channel_id_converter, &cid_data, &obj, - &unboundop, &blocking, &timeout_obj)) { - return NULL; - } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + &unboundarg, &fallbackarg, + &blocking, &timeout_obj)) + { return NULL; } - int64_t cid = cid_data.cid; PY_TIMEOUT_T timeout; if (PyThread_ParseTimeoutArg(timeout_obj, blocking, &timeout) < 0) { return NULL; } + struct _channeldefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = channel_get_defaults(&_globals.channels, cid, &defaults); + if (handle_channel_error(err, self, cid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { + return NULL; + } PyObject *tempobj = PyMemoryView_FromObject(obj); if (tempobj == NULL) { @@ -3162,10 +3226,11 @@ channelsmod_send_buffer(PyObject *self, PyObject *args, PyObject *kwds) int err = 0; if (blocking) { err = channel_send_wait( - &_globals.channels, cid, tempobj, unboundop, timeout); + &_globals.channels, cid, tempobj, unboundop, timeout, fallback); } else { - err = channel_send(&_globals.channels, cid, tempobj, NULL, unboundop); + err = channel_send( + &_globals.channels, cid, tempobj, NULL, unboundop, fallback); } Py_DECREF(tempobj); if (handle_channel_error(err, self, cid)) { @@ -3197,7 +3262,7 @@ channelsmod_recv(PyObject *self, PyObject *args, PyObject *kwds) cid = cid_data.cid; PyObject *obj = NULL; - int unboundop = 0; + unboundop_t unboundop = 0; int err = channel_recv(&_globals.channels, cid, &obj, &unboundop); if (err == ERR_CHANNEL_EMPTY && dflt != NULL) { // Use the default. @@ -3388,17 +3453,14 @@ channelsmod_get_channel_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t cid = cid_data.cid; - PyThread_type_lock mutex = NULL; - _channel_state *channel = NULL; - int err = _channels_lookup(&_globals.channels, cid, &mutex, &channel); + struct _channeldefaults defaults = {0}; + int err = channel_get_defaults(&_globals.channels, cid, &defaults); if (handle_channel_error(err, self, cid)) { return NULL; } - int unboundop = channel->defaults.unboundop; - PyThread_release_lock(mutex); - PyObject *defaults = Py_BuildValue("i", unboundop); - return defaults; + PyObject *res = Py_BuildValue("ii", defaults.unboundop, defaults.fallback); + return res; } PyDoc_STRVAR(channelsmod_get_channel_defaults_doc, @@ -3552,8 +3614,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -3563,8 +3624,7 @@ module_clear(PyObject *mod) assert(state != NULL); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 526249a0e1aec3..872495d32fbf8f 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -9,9 +9,11 @@ #include "pycore_crossinterp.h" // _PyXIData_t #define REGISTERS_HEAP_TYPES +#define HAS_FALLBACK #define HAS_UNBOUND_ITEMS #include "_interpreters_common.h" #undef HAS_UNBOUND_ITEMS +#undef HAS_FALLBACK #undef REGISTERS_HEAP_TYPES @@ -134,13 +136,10 @@ idarg_int64_converter(PyObject *arg, void *ptr) static int ensure_highlevel_module_loaded(void) { - PyObject *highlevel = PyImport_ImportModule("interpreters.queues"); + PyObject *highlevel = + PyImport_ImportModule("concurrent.interpreters._queues"); if (highlevel == NULL) { - PyErr_Clear(); - highlevel = PyImport_ImportModule("test.support.interpreters.queues"); - if (highlevel == NULL) { - return -1; - } + return -1; } Py_DECREF(highlevel); return 0; @@ -297,7 +296,7 @@ add_QueueError(PyObject *mod) { module_state *state = get_module_state(mod); -#define PREFIX "test.support.interpreters." +#define PREFIX "concurrent.interpreters." #define ADD_EXCTYPE(NAME, BASE, DOC) \ assert(state->NAME == NULL); \ if (add_exctype(mod, &state->NAME, PREFIX #NAME, DOC, BASE) < 0) { \ @@ -401,14 +400,13 @@ typedef struct _queueitem { meaning the interpreter has been destroyed. */ int64_t interpid; _PyXIData_t *data; - int fmt; - int unboundop; + unboundop_t unboundop; struct _queueitem *next; } _queueitem; static void _queueitem_init(_queueitem *item, - int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) + int64_t interpid, _PyXIData_t *data, unboundop_t unboundop) { if (interpid < 0) { interpid = _get_interpid(data); @@ -422,7 +420,6 @@ _queueitem_init(_queueitem *item, *item = (_queueitem){ .interpid = interpid, .data = data, - .fmt = fmt, .unboundop = unboundop, }; } @@ -434,7 +431,7 @@ _queueitem_clear_data(_queueitem *item) return; } // It was allocated in queue_put(). - (void)_release_xid_data(item->data, XID_IGNORE_EXC & XID_FREE); + (void)_release_xid_data(item->data, XID_IGNORE_EXC | XID_FREE); item->data = NULL; } @@ -446,14 +443,14 @@ _queueitem_clear(_queueitem *item) } static _queueitem * -_queueitem_new(int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) +_queueitem_new(int64_t interpid, _PyXIData_t *data, int unboundop) { _queueitem *item = GLOBAL_MALLOC(_queueitem); if (item == NULL) { PyErr_NoMemory(); return NULL; } - _queueitem_init(item, interpid, data, fmt, unboundop); + _queueitem_init(item, interpid, data, unboundop); return item; } @@ -476,10 +473,9 @@ _queueitem_free_all(_queueitem *item) static void _queueitem_popped(_queueitem *item, - _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) + _PyXIData_t **p_data, unboundop_t *p_unboundop) { *p_data = item->data; - *p_fmt = item->fmt; *p_unboundop = item->unboundop; // We clear them here, so they won't be released in _queueitem_clear(). item->data = NULL; @@ -527,16 +523,16 @@ typedef struct _queue { _queueitem *first; _queueitem *last; } items; - struct { - int fmt; + struct _queuedefaults { + xidata_fallback_t fallback; int unboundop; } defaults; } _queue; static int -_queue_init(_queue *queue, Py_ssize_t maxsize, int fmt, int unboundop) +_queue_init(_queue *queue, Py_ssize_t maxsize, struct _queuedefaults defaults) { - assert(check_unbound(unboundop)); + assert(check_unbound(defaults.unboundop)); PyThread_type_lock mutex = PyThread_allocate_lock(); if (mutex == NULL) { return ERR_QUEUE_ALLOC; @@ -547,10 +543,7 @@ _queue_init(_queue *queue, Py_ssize_t maxsize, int fmt, int unboundop) .items = { .maxsize = maxsize, }, - .defaults = { - .fmt = fmt, - .unboundop = unboundop, - }, + .defaults = defaults, }; return 0; } @@ -631,8 +624,7 @@ _queue_unlock(_queue *queue) } static int -_queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, - int fmt, int unboundop) +_queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, int unboundop) { int err = _queue_lock(queue); if (err < 0) { @@ -648,7 +640,7 @@ _queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, return ERR_QUEUE_FULL; } - _queueitem *item = _queueitem_new(interpid, data, fmt, unboundop); + _queueitem *item = _queueitem_new(interpid, data, unboundop); if (item == NULL) { _queue_unlock(queue); return -1; @@ -668,8 +660,7 @@ _queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, } static int -_queue_next(_queue *queue, - _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) +_queue_next(_queue *queue, _PyXIData_t **p_data, int *p_unboundop) { int err = _queue_lock(queue); if (err < 0) { @@ -688,7 +679,7 @@ _queue_next(_queue *queue, } queue->items.count -= 1; - _queueitem_popped(item, p_data, p_fmt, p_unboundop); + _queueitem_popped(item, p_data, p_unboundop); _queue_unlock(queue); return 0; @@ -716,8 +707,11 @@ _queue_is_full(_queue *queue, int *p_is_full) return err; } - assert(queue->items.count <= queue->items.maxsize); - *p_is_full = queue->items.count == queue->items.maxsize; + assert(queue->items.maxsize <= 0 + || queue->items.count <= queue->items.maxsize); + *p_is_full = queue->items.maxsize > 0 + ? queue->items.count == queue->items.maxsize + : 0; _queue_unlock(queue); return 0; @@ -1035,8 +1029,7 @@ _queues_decref(_queues *queues, int64_t qid) struct queue_id_and_info { int64_t id; - int fmt; - int unboundop; + struct _queuedefaults defaults; }; static struct queue_id_and_info * @@ -1053,8 +1046,7 @@ _queues_list_all(_queues *queues, int64_t *p_count) for (int64_t i=0; ref != NULL; ref = ref->next, i++) { ids[i].id = ref->qid; assert(ref->queue != NULL); - ids[i].fmt = ref->queue->defaults.fmt; - ids[i].unboundop = ref->queue->defaults.unboundop; + ids[i].defaults = ref->queue->defaults; } *p_count = queues->count; @@ -1090,13 +1082,14 @@ _queue_free(_queue *queue) // Create a new queue. static int64_t -queue_create(_queues *queues, Py_ssize_t maxsize, int fmt, int unboundop) +queue_create(_queues *queues, Py_ssize_t maxsize, + struct _queuedefaults defaults) { _queue *queue = GLOBAL_MALLOC(_queue); if (queue == NULL) { return ERR_QUEUE_ALLOC; } - int err = _queue_init(queue, maxsize, fmt, unboundop); + int err = _queue_init(queue, maxsize, defaults); if (err < 0) { GLOBAL_FREE(queue); return (int64_t)err; @@ -1125,7 +1118,8 @@ queue_destroy(_queues *queues, int64_t qid) // Push an object onto the queue. static int -queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) +queue_put(_queues *queues, int64_t qid, PyObject *obj, unboundop_t unboundop, + xidata_fallback_t fallback) { PyThreadState *tstate = PyThreadState_Get(); @@ -1138,27 +1132,27 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) assert(queue != NULL); // Convert the object to cross-interpreter data. - _PyXIData_t *data = _PyXIData_New(); - if (data == NULL) { + _PyXIData_t *xidata = _PyXIData_New(); + if (xidata == NULL) { _queue_unmark_waiter(queue, queues->mutex); return -1; } - if (_PyObject_GetXIData(tstate, obj, data) != 0) { + if (_PyObject_GetXIData(tstate, obj, fallback, xidata) != 0) { _queue_unmark_waiter(queue, queues->mutex); - GLOBAL_FREE(data); + GLOBAL_FREE(xidata); return -1; } - assert(_PyXIData_INTERPID(data) == + assert(_PyXIData_INTERPID(xidata) == PyInterpreterState_GetID(tstate->interp)); // Add the data to the queue. int64_t interpid = -1; // _queueitem_init() will set it. - int res = _queue_add(queue, interpid, data, fmt, unboundop); + int res = _queue_add(queue, interpid, xidata, unboundop); _queue_unmark_waiter(queue, queues->mutex); if (res != 0) { // We may chain an exception here: - (void)_release_xid_data(data, 0); - GLOBAL_FREE(data); + (void)_release_xid_data(xidata, 0); + GLOBAL_FREE(xidata); return res; } @@ -1169,7 +1163,7 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) // XXX Support a "wait" mutex? static int queue_get(_queues *queues, int64_t qid, - PyObject **res, int *p_fmt, int *p_unboundop) + PyObject **res, int *p_unboundop) { int err; *res = NULL; @@ -1185,7 +1179,7 @@ queue_get(_queues *queues, int64_t qid, // Pop off the next item from the queue. _PyXIData_t *data = NULL; - err = _queue_next(queue, &data, p_fmt, p_unboundop); + err = _queue_next(queue, &data, p_unboundop); _queue_unmark_waiter(queue, queues->mutex); if (err != 0) { return err; @@ -1216,6 +1210,20 @@ queue_get(_queues *queues, int64_t qid, return 0; } +static int +queue_get_defaults(_queues *queues, int64_t qid, + struct _queuedefaults *p_defaults) +{ + _queue *queue = NULL; + int err = _queues_lookup(queues, qid, &queue); + if (err != 0) { + return err; + } + *p_defaults = queue->defaults; + _queue_unmark_waiter(queue, queues->mutex); + return 0; +} + static int queue_get_maxsize(_queues *queues, int64_t qid, Py_ssize_t *p_maxsize) { @@ -1270,7 +1278,7 @@ set_external_queue_type(module_state *state, PyTypeObject *queue_type) } // Add and register the new type. - if (ensure_xid_class(queue_type, _queueobj_shared) < 0) { + if (ensure_xid_class(queue_type, GETDATA(_queueobj_shared)) < 0) { return -1; } state->queue_type = (PyTypeObject *)Py_NewRef(queue_type); @@ -1348,10 +1356,10 @@ _queueobj_from_xid(_PyXIData_t *data) PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } PyTypeObject *cls = get_external_queue_type(mod); @@ -1474,22 +1482,28 @@ qidarg_converter(PyObject *arg, void *ptr) static PyObject * queuesmod_create(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"maxsize", "fmt", "unboundop", NULL}; + static char *kwlist[] = {"maxsize", "unboundop", "fallback", NULL}; Py_ssize_t maxsize; - int fmt; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "nii:create", kwlist, - &maxsize, &fmt, &unboundop)) + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "n|ii:create", kwlist, + &maxsize, &unboundarg, &fallbackarg)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _queuedefaults defaults = {0}; + if (resolve_unboundop(unboundarg, UNBOUND_REPLACE, + &defaults.unboundop) < 0) + { + return NULL; + } + if (resolve_fallback(fallbackarg, _PyXIDATA_FULL_FALLBACK, + &defaults.fallback) < 0) + { return NULL; } - int64_t qid = queue_create(&_globals.queues, maxsize, fmt, unboundop); + int64_t qid = queue_create(&_globals.queues, maxsize, defaults); if (qid < 0) { (void)handle_queue_error((int)qid, self, qid); return NULL; @@ -1511,7 +1525,7 @@ queuesmod_create(PyObject *self, PyObject *args, PyObject *kwds) } PyDoc_STRVAR(queuesmod_create_doc, -"create(maxsize, fmt, unboundop) -> qid\n\ +"create(maxsize, unboundop, fallback) -> qid\n\ \n\ Create a new cross-interpreter queue and return its unique generated ID.\n\ It is a new reference as though bind() had been called on the queue.\n\ @@ -1560,8 +1574,9 @@ queuesmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } struct queue_id_and_info *cur = qids; for (int64_t i=0; i < count; cur++, i++) { - PyObject *item = Py_BuildValue("Lii", cur->id, cur->fmt, - cur->unboundop); + PyObject *item = Py_BuildValue("Lii", cur->id, + cur->defaults.unboundop, + cur->defaults.fallback); if (item == NULL) { Py_SETREF(ids, NULL); break; @@ -1575,34 +1590,44 @@ queuesmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(queuesmod_list_all_doc, -"list_all() -> [(qid, fmt)]\n\ +"list_all() -> [(qid, unboundop, fallback)]\n\ \n\ Return the list of IDs for all queues.\n\ -Each corresponding default format is also included."); +Each corresponding default unbound op and fallback is also included."); static PyObject * queuesmod_put(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"qid", "obj", "fmt", "unboundop", NULL}; + static char *kwlist[] = {"qid", "obj", "unboundop", "fallback", NULL}; qidarg_converter_data qidarg = {0}; PyObject *obj; - int fmt; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&Oii:put", kwlist, - qidarg_converter, &qidarg, &obj, &fmt, - &unboundop)) + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O|ii:put", kwlist, + qidarg_converter, &qidarg, &obj, + &unboundarg, &fallbackarg)) { return NULL; } int64_t qid = qidarg.id; - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _queuedefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = queue_get_defaults(&_globals.queues, qid, &defaults); + if (handle_queue_error(err, self, qid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { return NULL; } /* Queue up the object. */ - int err = queue_put(&_globals.queues, qid, obj, fmt, unboundop); + int err = queue_put(&_globals.queues, qid, obj, unboundop, fallback); // This is the only place that raises QueueFull. if (handle_queue_error(err, self, qid)) { return NULL; @@ -1612,7 +1637,7 @@ queuesmod_put(PyObject *self, PyObject *args, PyObject *kwds) } PyDoc_STRVAR(queuesmod_put_doc, -"put(qid, obj, fmt)\n\ +"put(qid, obj)\n\ \n\ Add the object's data to the queue."); @@ -1628,27 +1653,26 @@ queuesmod_get(PyObject *self, PyObject *args, PyObject *kwds) int64_t qid = qidarg.id; PyObject *obj = NULL; - int fmt = 0; int unboundop = 0; - int err = queue_get(&_globals.queues, qid, &obj, &fmt, &unboundop); + int err = queue_get(&_globals.queues, qid, &obj, &unboundop); // This is the only place that raises QueueEmpty. if (handle_queue_error(err, self, qid)) { return NULL; } if (obj == NULL) { - return Py_BuildValue("Oii", Py_None, fmt, unboundop); + return Py_BuildValue("Oi", Py_None, unboundop); } - PyObject *res = Py_BuildValue("OiO", obj, fmt, Py_None); + PyObject *res = Py_BuildValue("OO", obj, Py_None); Py_DECREF(obj); return res; } PyDoc_STRVAR(queuesmod_get_doc, -"get(qid) -> (obj, fmt)\n\ +"get(qid) -> (obj, unboundop)\n\ \n\ Return a new object from the data at the front of the queue.\n\ -The object's format is also returned.\n\ +The unbound op is also returned.\n\ \n\ If there is nothing to receive then raise QueueEmpty."); @@ -1748,17 +1772,14 @@ queuesmod_get_queue_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t qid = qidarg.id; - _queue *queue = NULL; - int err = _queues_lookup(&_globals.queues, qid, &queue); + struct _queuedefaults defaults = {0}; + int err = queue_get_defaults(&_globals.queues, qid, &defaults); if (handle_queue_error(err, self, qid)) { return NULL; } - int fmt = queue->defaults.fmt; - int unboundop = queue->defaults.unboundop; - _queue_unmark_waiter(queue, _globals.queues.mutex); - PyObject *defaults = Py_BuildValue("ii", fmt, unboundop); - return defaults; + PyObject *res = Py_BuildValue("ii", defaults.unboundop, defaults.fallback); + return res; } PyDoc_STRVAR(queuesmod_get_queue_defaults_doc, @@ -1931,8 +1952,7 @@ static int module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1941,8 +1961,7 @@ module_clear(PyObject *mod) module_state *state = get_module_state(mod); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpreters_common.h b/Modules/_interpreters_common.h index edd65577284a20..40fd51d752e324 100644 --- a/Modules/_interpreters_common.h +++ b/Modules/_interpreters_common.h @@ -5,8 +5,10 @@ _RESOLVE_MODINIT_FUNC_NAME(NAME) +#define GETDATA(FUNC) ((_PyXIData_getdata_t){.basic=FUNC}) + static int -ensure_xid_class(PyTypeObject *cls, xidatafunc getdata) +ensure_xid_class(PyTypeObject *cls, _PyXIData_getdata_t getdata) { PyThreadState *tstate = PyThreadState_Get(); return _PyXIData_RegisterClass(tstate, cls, getdata); @@ -37,10 +39,37 @@ _get_interpid(_PyXIData_t *data) } +#ifdef HAS_FALLBACK +static int +resolve_fallback(int arg, xidata_fallback_t dflt, + xidata_fallback_t *p_fallback) +{ + if (arg < 0) { + *p_fallback = dflt; + return 0; + } + xidata_fallback_t fallback; + if (arg == _PyXIDATA_XIDATA_ONLY) { + fallback =_PyXIDATA_XIDATA_ONLY; + } + else if (arg == _PyXIDATA_FULL_FALLBACK) { + fallback = _PyXIDATA_FULL_FALLBACK; + } + else { + PyErr_Format(PyExc_ValueError, "unsupported fallback %d", arg); + return -1; + } + *p_fallback = fallback; + return 0; +} +#endif + + /* unbound items ************************************************************/ #ifdef HAS_UNBOUND_ITEMS +typedef int unboundop_t; #define UNBOUND_REMOVE 1 #define UNBOUND_ERROR 2 #define UNBOUND_REPLACE 3 @@ -51,6 +80,7 @@ _get_interpid(_PyXIData_t *data) // object is released but the underlying data is copied (with the "raw" // allocator) and used when the item is popped off the queue. +#ifndef NDEBUG static int check_unbound(int unboundop) { @@ -63,5 +93,31 @@ check_unbound(int unboundop) return 0; } } +#endif + +static int +resolve_unboundop(int arg, unboundop_t dflt, unboundop_t *p_unboundop) +{ + if (arg < 0) { + *p_unboundop = dflt; + return 0; + } + unboundop_t op; + if (arg == UNBOUND_REMOVE) { + op = UNBOUND_REMOVE; + } + else if (arg == UNBOUND_ERROR) { + op = UNBOUND_ERROR; + } + else if (arg == UNBOUND_REPLACE) { + op = UNBOUND_REPLACE; + } + else { + PyErr_Format(PyExc_ValueError, "unsupported unboundop %d", arg); + return -1; + } + *p_unboundop = op; + return 0; +} #endif diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 77678f7c126005..faf3b25b68c4eb 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -8,6 +8,7 @@ #include "Python.h" #include "pycore_code.h" // _PyCode_HAS_EXECUTORS() #include "pycore_crossinterp.h" // _PyXIData_t +#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_interp.h" // _PyInterpreterState_IDIncref() #include "pycore_modsupport.h" // _PyArg_BadArgument() #include "pycore_namespace.h" // _PyNamespace_New() @@ -71,6 +72,22 @@ is_running_main(PyInterpreterState *interp) } +static inline int +is_notshareable_raised(PyThreadState *tstate) +{ + PyObject *exctype = _PyXIData_GetNotShareableErrorType(tstate); + return _PyErr_ExceptionMatches(tstate, exctype); +} + +static void +unwrap_not_shareable(PyThreadState *tstate, _PyXI_failure *failure) +{ + if (_PyXI_UnwrapNotShareableError(tstate, failure) < 0) { + _PyErr_Clear(tstate); + } +} + + /* Cross-interpreter Buffer Views *******************************************/ /* When a memoryview object is "shared" between interpreters, @@ -286,7 +303,7 @@ register_memoryview_xid(PyObject *mod, PyTypeObject **p_state) *p_state = cls; // Register XID for the builtin memoryview type. - if (ensure_xid_class(&PyMemoryView_Type, _pybuffer_shared) < 0) { + if (ensure_xid_class(&PyMemoryView_Type, GETDATA(_pybuffer_shared)) < 0) { return -1; } // We don't ever bother un-registering memoryview. @@ -319,10 +336,10 @@ _get_current_module_state(void) { PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } module_state *state = get_module_state(mod); Py_DECREF(mod); @@ -359,96 +376,6 @@ _get_current_xibufferview_type(void) } -/* Python code **************************************************************/ - -static const char * -check_code_str(PyUnicodeObject *text) -{ - assert(text != NULL); - if (PyUnicode_GET_LENGTH(text) == 0) { - return "too short"; - } - - // XXX Verify that it parses? - - return NULL; -} - -static const char * -check_code_object(PyCodeObject *code) -{ - assert(code != NULL); - if (code->co_argcount > 0 - || code->co_posonlyargcount > 0 - || code->co_kwonlyargcount > 0 - || code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) - { - return "arguments not supported"; - } - if (code->co_ncellvars > 0) { - return "closures not supported"; - } - // We trust that no code objects under co_consts have unbound cell vars. - - if (_PyCode_HAS_EXECUTORS(code) || _PyCode_HAS_INSTRUMENTATION(code)) { - return "only basic functions are supported"; - } - if (code->_co_monitoring != NULL) { - return "only basic functions are supported"; - } - if (code->co_extra != NULL) { - return "only basic functions are supported"; - } - - return NULL; -} - -#define RUN_TEXT 1 -#define RUN_CODE 2 - -static const char * -get_code_str(PyObject *arg, Py_ssize_t *len_p, PyObject **bytes_p, int *flags_p) -{ - const char *codestr = NULL; - Py_ssize_t len = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - - if (PyUnicode_Check(arg)) { - assert(PyUnicode_Check(arg) - && (check_code_str((PyUnicodeObject *)arg) == NULL)); - codestr = PyUnicode_AsUTF8AndSize(arg, &len); - if (codestr == NULL) { - return NULL; - } - if (strlen(codestr) != (size_t)len) { - PyErr_SetString(PyExc_ValueError, - "source code string cannot contain null bytes"); - return NULL; - } - flags = RUN_TEXT; - } - else { - assert(PyCode_Check(arg) - && (check_code_object((PyCodeObject *)arg) == NULL)); - flags = RUN_CODE; - - // Serialize the code object. - bytes_obj = PyMarshal_WriteObjectToString(arg, Py_MARSHAL_VERSION); - if (bytes_obj == NULL) { - return NULL; - } - codestr = PyBytes_AS_STRING(bytes_obj); - len = PyBytes_GET_SIZE(bytes_obj); - } - - *flags_p = flags; - *bytes_p = bytes_obj; - *len_p = len; - return codestr; -} - - /* interpreter-specific code ************************************************/ static int @@ -511,73 +438,290 @@ config_from_object(PyObject *configobj, PyInterpreterConfig *config) } +struct interp_call { + _PyXIData_t *func; + _PyXIData_t *args; + _PyXIData_t *kwargs; + struct { + _PyXIData_t func; + _PyXIData_t args; + _PyXIData_t kwargs; + } _preallocated; +}; + +static void +_interp_call_clear(struct interp_call *call) +{ + if (call->func != NULL) { + _PyXIData_Clear(NULL, call->func); + } + if (call->args != NULL) { + _PyXIData_Clear(NULL, call->args); + } + if (call->kwargs != NULL) { + _PyXIData_Clear(NULL, call->kwargs); + } + *call = (struct interp_call){0}; +} + static int -_run_script(PyObject *ns, const char *codestr, Py_ssize_t codestrlen, int flags) +_interp_call_pack(PyThreadState *tstate, struct interp_call *call, + PyObject *func, PyObject *args, PyObject *kwargs) { - PyObject *result = NULL; - if (flags & RUN_TEXT) { - result = PyRun_StringFlags(codestr, Py_file_input, ns, ns, NULL); - } - else if (flags & RUN_CODE) { - PyObject *code = PyMarshal_ReadObjectFromString(codestr, codestrlen); - if (code != NULL) { - result = PyEval_EvalCode(code, ns, ns); - Py_DECREF(code); + xidata_fallback_t fallback = _PyXIDATA_FULL_FALLBACK; + assert(call->func == NULL); + assert(call->args == NULL); + assert(call->kwargs == NULL); + // Handle the func. + if (!PyCallable_Check(func)) { + _PyErr_Format(tstate, PyExc_TypeError, + "expected a callable, got %R", func); + return -1; + } + if (_PyFunction_GetXIData(tstate, func, &call->_preallocated.func) < 0) { + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (_PyPickle_GetXIData(tstate, func, &call->_preallocated.func) < 0) { + _PyErr_SetRaisedException(tstate, exc); + return -1; } + Py_DECREF(exc); + } + call->func = &call->_preallocated.func; + // Handle the args. + if (args == NULL || args == Py_None) { + // Leave it empty. } else { - Py_UNREACHABLE(); + assert(PyTuple_Check(args)); + if (PyTuple_GET_SIZE(args) > 0) { + if (_PyObject_GetXIData( + tstate, args, fallback, &call->_preallocated.args) < 0) + { + _interp_call_clear(call); + return -1; + } + call->args = &call->_preallocated.args; + } } - if (result == NULL) { - return -1; + // Handle the kwargs. + if (kwargs == NULL || kwargs == Py_None) { + // Leave it empty. + } + else { + assert(PyDict_Check(kwargs)); + if (PyDict_GET_SIZE(kwargs) > 0) { + if (_PyObject_GetXIData( + tstate, kwargs, fallback, &call->_preallocated.kwargs) < 0) + { + _interp_call_clear(call); + return -1; + } + call->kwargs = &call->_preallocated.kwargs; + } } - Py_DECREF(result); // We throw away the result. return 0; } +static void +wrap_notshareable(PyThreadState *tstate, const char *label) +{ + if (!is_notshareable_raised(tstate)) { + return; + } + assert(label != NULL && strlen(label) > 0); + PyObject *cause = _PyErr_GetRaisedException(tstate); + _PyXIData_FormatNotShareableError(tstate, "%s not shareable", label); + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyException_SetCause(exc, cause); + _PyErr_SetRaisedException(tstate, exc); +} + static int -_run_in_interpreter(PyInterpreterState *interp, - const char *codestr, Py_ssize_t codestrlen, - PyObject *shareables, int flags, - PyObject **p_excinfo) +_interp_call_unpack(struct interp_call *call, + PyObject **p_func, PyObject **p_args, PyObject **p_kwargs) { - assert(!PyErr_Occurred()); - _PyXI_session session = {0}; + PyThreadState *tstate = PyThreadState_Get(); - // Prep and switch interpreters. - if (_PyXI_Enter(&session, interp, shareables) < 0) { - if (PyErr_Occurred()) { - // If an error occured at this step, it means that interp - // was not prepared and switched. + // Unpack the func. + PyObject *func = _PyXIData_NewObject(call->func); + if (func == NULL) { + wrap_notshareable(tstate, "func"); + return -1; + } + // Unpack the args. + PyObject *args; + if (call->args == NULL) { + args = PyTuple_New(0); + if (args == NULL) { + Py_DECREF(func); return -1; } - // Now, apply the error from another interpreter: - PyObject *excinfo = _PyXI_ApplyError(session.error); - if (excinfo != NULL) { - *p_excinfo = excinfo; + } + else { + args = _PyXIData_NewObject(call->args); + if (args == NULL) { + wrap_notshareable(tstate, "args"); + Py_DECREF(func); + return -1; } - assert(PyErr_Occurred()); + assert(PyTuple_Check(args)); + } + // Unpack the kwargs. + PyObject *kwargs = NULL; + if (call->kwargs != NULL) { + kwargs = _PyXIData_NewObject(call->kwargs); + if (kwargs == NULL) { + wrap_notshareable(tstate, "kwargs"); + Py_DECREF(func); + Py_DECREF(args); + return -1; + } + assert(PyDict_Check(kwargs)); + } + *p_func = func; + *p_args = args; + *p_kwargs = kwargs; + return 0; +} + +static int +_make_call(struct interp_call *call, + PyObject **p_result, _PyXI_failure *failure) +{ + assert(call != NULL && call->func != NULL); + PyThreadState *tstate = _PyThreadState_GET(); + + // Get the func and args. + PyObject *func = NULL, *args = NULL, *kwargs = NULL; + if (_interp_call_unpack(call, &func, &args, &kwargs) < 0) { + assert(func == NULL); + assert(args == NULL); + assert(kwargs == NULL); + _PyXI_InitFailure(failure, _PyXI_ERR_OTHER, NULL); + unwrap_not_shareable(tstate, failure); return -1; } + assert(!_PyErr_Occurred(tstate)); - // Run the script. - int res = _run_script(session.main_ns, codestr, codestrlen, flags); + // Make the call. + PyObject *resobj = PyObject_Call(func, args, kwargs); + Py_DECREF(func); + Py_XDECREF(args); + Py_XDECREF(kwargs); + if (resobj == NULL) { + return -1; + } + *p_result = resobj; + return 0; +} - // Clean up and switch back. - _PyXI_Exit(&session); +static int +_run_script(_PyXIData_t *script, PyObject *ns, _PyXI_failure *failure) +{ + PyObject *code = _PyXIData_NewObject(script); + if (code == NULL) { + _PyXI_InitFailure(failure, _PyXI_ERR_NOT_SHAREABLE, NULL); + return -1; + } + PyObject *result = PyEval_EvalCode(code, ns, ns); + Py_DECREF(code); + if (result == NULL) { + _PyXI_InitFailure(failure, _PyXI_ERR_UNCAUGHT_EXCEPTION, NULL); + return -1; + } + assert(result == Py_None); + Py_DECREF(result); // We throw away the result. + return 0; +} - // Propagate any exception out to the caller. - assert(!PyErr_Occurred()); - if (res < 0) { - PyObject *excinfo = _PyXI_ApplyCapturedException(&session); - if (excinfo != NULL) { - *p_excinfo = excinfo; +struct run_result { + PyObject *result; + PyObject *excinfo; +}; + +static void +_run_result_clear(struct run_result *runres) +{ + Py_CLEAR(runres->result); + Py_CLEAR(runres->excinfo); +} + +static int +_run_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, + _PyXIData_t *script, struct interp_call *call, + PyObject *shareables, struct run_result *runres) +{ + assert(!_PyErr_Occurred(tstate)); + int res = -1; + _PyXI_failure *failure = _PyXI_NewFailure(); + if (failure == NULL) { + return -1; + } + _PyXI_session *session = _PyXI_NewSession(); + if (session == NULL) { + _PyXI_FreeFailure(failure); + return -1; + } + _PyXI_session_result result = {0}; + + // Prep and switch interpreters. + if (_PyXI_Enter(session, interp, shareables, &result) < 0) { + // If an error occurred at this step, it means that interp + // was not prepared and switched. + _PyXI_FreeSession(session); + _PyXI_FreeFailure(failure); + assert(result.excinfo == NULL); + return -1; + } + + // Run in the interpreter. + if (script != NULL) { + assert(call == NULL); + PyObject *mainns = _PyXI_GetMainNamespace(session, failure); + if (mainns == NULL) { + goto finally; } + res = _run_script(script, mainns, failure); } else { - assert(!_PyXI_HasCapturedException(&session)); + assert(call != NULL); + PyObject *resobj; + res = _make_call(call, &resobj, failure); + if (res == 0) { + res = _PyXI_Preserve(session, "resobj", resobj, failure); + Py_DECREF(resobj); + if (res < 0) { + goto finally; + } + } } +finally: + // Clean up and switch back. + (void)res; + int exitres = _PyXI_Exit(session, failure, &result); + assert(res == 0 || exitres != 0); + _PyXI_FreeSession(session); + _PyXI_FreeFailure(failure); + + res = exitres; + if (_PyErr_Occurred(tstate)) { + // It's a directly propagated exception. + assert(res < 0); + } + else if (res < 0) { + assert(result.excinfo != NULL); + runres->excinfo = Py_NewRef(result.excinfo); + res = -1; + } + else { + assert(result.excinfo == NULL); + runres->result = _PyXI_GetPreserved(&result, "resobj"); + if (_PyErr_Occurred(tstate)) { + res = -1; + } + } + _PyXI_ClearResult(&result); return res; } @@ -895,8 +1039,8 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) PyObject *id, *updates; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OO|$p:" MODULE_NAME_STR ".set___main___attrs", - kwlist, &id, &updates, &restricted)) + "OO!|$p:" MODULE_NAME_STR ".set___main___attrs", + kwlist, &id, &PyDict_Type, &updates, &restricted)) { return NULL; } @@ -910,34 +1054,39 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) } // Check the updates. - if (updates != Py_None) { - Py_ssize_t size = PyObject_Size(updates); - if (size < 0) { - return NULL; - } - if (size == 0) { - PyErr_SetString(PyExc_ValueError, - "arg 2 must be a non-empty mapping"); - return NULL; - } + Py_ssize_t size = PyDict_Size(updates); + if (size < 0) { + return NULL; + } + if (size == 0) { + PyErr_SetString(PyExc_ValueError, + "arg 2 must be a non-empty dict"); + return NULL; } - _PyXI_session session = {0}; + _PyXI_session *session = _PyXI_NewSession(); + if (session == NULL) { + return NULL; + } // Prep and switch interpreters, including apply the updates. - if (_PyXI_Enter(&session, interp, updates) < 0) { - if (!PyErr_Occurred()) { - _PyXI_ApplyCapturedException(&session); - assert(PyErr_Occurred()); - } - else { - assert(!_PyXI_HasCapturedException(&session)); - } + if (_PyXI_Enter(session, interp, updates, NULL) < 0) { + _PyXI_FreeSession(session); return NULL; } // Clean up and switch back. - _PyXI_Exit(&session); + assert(!PyErr_Occurred()); + int res = _PyXI_Exit(session, NULL, NULL); + _PyXI_FreeSession(session); + assert(res == 0); + if (res < 0) { + // unreachable + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, "unresolved error"); + } + return NULL; + } Py_RETURN_NONE; } @@ -948,122 +1097,31 @@ PyDoc_STRVAR(set___main___attrs_doc, Bind the given attributes in the interpreter's __main__ module."); -static PyUnicodeObject * -convert_script_arg(PyObject *arg, const char *fname, const char *displayname, - const char *expected) -{ - PyUnicodeObject *str = NULL; - if (PyUnicode_CheckExact(arg)) { - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else if (PyUnicode_Check(arg)) { - // XXX str = PyUnicode_FromObject(arg); - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; - } - - const char *err = check_code_str(str); - if (err != NULL) { - Py_DECREF(str); - PyErr_Format(PyExc_ValueError, - "%.200s(): bad script text (%s)", fname, err); - return NULL; - } - - return str; -} - -static PyCodeObject * -convert_code_arg(PyObject *arg, const char *fname, const char *displayname, - const char *expected) +static PyObject * +_handle_script_error(struct run_result *runres) { - const char *kind = NULL; - PyCodeObject *code = NULL; - if (PyFunction_Check(arg)) { - if (PyFunction_GetClosure(arg) != NULL) { - PyErr_Format(PyExc_ValueError, - "%.200s(): closures not supported", fname); - return NULL; - } - code = (PyCodeObject *)PyFunction_GetCode(arg); - if (code == NULL) { - if (PyErr_Occurred()) { - // This chains. - PyErr_Format(PyExc_ValueError, - "%.200s(): bad func", fname); - } - else { - PyErr_Format(PyExc_ValueError, - "%.200s(): func.__code__ missing", fname); - } - return NULL; - } - Py_INCREF(code); - kind = "func"; - } - else if (PyCode_Check(arg)) { - code = (PyCodeObject *)Py_NewRef(arg); - kind = "code object"; - } - else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; - } - - const char *err = check_code_object(code); - if (err != NULL) { - Py_DECREF(code); - PyErr_Format(PyExc_ValueError, - "%.200s(): bad %s (%s)", fname, kind, err); + assert(runres->result == NULL); + if (runres->excinfo == NULL) { + assert(PyErr_Occurred()); return NULL; } - - return code; -} - -static int -_interp_exec(PyObject *self, PyInterpreterState *interp, - PyObject *code_arg, PyObject *shared_arg, PyObject **p_excinfo) -{ - if (shared_arg != NULL && !PyDict_CheckExact(shared_arg)) { - PyErr_SetString(PyExc_TypeError, "expected 'shared' to be a dict"); - return -1; - } - - // Extract code. - Py_ssize_t codestrlen = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - const char *codestr = get_code_str(code_arg, - &codestrlen, &bytes_obj, &flags); - if (codestr == NULL) { - return -1; - } - - // Run the code in the interpreter. - int res = _run_in_interpreter(interp, codestr, codestrlen, - shared_arg, flags, p_excinfo); - Py_XDECREF(bytes_obj); - if (res < 0) { - return -1; - } - - return 0; + assert(!PyErr_Occurred()); + return runres->excinfo; } static PyObject * interp_exec(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".exec" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "code", "shared", "restrict", NULL}; PyObject *id, *code; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" MODULE_NAME_STR ".exec", kwlist, - &id, &code, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &code, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1075,27 +1133,24 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - const char *expected = "a string, a function, or a code object"; - if (PyUnicode_Check(code)) { - code = (PyObject *)convert_script_arg(code, MODULE_NAME_STR ".exec", - "argument 2", expected); - } - else { - code = (PyObject *)convert_code_arg(code, MODULE_NAME_STR ".exec", - "argument 2", expected); - } - if (code == NULL) { + // We don't need the script to be "pure", which means it can use + // global variables. They will be resolved against __main__. + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate, NULL); return NULL; } - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, shared, &excinfo); - Py_DECREF(code); + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); + _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(exec_doc, @@ -1118,13 +1173,16 @@ is ignored, including its __globals__ dict."); static PyObject * interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".run_string" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "script", "shared", "restrict", NULL}; PyObject *id, *script; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OU|O$p:" MODULE_NAME_STR ".run_string", - kwlist, &id, &script, &shared, &restricted)) + "OU|O!$p:" FUNCNAME, kwlist, + &id, &script, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1136,20 +1194,27 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - script = (PyObject *)convert_script_arg(script, MODULE_NAME_STR ".run_string", - "argument 2", "a string"); - if (script == NULL) { + if (PyFunction_Check(script) || PyCode_Check(script)) { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a string", script); return NULL; } - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, script, shared, &excinfo); - Py_DECREF(script); + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, script, &xidata) < 0) { + unwrap_not_shareable(tstate, NULL); + return NULL; + } + + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); + _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(run_string_doc, @@ -1162,13 +1227,16 @@ Execute the provided string in the identified interpreter.\n\ static PyObject * interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".run_func" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "func", "shared", "restrict", NULL}; PyObject *id, *func; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" MODULE_NAME_STR ".run_func", - kwlist, &id, &func, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &func, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1180,21 +1248,36 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyCodeObject *code = convert_code_arg(func, MODULE_NAME_STR ".exec", - "argument 2", - "a function or a code object"); - if (code == NULL) { + // We don't worry about checking globals. They will be resolved + // against __main__. + PyObject *code; + if (PyFunction_Check(func)) { + code = PyFunction_GET_CODE(func); + } + else if (PyCode_Check(func)) { + code = func; + } + else { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a function", func); return NULL; } - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, (PyObject *)code, shared, &excinfo); - Py_DECREF(code); + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate, NULL); + return NULL; + } + + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); + _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(run_func_doc, @@ -1209,16 +1292,21 @@ are not supported. Methods and other callables are not supported either.\n\ static PyObject * interp_call(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".call" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "callable", "args", "kwargs", - "restrict", NULL}; + "preserve_exc", "restrict", NULL}; PyObject *id, *callable; PyObject *args_obj = NULL; PyObject *kwargs_obj = NULL; + int preserve_exc = 0; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|OO$p:" MODULE_NAME_STR ".call", kwlist, - &id, &callable, &args_obj, &kwargs_obj, - &restricted)) + "OO|O!O!$pp:" FUNCNAME, kwlist, + &id, &callable, + &PyTuple_Type, &args_obj, + &PyDict_Type, &kwargs_obj, + &preserve_exc, &restricted)) { return NULL; } @@ -1230,42 +1318,37 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (args_obj != NULL) { - PyErr_SetString(PyExc_ValueError, "got unexpected args"); - return NULL; - } - if (kwargs_obj != NULL) { - PyErr_SetString(PyExc_ValueError, "got unexpected kwargs"); + struct interp_call call = {0}; + if (_interp_call_pack(tstate, &call, callable, args_obj, kwargs_obj) < 0) { return NULL; } - PyObject *code = (PyObject *)convert_code_arg(callable, MODULE_NAME_STR ".call", - "argument 2", "a function"); - if (code == NULL) { - return NULL; + PyObject *res_and_exc = NULL; + struct run_result runres = {0}; + if (_run_in_interpreter(tstate, interp, NULL, &call, NULL, &runres) < 0) { + if (runres.excinfo == NULL) { + assert(_PyErr_Occurred(tstate)); + goto finally; + } + assert(!_PyErr_Occurred(tstate)); } + assert(runres.result == NULL || runres.excinfo == NULL); + res_and_exc = Py_BuildValue("OO", + (runres.result ? runres.result : Py_None), + (runres.excinfo ? runres.excinfo : Py_None)); - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, NULL, &excinfo); - Py_DECREF(code); - if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; - } - Py_RETURN_NONE; +finally: + _interp_call_clear(&call); + _run_result_clear(&runres); + return res_and_exc; +#undef FUNCNAME } PyDoc_STRVAR(call_doc, "call(id, callable, args=None, kwargs=None, *, restrict=False)\n\ \n\ Call the provided object in the identified interpreter.\n\ -Pass the given args and kwargs, if possible.\n\ -\n\ -\"callable\" may be a plain function with no free vars that takes\n\ -no arguments.\n\ -\n\ -The function's code object is used and all its state\n\ -is ignored, including its __globals__ dict."); +Pass the given args and kwargs, if possible."); static PyObject * @@ -1332,11 +1415,14 @@ interp_get_config(PyObject *self, PyObject *args, PyObject *kwds) PyObject *idobj = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O?|$p:get_config", kwlist, + "O|$p:get_config", kwlist, &idobj, &restricted)) { return NULL; } + if (idobj == Py_None) { + idobj = NULL; + } int reqready = 0; PyInterpreterState *interp = \ @@ -1453,14 +1539,14 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"exc", NULL}; PyObject *exc_arg = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "|O?:capture_exception", kwlist, + "|O:capture_exception", kwlist, &exc_arg)) { return NULL; } PyObject *exc = exc_arg; - if (exc == NULL) { + if (exc == NULL || exc == Py_None) { exc = PyErr_GetRaisedException(); if (exc == NULL) { Py_RETURN_NONE; @@ -1472,16 +1558,16 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) } PyObject *captured = NULL; - _PyXI_excinfo info = {0}; - if (_PyXI_InitExcInfo(&info, exc) < 0) { + _PyXI_excinfo *info = _PyXI_NewExcInfo(exc); + if (info == NULL) { goto finally; } - captured = _PyXI_ExcInfoAsObject(&info); + captured = _PyXI_ExcInfoAsObject(info); if (captured == NULL) { goto finally; } - PyObject *formatted = _PyXI_FormatExcInfo(&info); + PyObject *formatted = _PyXI_FormatExcInfo(info); if (formatted == NULL) { Py_CLEAR(captured); goto finally; @@ -1494,7 +1580,7 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) } finally: - _PyXI_ClearExcInfo(&info); + _PyXI_FreeExcInfo(info); if (exc != exc_arg) { if (PyErr_Occurred()) { PyErr_SetRaisedException(exc); @@ -1623,8 +1709,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1632,8 +1717,7 @@ module_clear(PyObject *mod) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 4724e97982f349..25c8bf8b3d508b 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -13,6 +13,7 @@ #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _Py_FatalErrorFormat() #include "pycore_pylifecycle.h" // _Py_IsInterpreterFinalizing() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "_iomodule.h" @@ -421,8 +422,7 @@ buffered_dealloc(PyObject *op) return; _PyObject_GC_UNTRACK(self); self->ok = 0; - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); if (self->buffer) { PyMem_Free(self->buffer); self->buffer = NULL; @@ -2312,8 +2312,7 @@ bufferedrwpair_dealloc(PyObject *op) rwpair *self = rwpair_CAST(op); PyTypeObject *tp = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); (void)bufferedrwpair_clear(op); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index e45a2d1a16dcba..a3b653f30dd2cf 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -1,8 +1,11 @@ #include "Python.h" +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() #include "pycore_object.h" #include "pycore_sysmodule.h" // _PySys_GetSizeOf() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() +#include "pycore_pyatomic_ft_wrappers.h" -#include <stddef.h> // offsetof() +#include <stddef.h> // offsetof() #include "_iomodule.h" /*[clinic input] @@ -50,7 +53,7 @@ check_closed(bytesio *self) static int check_exports(bytesio *self) { - if (self->exports > 0) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) > 0) { PyErr_SetString(PyExc_BufferError, "Existing exports of data: object cannot be re-sized"); return 1; @@ -68,15 +71,17 @@ check_exports(bytesio *self) return NULL; \ } -#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1) +#define SHARED_BUF(self) (!_PyObject_IsUniquelyReferenced((self)->buf)) /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the next newline character. */ static Py_ssize_t -scan_eol(bytesio *self, Py_ssize_t len) +scan_eol_lock_held(bytesio *self, Py_ssize_t len) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + const char *start, *n; Py_ssize_t maxlen; @@ -109,11 +114,13 @@ scan_eol(bytesio *self, Py_ssize_t len) The caller should ensure that the 'size' argument is non-negative and not lesser than self->string_size. Returns 0 on success, -1 otherwise. */ static int -unshare_buffer(bytesio *self, size_t size) +unshare_buffer_lock_held(bytesio *self, size_t size) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + PyObject *new_buf; assert(SHARED_BUF(self)); - assert(self->exports == 0); + assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) == 0); assert(size >= (size_t)self->string_size); new_buf = PyBytes_FromStringAndSize(NULL, size); if (new_buf == NULL) @@ -128,10 +135,12 @@ unshare_buffer(bytesio *self, size_t size) The caller should ensure that the 'size' argument is non-negative. Returns 0 on success, -1 otherwise. */ static int -resize_buffer(bytesio *self, size_t size) +resize_buffer_lock_held(bytesio *self, size_t size) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + assert(self->buf != NULL); - assert(self->exports == 0); + assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) == 0); /* Here, unsigned types are used to avoid dealing with signed integer overflow, which is undefined in C. */ @@ -160,7 +169,7 @@ resize_buffer(bytesio *self, size_t size) } if (SHARED_BUF(self)) { - if (unshare_buffer(self, alloc) < 0) + if (unshare_buffer_lock_held(self, alloc) < 0) return -1; } else { @@ -181,8 +190,10 @@ resize_buffer(bytesio *self, size_t size) Inlining is disabled because it's significantly decreases performance of writelines() in PGO build. */ Py_NO_INLINE static Py_ssize_t -write_bytes(bytesio *self, PyObject *b) +write_bytes_lock_held(bytesio *self, PyObject *b) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + if (check_closed(self)) { return -1; } @@ -202,13 +213,13 @@ write_bytes(bytesio *self, PyObject *b) assert(self->pos >= 0); size_t endpos = (size_t)self->pos + len; if (endpos > (size_t)PyBytes_GET_SIZE(self->buf)) { - if (resize_buffer(self, endpos) < 0) { + if (resize_buffer_lock_held(self, endpos) < 0) { len = -1; goto done; } } else if (SHARED_BUF(self)) { - if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0) { + if (unshare_buffer_lock_held(self, Py_MAX(endpos, (size_t)self->string_size)) < 0) { len = -1; goto done; } @@ -245,16 +256,21 @@ write_bytes(bytesio *self, PyObject *b) static PyObject * bytesio_get_closed(PyObject *op, void *Py_UNUSED(closure)) { + PyObject *ret; bytesio *self = bytesio_CAST(op); + Py_BEGIN_CRITICAL_SECTION(self); if (self->buf == NULL) { - Py_RETURN_TRUE; + ret = Py_True; } else { - Py_RETURN_FALSE; + ret = Py_False; } + Py_END_CRITICAL_SECTION(); + return ret; } /*[clinic input] +@critical_section _io.BytesIO.readable Returns True if the IO object can be read. @@ -262,13 +278,14 @@ Returns True if the IO object can be read. static PyObject * _io_BytesIO_readable_impl(bytesio *self) -/*[clinic end generated code: output=4e93822ad5b62263 input=96c5d0cccfb29f5c]*/ +/*[clinic end generated code: output=4e93822ad5b62263 input=ab7816facef48bfd]*/ { CHECK_CLOSED(self); Py_RETURN_TRUE; } /*[clinic input] +@critical_section _io.BytesIO.writable Returns True if the IO object can be written. @@ -276,13 +293,14 @@ Returns True if the IO object can be written. static PyObject * _io_BytesIO_writable_impl(bytesio *self) -/*[clinic end generated code: output=64ff6a254b1150b8 input=700eed808277560a]*/ +/*[clinic end generated code: output=64ff6a254b1150b8 input=4f35d49d26dab024]*/ { CHECK_CLOSED(self); Py_RETURN_TRUE; } /*[clinic input] +@critical_section _io.BytesIO.seekable Returns True if the IO object can be seeked. @@ -290,13 +308,14 @@ Returns True if the IO object can be seeked. static PyObject * _io_BytesIO_seekable_impl(bytesio *self) -/*[clinic end generated code: output=6b417f46dcc09b56 input=9421f65627a344dd]*/ +/*[clinic end generated code: output=6b417f46dcc09b56 input=9cc78d15aa1deaa3]*/ { CHECK_CLOSED(self); Py_RETURN_TRUE; } /*[clinic input] +@critical_section _io.BytesIO.flush Does nothing. @@ -304,13 +323,14 @@ Does nothing. static PyObject * _io_BytesIO_flush_impl(bytesio *self) -/*[clinic end generated code: output=187e3d781ca134a0 input=561ea490be4581a7]*/ +/*[clinic end generated code: output=187e3d781ca134a0 input=c60842743910b381]*/ { CHECK_CLOSED(self); Py_RETURN_NONE; } /*[clinic input] +@critical_section _io.BytesIO.getbuffer cls: defining_class @@ -321,7 +341,7 @@ Get a read-write view over the contents of the BytesIO object. static PyObject * _io_BytesIO_getbuffer_impl(bytesio *self, PyTypeObject *cls) -/*[clinic end generated code: output=045091d7ce87fe4e input=0668fbb48f95dffa]*/ +/*[clinic end generated code: output=045091d7ce87fe4e input=8295764061be77fd]*/ { _PyIO_State *state = get_io_state_by_cls(cls); PyTypeObject *type = state->PyBytesIOBuffer_Type; @@ -340,6 +360,7 @@ _io_BytesIO_getbuffer_impl(bytesio *self, PyTypeObject *cls) } /*[clinic input] +@critical_section _io.BytesIO.getvalue Retrieve the entire contents of the BytesIO object. @@ -347,16 +368,16 @@ Retrieve the entire contents of the BytesIO object. static PyObject * _io_BytesIO_getvalue_impl(bytesio *self) -/*[clinic end generated code: output=b3f6a3233c8fd628 input=4b403ac0af3973ed]*/ +/*[clinic end generated code: output=b3f6a3233c8fd628 input=c91bff398df0c352]*/ { CHECK_CLOSED(self); - if (self->string_size <= 1 || self->exports > 0) + if (self->string_size <= 1 || FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) > 0) return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf), self->string_size); if (self->string_size != PyBytes_GET_SIZE(self->buf)) { if (SHARED_BUF(self)) { - if (unshare_buffer(self, self->string_size) < 0) + if (unshare_buffer_lock_held(self, self->string_size) < 0) return NULL; } else { @@ -368,6 +389,7 @@ _io_BytesIO_getvalue_impl(bytesio *self) } /*[clinic input] +@critical_section _io.BytesIO.isatty Always returns False. @@ -377,13 +399,14 @@ BytesIO objects are not connected to a TTY-like device. static PyObject * _io_BytesIO_isatty_impl(bytesio *self) -/*[clinic end generated code: output=df67712e669f6c8f input=6f97f0985d13f827]*/ +/*[clinic end generated code: output=df67712e669f6c8f input=50487b74dc5ae8a9]*/ { CHECK_CLOSED(self); Py_RETURN_FALSE; } /*[clinic input] +@critical_section _io.BytesIO.tell Current file position, an integer. @@ -391,32 +414,42 @@ Current file position, an integer. static PyObject * _io_BytesIO_tell_impl(bytesio *self) -/*[clinic end generated code: output=b54b0f93cd0e5e1d input=b106adf099cb3657]*/ +/*[clinic end generated code: output=b54b0f93cd0e5e1d input=2c7b0e8f82e05c4d]*/ { CHECK_CLOSED(self); return PyLong_FromSsize_t(self->pos); } static PyObject * -read_bytes(bytesio *self, Py_ssize_t size) +read_bytes_lock_held(bytesio *self, Py_ssize_t size) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + const char *output; assert(self->buf != NULL); assert(size <= self->string_size); if (size > 1 && self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) && - self->exports == 0) { + FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) == 0) { self->pos += size; return Py_NewRef(self->buf); } + /* gh-141311: Avoid undefined behavior when self->pos (limit PY_SSIZE_T_MAX) + is beyond the size of self->buf. Assert above validates size is always in + bounds. When self->pos is out of bounds calling code sets size to 0. */ + if (size == 0) { + return PyBytes_FromStringAndSize(NULL, 0); + } + output = PyBytes_AS_STRING(self->buf) + self->pos; self->pos += size; return PyBytes_FromStringAndSize(output, size); } /*[clinic input] +@critical_section _io.BytesIO.read size: Py_ssize_t(accept={int, NoneType}) = -1 / @@ -429,7 +462,7 @@ Return an empty bytes object at EOF. static PyObject * _io_BytesIO_read_impl(bytesio *self, Py_ssize_t size) -/*[clinic end generated code: output=9cc025f21c75bdd2 input=74344a39f431c3d7]*/ +/*[clinic end generated code: output=9cc025f21c75bdd2 input=9e2f7ff3075fdd39]*/ { Py_ssize_t n; @@ -443,11 +476,12 @@ _io_BytesIO_read_impl(bytesio *self, Py_ssize_t size) size = 0; } - return read_bytes(self, size); + return read_bytes_lock_held(self, size); } /*[clinic input] +@critical_section _io.BytesIO.read1 size: Py_ssize_t(accept={int, NoneType}) = -1 / @@ -460,12 +494,13 @@ Return an empty bytes object at EOF. static PyObject * _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size) -/*[clinic end generated code: output=d0f843285aa95f1c input=440a395bf9129ef5]*/ +/*[clinic end generated code: output=d0f843285aa95f1c input=a08fc9e507ab380c]*/ { return _io_BytesIO_read_impl(self, size); } /*[clinic input] +@critical_section _io.BytesIO.readline size: Py_ssize_t(accept={int, NoneType}) = -1 / @@ -479,18 +514,19 @@ Return an empty bytes object at EOF. static PyObject * _io_BytesIO_readline_impl(bytesio *self, Py_ssize_t size) -/*[clinic end generated code: output=4bff3c251df8ffcd input=e7c3fbd1744e2783]*/ +/*[clinic end generated code: output=4bff3c251df8ffcd input=db09d47e23cf2c9e]*/ { Py_ssize_t n; CHECK_CLOSED(self); - n = scan_eol(self, size); + n = scan_eol_lock_held(self, size); - return read_bytes(self, n); + return read_bytes_lock_held(self, n); } /*[clinic input] +@critical_section _io.BytesIO.readlines size as arg: object = None / @@ -504,7 +540,7 @@ total number of bytes in the lines returned. static PyObject * _io_BytesIO_readlines_impl(bytesio *self, PyObject *arg) -/*[clinic end generated code: output=09b8e34c880808ff input=691aa1314f2c2a87]*/ +/*[clinic end generated code: output=09b8e34c880808ff input=5c57d7d78e409985]*/ { Py_ssize_t maxsize, size, n; PyObject *result, *line; @@ -533,7 +569,7 @@ _io_BytesIO_readlines_impl(bytesio *self, PyObject *arg) return NULL; output = PyBytes_AS_STRING(self->buf) + self->pos; - while ((n = scan_eol(self, -1)) != 0) { + while ((n = scan_eol_lock_held(self, -1)) != 0) { self->pos += n; line = PyBytes_FromStringAndSize(output, n); if (!line) @@ -556,6 +592,7 @@ _io_BytesIO_readlines_impl(bytesio *self, PyObject *arg) } /*[clinic input] +@critical_section _io.BytesIO.readinto buffer: Py_buffer(accept={rwbuffer}) / @@ -568,7 +605,7 @@ is set not to block and has no data to read. static PyObject * _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) -/*[clinic end generated code: output=a5d407217dcf0639 input=1424d0fdce857919]*/ +/*[clinic end generated code: output=a5d407217dcf0639 input=093a8d330de3fcd1]*/ { Py_ssize_t len, n; @@ -579,21 +616,25 @@ _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) n = self->string_size - self->pos; if (len > n) { len = n; - if (len < 0) - len = 0; + if (len < 0) { + /* gh-141311: Avoid undefined behavior when self->pos (limit + PY_SSIZE_T_MAX) points beyond the size of self->buf. */ + return PyLong_FromSsize_t(0); + } } - memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len); - assert(self->pos + len < PY_SSIZE_T_MAX); + assert(self->pos + len <= PY_SSIZE_T_MAX); assert(len >= 0); + memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len); self->pos += len; return PyLong_FromSsize_t(len); } /*[clinic input] +@critical_section _io.BytesIO.truncate - size: Py_ssize_t(accept={int, NoneType}, c_default="((bytesio *)self)->pos") = None + size: object = None / Truncate the file to at most size bytes. @@ -603,44 +644,68 @@ The current file position is unchanged. Returns the new size. [clinic start generated code]*/ static PyObject * -_io_BytesIO_truncate_impl(bytesio *self, Py_ssize_t size) -/*[clinic end generated code: output=9ad17650c15fa09b input=dae4295e11c1bbb4]*/ +_io_BytesIO_truncate_impl(bytesio *self, PyObject *size) +/*[clinic end generated code: output=ab42491b4824f384 input=b4acb5f80481c053]*/ { CHECK_CLOSED(self); CHECK_EXPORTS(self); - if (size < 0) { - PyErr_Format(PyExc_ValueError, - "negative size value %zd", size); - return NULL; + Py_ssize_t new_size; + + if (size == Py_None) { + new_size = self->pos; + } + else { + new_size = PyLong_AsLong(size); + if (new_size == -1 && PyErr_Occurred()) { + return NULL; + } + if (new_size < 0) { + PyErr_Format(PyExc_ValueError, + "negative size value %zd", new_size); + return NULL; + } } - if (size < self->string_size) { - self->string_size = size; - if (resize_buffer(self, size) < 0) + if (new_size < self->string_size) { + self->string_size = new_size; + if (resize_buffer_lock_held(self, new_size) < 0) return NULL; } - return PyLong_FromSsize_t(size); + return PyLong_FromSsize_t(new_size); } static PyObject * -bytesio_iternext(PyObject *op) +bytesio_iternext_lock_held(PyObject *op) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); + Py_ssize_t n; bytesio *self = bytesio_CAST(op); CHECK_CLOSED(self); - n = scan_eol(self, -1); + n = scan_eol_lock_held(self, -1); if (n == 0) return NULL; - return read_bytes(self, n); + return read_bytes_lock_held(self, n); +} + +static PyObject * +bytesio_iternext(PyObject *op) +{ + PyObject *ret; + Py_BEGIN_CRITICAL_SECTION(op); + ret = bytesio_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return ret; } /*[clinic input] +@critical_section _io.BytesIO.seek pos: Py_ssize_t whence: int = 0 @@ -657,7 +722,7 @@ Returns the new absolute position. static PyObject * _io_BytesIO_seek_impl(bytesio *self, Py_ssize_t pos, int whence) -/*[clinic end generated code: output=c26204a68e9190e4 input=1e875e6ebc652948]*/ +/*[clinic end generated code: output=c26204a68e9190e4 input=20f05ddf659255df]*/ { CHECK_CLOSED(self); @@ -700,6 +765,7 @@ _io_BytesIO_seek_impl(bytesio *self, Py_ssize_t pos, int whence) } /*[clinic input] +@critical_section _io.BytesIO.write b: object / @@ -711,13 +777,14 @@ Return the number of bytes written. static PyObject * _io_BytesIO_write_impl(bytesio *self, PyObject *b) -/*[clinic end generated code: output=d3e46bcec8d9e21c input=f5ec7c8c64ed720a]*/ +/*[clinic end generated code: output=d3e46bcec8d9e21c input=46c0c17eac7474a4]*/ { - Py_ssize_t n = write_bytes(self, b); + Py_ssize_t n = write_bytes_lock_held(self, b); return n >= 0 ? PyLong_FromSsize_t(n) : NULL; } /*[clinic input] +@critical_section _io.BytesIO.writelines lines: object / @@ -731,7 +798,7 @@ each element. static PyObject * _io_BytesIO_writelines_impl(bytesio *self, PyObject *lines) -/*[clinic end generated code: output=03a43a75773bc397 input=e972539176fc8fc1]*/ +/*[clinic end generated code: output=03a43a75773bc397 input=5d6a616ae39dc9ca]*/ { PyObject *it, *item; @@ -742,7 +809,7 @@ _io_BytesIO_writelines_impl(bytesio *self, PyObject *lines) return NULL; while ((item = PyIter_Next(it)) != NULL) { - Py_ssize_t ret = write_bytes(self, item); + Py_ssize_t ret = write_bytes_lock_held(self, item); Py_DECREF(item); if (ret < 0) { Py_DECREF(it); @@ -759,6 +826,7 @@ _io_BytesIO_writelines_impl(bytesio *self, PyObject *lines) } /*[clinic input] +@critical_section _io.BytesIO.close Disable all I/O operations. @@ -766,7 +834,7 @@ Disable all I/O operations. static PyObject * _io_BytesIO_close_impl(bytesio *self) -/*[clinic end generated code: output=1471bb9411af84a0 input=37e1f55556e61f60]*/ +/*[clinic end generated code: output=1471bb9411af84a0 input=34ce76d8bd17a23b]*/ { CHECK_EXPORTS(self); Py_CLEAR(self->buf); @@ -788,35 +856,49 @@ _io_BytesIO_close_impl(bytesio *self) function to use the efficient instance representation of PEP 307. */ + static PyObject * + bytesio_getstate_lock_held(PyObject *op) + { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); + + bytesio *self = bytesio_CAST(op); + PyObject *initvalue = _io_BytesIO_getvalue_impl(self); + PyObject *dict; + PyObject *state; + + if (initvalue == NULL) + return NULL; + if (self->dict == NULL) { + dict = Py_NewRef(Py_None); + } + else { + dict = PyDict_Copy(self->dict); + if (dict == NULL) { + Py_DECREF(initvalue); + return NULL; + } + } + + state = Py_BuildValue("(OnN)", initvalue, self->pos, dict); + Py_DECREF(initvalue); + return state; +} + static PyObject * bytesio_getstate(PyObject *op, PyObject *Py_UNUSED(dummy)) { - bytesio *self = bytesio_CAST(op); - PyObject *initvalue = _io_BytesIO_getvalue_impl(self); - PyObject *dict; - PyObject *state; - - if (initvalue == NULL) - return NULL; - if (self->dict == NULL) { - dict = Py_NewRef(Py_None); - } - else { - dict = PyDict_Copy(self->dict); - if (dict == NULL) { - Py_DECREF(initvalue); - return NULL; - } - } - - state = Py_BuildValue("(OnN)", initvalue, self->pos, dict); - Py_DECREF(initvalue); - return state; + PyObject *ret; + Py_BEGIN_CRITICAL_SECTION(op); + ret = bytesio_getstate_lock_held(op); + Py_END_CRITICAL_SECTION(); + return ret; } static PyObject * -bytesio_setstate(PyObject *op, PyObject *state) +bytesio_setstate_lock_held(PyObject *op, PyObject *state) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); + PyObject *result; PyObject *position_obj; PyObject *dict; @@ -890,21 +972,30 @@ bytesio_setstate(PyObject *op, PyObject *state) Py_RETURN_NONE; } +static PyObject * +bytesio_setstate(PyObject *op, PyObject *state) +{ + PyObject *ret; + Py_BEGIN_CRITICAL_SECTION(op); + ret = bytesio_setstate_lock_held(op, state); + Py_END_CRITICAL_SECTION(); + return ret; +} + static void bytesio_dealloc(PyObject *op) { bytesio *self = bytesio_CAST(op); PyTypeObject *tp = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - if (self->exports > 0) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) > 0) { PyErr_SetString(PyExc_SystemError, "deallocated BytesIO object has exported buffers"); PyErr_Print(); } Py_CLEAR(self->buf); Py_CLEAR(self->dict); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); tp->tp_free(self); Py_DECREF(tp); } @@ -932,6 +1023,7 @@ bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } /*[clinic input] +@critical_section _io.BytesIO.__init__ initial_bytes as initvalue: object(c_default="NULL") = b'' @@ -940,13 +1032,13 @@ Buffered I/O implementation using an in-memory bytes buffer. static int _io_BytesIO___init___impl(bytesio *self, PyObject *initvalue) -/*[clinic end generated code: output=65c0c51e24c5b621 input=aac7f31b67bf0fb6]*/ +/*[clinic end generated code: output=65c0c51e24c5b621 input=3da5a74ee4c4f1ac]*/ { /* In case, __init__ is called multiple times. */ self->string_size = 0; self->pos = 0; - if (self->exports > 0) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) > 0) { PyErr_SetString(PyExc_BufferError, "Existing exports of data: object cannot be re-sized"); return -1; @@ -970,8 +1062,10 @@ _io_BytesIO___init___impl(bytesio *self, PyObject *initvalue) } static PyObject * -bytesio_sizeof(PyObject *op, PyObject *Py_UNUSED(dummy)) +bytesio_sizeof_lock_held(PyObject *op) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); + bytesio *self = bytesio_CAST(op); size_t res = _PyObject_SIZE(Py_TYPE(self)); if (self->buf && !SHARED_BUF(self)) { @@ -984,6 +1078,16 @@ bytesio_sizeof(PyObject *op, PyObject *Py_UNUSED(dummy)) return PyLong_FromSize_t(res); } +static PyObject * +bytesio_sizeof(PyObject *op, PyObject *Py_UNUSED(dummy)) +{ + PyObject *ret; + Py_BEGIN_CRITICAL_SECTION(op); + ret = bytesio_sizeof_lock_held(op); + Py_END_CRITICAL_SECTION(); + return ret; +} + static int bytesio_traverse(PyObject *op, visitproc visit, void *arg) { @@ -999,7 +1103,7 @@ bytesio_clear(PyObject *op) { bytesio *self = bytesio_CAST(op); Py_CLEAR(self->dict); - if (self->exports == 0) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) == 0) { Py_CLEAR(self->buf); } return 0; @@ -1077,18 +1181,15 @@ PyType_Spec bytesio_spec = { */ static int -bytesiobuf_getbuffer(PyObject *op, Py_buffer *view, int flags) +bytesiobuf_getbuffer_lock_held(PyObject *op, Py_buffer *view, int flags) { bytesiobuf *obj = bytesiobuf_CAST(op); bytesio *b = bytesio_CAST(obj->source); - if (view == NULL) { - PyErr_SetString(PyExc_BufferError, - "bytesiobuf_getbuffer: view==NULL argument is obsolete"); - return -1; - } - if (b->exports == 0 && SHARED_BUF(b)) { - if (unshare_buffer(b, b->string_size) < 0) + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(b); + + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(b->exports) == 0 && SHARED_BUF(b)) { + if (unshare_buffer_lock_held(b, b->string_size) < 0) return -1; } @@ -1096,16 +1197,32 @@ bytesiobuf_getbuffer(PyObject *op, Py_buffer *view, int flags) (void)PyBuffer_FillInfo(view, op, PyBytes_AS_STRING(b->buf), b->string_size, 0, flags); - b->exports++; + FT_ATOMIC_ADD_SSIZE(b->exports, 1); return 0; } +static int +bytesiobuf_getbuffer(PyObject *op, Py_buffer *view, int flags) +{ + if (view == NULL) { + PyErr_SetString(PyExc_BufferError, + "bytesiobuf_getbuffer: view==NULL argument is obsolete"); + return -1; + } + + int ret; + Py_BEGIN_CRITICAL_SECTION(bytesiobuf_CAST(op)->source); + ret = bytesiobuf_getbuffer_lock_held(op, view, flags); + Py_END_CRITICAL_SECTION(); + return ret; +} + static void bytesiobuf_releasebuffer(PyObject *op, Py_buffer *Py_UNUSED(view)) { bytesiobuf *obj = bytesiobuf_CAST(op); bytesio *b = bytesio_CAST(obj->source); - b->exports--; + FT_ATOMIC_ADD_SSIZE(b->exports, -1); } static int diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h index aaf4884d1732a9..6595dc937bbcf0 100644 --- a/Modules/_io/clinic/bytesio.c.h +++ b/Modules/_io/clinic/bytesio.c.h @@ -7,6 +7,7 @@ preserve # include "pycore_runtime.h" // _Py_ID() #endif #include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t() +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_io_BytesIO_readable__doc__, @@ -24,7 +25,13 @@ _io_BytesIO_readable_impl(bytesio *self); static PyObject * _io_BytesIO_readable(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_readable_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_readable_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO_writable__doc__, @@ -42,7 +49,13 @@ _io_BytesIO_writable_impl(bytesio *self); static PyObject * _io_BytesIO_writable(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_writable_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_writable_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO_seekable__doc__, @@ -60,7 +73,13 @@ _io_BytesIO_seekable_impl(bytesio *self); static PyObject * _io_BytesIO_seekable(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_seekable_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_seekable_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO_flush__doc__, @@ -78,7 +97,13 @@ _io_BytesIO_flush_impl(bytesio *self); static PyObject * _io_BytesIO_flush(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_flush_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_flush_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO_getbuffer__doc__, @@ -96,11 +121,18 @@ _io_BytesIO_getbuffer_impl(bytesio *self, PyTypeObject *cls); static PyObject * _io_BytesIO_getbuffer(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "getbuffer() takes no arguments"); - return NULL; + goto exit; } - return _io_BytesIO_getbuffer_impl((bytesio *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_getbuffer_impl((bytesio *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_io_BytesIO_getvalue__doc__, @@ -118,7 +150,13 @@ _io_BytesIO_getvalue_impl(bytesio *self); static PyObject * _io_BytesIO_getvalue(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_getvalue_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_getvalue_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO_isatty__doc__, @@ -138,7 +176,13 @@ _io_BytesIO_isatty_impl(bytesio *self); static PyObject * _io_BytesIO_isatty(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_isatty_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_isatty_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO_tell__doc__, @@ -156,7 +200,13 @@ _io_BytesIO_tell_impl(bytesio *self); static PyObject * _io_BytesIO_tell(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_tell_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_tell_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO_read__doc__, @@ -190,7 +240,9 @@ _io_BytesIO_read(PyObject *self, PyObject *const *args, Py_ssize_t nargs) goto exit; } skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_read_impl((bytesio *)self, size); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -227,7 +279,9 @@ _io_BytesIO_read1(PyObject *self, PyObject *const *args, Py_ssize_t nargs) goto exit; } skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_read1_impl((bytesio *)self, size); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -265,7 +319,9 @@ _io_BytesIO_readline(PyObject *self, PyObject *const *args, Py_ssize_t nargs) goto exit; } skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_readline_impl((bytesio *)self, size); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -301,7 +357,9 @@ _io_BytesIO_readlines(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } arg = args[0]; skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_readlines_impl((bytesio *)self, arg); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -332,7 +390,9 @@ _io_BytesIO_readinto(PyObject *self, PyObject *arg) _PyArg_BadArgument("readinto", "argument", "read-write bytes-like object", arg); goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_readinto_impl((bytesio *)self, &buffer); + Py_END_CRITICAL_SECTION(); exit: /* Cleanup for buffer */ @@ -356,13 +416,13 @@ PyDoc_STRVAR(_io_BytesIO_truncate__doc__, {"truncate", _PyCFunction_CAST(_io_BytesIO_truncate), METH_FASTCALL, _io_BytesIO_truncate__doc__}, static PyObject * -_io_BytesIO_truncate_impl(bytesio *self, Py_ssize_t size); +_io_BytesIO_truncate_impl(bytesio *self, PyObject *size); static PyObject * _io_BytesIO_truncate(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - Py_ssize_t size = ((bytesio *)self)->pos; + PyObject *size = Py_None; if (!_PyArg_CheckPositional("truncate", nargs, 0, 1)) { goto exit; @@ -370,11 +430,11 @@ _io_BytesIO_truncate(PyObject *self, PyObject *const *args, Py_ssize_t nargs) if (nargs < 1) { goto skip_optional; } - if (!_Py_convert_optional_to_ssize_t(args[0], &size)) { - goto exit; - } + size = args[0]; skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_truncate_impl((bytesio *)self, size); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -428,7 +488,9 @@ _io_BytesIO_seek(PyObject *self, PyObject *const *args, Py_ssize_t nargs) goto exit; } skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_seek_impl((bytesio *)self, pos, whence); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -453,7 +515,9 @@ _io_BytesIO_write(PyObject *self, PyObject *b) { PyObject *return_value = NULL; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_write_impl((bytesio *)self, b); + Py_END_CRITICAL_SECTION(); return return_value; } @@ -479,7 +543,9 @@ _io_BytesIO_writelines(PyObject *self, PyObject *lines) { PyObject *return_value = NULL; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO_writelines_impl((bytesio *)self, lines); + Py_END_CRITICAL_SECTION(); return return_value; } @@ -499,7 +565,13 @@ _io_BytesIO_close_impl(bytesio *self); static PyObject * _io_BytesIO_close(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _io_BytesIO_close_impl((bytesio *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _io_BytesIO_close_impl((bytesio *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_io_BytesIO___init____doc__, @@ -558,9 +630,11 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) } initvalue = fastargs[0]; skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _io_BytesIO___init___impl((bytesio *)self, initvalue); + Py_END_CRITICAL_SECTION(); exit: return return_value; } -/*[clinic end generated code: output=6dbfd82f4e9d4ef3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=daa81dfdae5ccc57 input=a9049054013a1b77]*/ diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 8fcb27049d6c7c..26537fc6395e9f 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -4,6 +4,7 @@ #include "pycore_fileutils.h" // _Py_BEGIN_SUPPRESS_IPH #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stdbool.h> // bool #ifdef HAVE_UNISTD_H @@ -570,9 +571,7 @@ fileio_dealloc(PyObject *op) PyMem_Free(self->stat_atopen); self->stat_atopen = NULL; } - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); (void)fileio_clear(op); PyTypeObject *tp = Py_TYPE(op); diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index cd4c7e7cead277..17a9d2800e9d7a 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -14,6 +14,7 @@ #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_object.h" // _PyType_HasFeature() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> // offsetof() #include "_iomodule.h" @@ -383,8 +384,7 @@ iobase_dealloc(PyObject *op) } PyTypeObject *tp = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); Py_CLEAR(self->dict); tp->tp_free(self); Py_DECREF(tp); @@ -938,14 +938,21 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) return res; } - n = PyNumber_AsSsize_t(res, PyExc_ValueError); + Py_ssize_t bytes_filled = PyNumber_AsSsize_t(res, PyExc_ValueError); Py_DECREF(res); - if (n == -1 && PyErr_Occurred()) { + if (bytes_filled == -1 && PyErr_Occurred()) { Py_DECREF(b); return NULL; } + if (bytes_filled < 0 || bytes_filled > n) { + Py_DECREF(b); + PyErr_Format(PyExc_ValueError, + "readinto returned %zd outside buffer size %zd", + bytes_filled, n); + return NULL; + } - res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), n); + res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), bytes_filled); Py_DECREF(b); return res; } diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 9d1bfa3ea05cea..b073200bf21204 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -1,6 +1,7 @@ #include "Python.h" #include <stddef.h> // offsetof() #include "pycore_object.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "_iomodule.h" /* Implementation note: the buffer is always at least one character longer @@ -628,9 +629,7 @@ stringio_dealloc(PyObject *op) } PyUnicodeWriter_Discard(self->writer); (void)stringio_clear(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); tp->tp_free(self); Py_DECREF(tp); } diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 86328e46a7b131..a136987fb52a4a 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -16,6 +16,7 @@ #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "_iomodule.h" @@ -1469,8 +1470,7 @@ textiowrapper_dealloc(PyObject *op) return; self->ok = 0; _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); (void)textiowrapper_clear(op); tp->tp_free(self); Py_DECREF(tp); @@ -1578,6 +1578,8 @@ _io_TextIOWrapper_detach_impl(textio *self) static int _textiowrapper_writeflush(textio *self) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + if (self->pending_bytes == NULL) return 0; @@ -2842,7 +2844,7 @@ _io_TextIOWrapper_tell_impl(textio *self) current pos */ skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); skip_back = 1; - assert(skip_back <= PyBytes_GET_SIZE(next_input)); + assert(skip_bytes <= PyBytes_GET_SIZE(next_input)); input = PyBytes_AS_STRING(next_input); while (skip_bytes > 0) { /* Decode up to temptative start point */ @@ -3173,8 +3175,9 @@ _io_TextIOWrapper_close_impl(textio *self) } static PyObject * -textiowrapper_iternext(PyObject *op) +textiowrapper_iternext_lock_held(PyObject *op) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); PyObject *line; textio *self = textio_CAST(op); @@ -3210,6 +3213,16 @@ textiowrapper_iternext(PyObject *op) return line; } +static PyObject * +textiowrapper_iternext(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = textiowrapper_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] @critical_section @getter diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c index 3e783b9da45652..950b7fe241ccde 100644 --- a/Modules/_io/winconsoleio.c +++ b/Modules/_io/winconsoleio.c @@ -10,6 +10,7 @@ #include "pycore_fileutils.h" // _Py_BEGIN_SUPPRESS_IPH #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #ifdef HAVE_WINDOWS_CONSOLE_IO @@ -518,8 +519,7 @@ winconsoleio_dealloc(PyObject *op) if (_PyIOBase_finalize(op) < 0) return; _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); Py_CLEAR(self->dict); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_json.c b/Modules/_json.c index 89b0a41dd10acb..2f82a69a1579dc 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1228,16 +1228,23 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; PyEncoderObject *s; - PyObject *markers = Py_None, *defaultfn, *encoder, *indent, *key_separator; + PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator; int sort_keys, skipkeys, allow_nan; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!?OOOUUppp:make_encoder", kwlist, - &PyDict_Type, &markers, &defaultfn, &encoder, &indent, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist, + &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan)) return NULL; + if (markers != Py_None && !PyDict_Check(markers)) { + PyErr_Format(PyExc_TypeError, + "make_encoder() argument 1 must be dict or None, " + "not %.200s", Py_TYPE(markers)->tp_name); + return NULL; + } + s = (PyEncoderObject *)type->tp_alloc(type, 0); if (s == NULL) return NULL; @@ -1476,13 +1483,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return PyUnicodeWriter_WriteUTF8(writer, "null", 4); + return PyUnicodeWriter_WriteASCII(writer, "null", 4); } else if (obj == Py_True) { - return PyUnicodeWriter_WriteUTF8(writer, "true", 4); + return PyUnicodeWriter_WriteASCII(writer, "true", 4); } else if (obj == Py_False) { - return PyUnicodeWriter_WriteUTF8(writer, "false", 5); + return PyUnicodeWriter_WriteASCII(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1609,6 +1616,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs if (*first) { *first = false; + if (s->indent != Py_None) { + if (write_newline_indent(writer, indent_level, indent_cache) < 0) { + Py_DECREF(keystr); + return -1; + } + } } else { if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { @@ -1649,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (PyDict_GET_SIZE(dct) == 0) { /* Fast path */ - return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + return PyUnicodeWriter_WriteASCII(writer, "{}", 2); } if (s->markers != Py_None) { @@ -1676,11 +1689,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (s->indent != Py_None) { indent_level++; separator = get_item_separator(s, indent_level, indent_cache); - if (separator == NULL || - write_newline_indent(writer, indent_level, indent_cache) < 0) - { + if (separator == NULL) goto bail; - } } if (s->sort_keys || !PyDict_CheckExact(dct)) { @@ -1720,7 +1730,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, goto bail; Py_CLEAR(ident); } - if (s->indent != Py_None) { + if (s->indent != Py_None && !first) { indent_level--; if (write_newline_indent(writer, indent_level, indent_cache) < 0) { goto bail; @@ -1753,7 +1763,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); + return PyUnicodeWriter_WriteASCII(writer, "[]", 2); } if (s->markers != Py_None) { diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index ad618398d5b824..cb448b14d8cd63 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -87,6 +87,41 @@ copy_grouping(const char* s) return result; } +#if defined(MS_WINDOWS) + +// 16 is the number of elements in the szCodePage field +// of the __crt_locale_strings structure. +#define MAX_CP_LEN 15 + +static int +check_locale_name(const char *locale, const char *end) +{ + size_t len = end ? (size_t)(end - locale) : strlen(locale); + const char *dot = memchr(locale, '.', len); + if (dot && locale + len - dot - 1 > MAX_CP_LEN) { + return -1; + } + return 0; +} + +static int +check_locale_name_all(const char *locale) +{ + const char *start = locale; + while (1) { + const char *end = strchr(start, ';'); + if (check_locale_name(start, end) < 0) { + return -1; + } + if (end == NULL) { + break; + } + start = end + 1; + } + return 0; +} +#endif + /*[clinic input] _locale.setlocale @@ -111,6 +146,18 @@ _locale_setlocale_impl(PyObject *module, int category, const char *locale) "invalid locale category"); return NULL; } + if (locale) { + if ((category == LC_ALL + ? check_locale_name_all(locale) + : check_locale_name(locale, NULL)) < 0) + { + /* Debug assertion failure on Windows. + * _Py_BEGIN_SUPPRESS_IPH/_Py_END_SUPPRESS_IPH do not help. */ + PyErr_SetString(get_locale_state(module)->Error, + "unsupported locale setting"); + return NULL; + } + } #endif if (locale) { @@ -410,7 +457,9 @@ _locale_strxfrm_impl(PyObject *module, PyObject *str) /* assume no change in size, first */ n1 = n1 + 1; - buf = PyMem_New(wchar_t, n1); + /* Yet another +1 is needed to work around a platform bug in wcsxfrm() + * on macOS. See gh-130567. */ + buf = PyMem_New(wchar_t, n1+1); if (!buf) { PyErr_NoMemory(); goto exit; @@ -436,7 +485,54 @@ _locale_strxfrm_impl(PyObject *module, PyObject *str) goto exit; } } - result = PyUnicode_FromWideChar(buf, n2); + /* The result is just a sequence of integers, they are not necessary + Unicode code points, so PyUnicode_FromWideChar() cannot be used + here. For example, 0xD83D 0xDC0D should not be larger than 0xFF41. + */ +#if SIZEOF_WCHAR_T == 4 + { + /* Some codes can exceed the range of Unicode code points + (0 - 0x10FFFF), so they cannot be directly used in + PyUnicode_FromKindAndData(). They should be first encoded in + a way that preserves the lexicographical order. + + Codes in the range 0-0xFFFF represent themself. + Codes larger than 0xFFFF are encoded as a pair: + * 0x1xxxx -- the highest 16 bits + * 0x0xxxx -- the lowest 16 bits + */ + size_t n3 = 0; + for (size_t i = 0; i < n2; i++) { + if ((Py_UCS4)buf[i] > 0x10000u) { + n3++; + } + } + if (n3) { + n3 += n2; // no integer overflow + Py_UCS4 *buf2 = PyMem_New(Py_UCS4, n3); + if (buf2 == NULL) { + PyErr_NoMemory(); + goto exit; + } + size_t j = 0; + for (size_t i = 0; i < n2; i++) { + Py_UCS4 c = (Py_UCS4)buf[i]; + if (c > 0x10000u) { + buf2[j++] = (c >> 16) | 0x10000u; + buf2[j++] = c & 0xFFFFu; + } + else { + buf2[j++] = c; + } + } + assert(j == n3); + result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf2, n3); + PyMem_Free(buf2); + goto exit; + } + } +#endif + result = PyUnicode_FromKindAndData(sizeof(wchar_t), buf, n2); exit: PyMem_Free(buf); PyMem_Free(s); @@ -692,7 +788,17 @@ _locale_nl_langinfo_impl(PyObject *module, int item) result = result != NULL ? result : ""; char *oldloc = NULL; if (langinfo_constants[i].category != LC_CTYPE - && !is_all_ascii(result) + && *result && ( +#ifdef __GLIBC__ + // gh-133740: Always change the locale for ALT_DIGITS and ERA +# ifdef ALT_DIGITS + item == ALT_DIGITS || +# endif +# ifdef ERA + item == ERA || +# endif +#endif + !is_all_ascii(result)) && change_locale(langinfo_constants[i].category, &oldloc) < 0) { return NULL; diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c index 626c176715bdac..73602723d19bcf 100644 --- a/Modules/_lsprof.c +++ b/Modules/_lsprof.c @@ -534,6 +534,7 @@ static int statsForEntry(rotating_node_t *node, void *arg) } /*[clinic input] +@critical_section _lsprof.Profiler.getstats cls: defining_class @@ -565,7 +566,7 @@ profiler_subentry objects: static PyObject * _lsprof_Profiler_getstats_impl(ProfilerObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=1806ef720019ee03 input=445e193ef4522902]*/ +/*[clinic end generated code: output=1806ef720019ee03 input=3dc69eb85ed73d91]*/ { statscollector_t collect; collect.state = _PyType_GetModuleState(cls); @@ -613,6 +614,7 @@ setBuiltins(ProfilerObject *pObj, int nvalue) } /*[clinic input] +@critical_section _lsprof.Profiler._pystart_callback code: object @@ -624,7 +626,7 @@ _lsprof.Profiler._pystart_callback static PyObject * _lsprof_Profiler__pystart_callback_impl(ProfilerObject *self, PyObject *code, PyObject *instruction_offset) -/*[clinic end generated code: output=5fec8b7ad5ed25e8 input=b166e6953c579cda]*/ +/*[clinic end generated code: output=5fec8b7ad5ed25e8 input=b61a0e79cf1f8499]*/ { ptrace_enter_call((PyObject*)self, (void *)code, code); @@ -632,6 +634,29 @@ _lsprof_Profiler__pystart_callback_impl(ProfilerObject *self, PyObject *code, } /*[clinic input] +@critical_section +_lsprof.Profiler._pythrow_callback + + code: object + instruction_offset: object + exception: object + / + +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler__pythrow_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *exception) +/*[clinic end generated code: output=0a32988919dfb94c input=60c7f272206d3758]*/ +{ + ptrace_enter_call((PyObject*)self, (void *)code, code); + + Py_RETURN_NONE; +} + +/*[clinic input] +@critical_section _lsprof.Profiler._pyreturn_callback code: object @@ -646,7 +671,7 @@ _lsprof_Profiler__pyreturn_callback_impl(ProfilerObject *self, PyObject *code, PyObject *instruction_offset, PyObject *retval) -/*[clinic end generated code: output=9e2f6fc1b882c51e input=667ffaeb2fa6fd1f]*/ +/*[clinic end generated code: output=9e2f6fc1b882c51e input=0ddcc1ec53faa928]*/ { ptrace_leave_call((PyObject*)self, (void *)code); @@ -682,6 +707,7 @@ PyObject* get_cfunc_from_callable(PyObject* callable, PyObject* self_arg, PyObje } /*[clinic input] +@critical_section _lsprof.Profiler._ccall_callback code: object @@ -696,7 +722,7 @@ static PyObject * _lsprof_Profiler__ccall_callback_impl(ProfilerObject *self, PyObject *code, PyObject *instruction_offset, PyObject *callable, PyObject *self_arg) -/*[clinic end generated code: output=152db83cabd18cad input=0e66687cfb95c001]*/ +/*[clinic end generated code: output=152db83cabd18cad input=2fc1e0630ee5e32b]*/ { if (self->flags & POF_BUILTINS) { PyObject* cfunc = get_cfunc_from_callable(callable, self_arg, self->missing); @@ -712,6 +738,7 @@ _lsprof_Profiler__ccall_callback_impl(ProfilerObject *self, PyObject *code, } /*[clinic input] +@critical_section _lsprof.Profiler._creturn_callback code: object @@ -727,7 +754,7 @@ _lsprof_Profiler__creturn_callback_impl(ProfilerObject *self, PyObject *code, PyObject *instruction_offset, PyObject *callable, PyObject *self_arg) -/*[clinic end generated code: output=1e886dde8fed8fb0 input=b18afe023746923a]*/ +/*[clinic end generated code: output=1e886dde8fed8fb0 input=bdc246d6b5b8714a]*/ { if (self->flags & POF_BUILTINS) { PyObject* cfunc = get_cfunc_from_callable(callable, self_arg, self->missing); @@ -747,7 +774,7 @@ static const struct { } callback_table[] = { {PY_MONITORING_EVENT_PY_START, "_pystart_callback"}, {PY_MONITORING_EVENT_PY_RESUME, "_pystart_callback"}, - {PY_MONITORING_EVENT_PY_THROW, "_pystart_callback"}, + {PY_MONITORING_EVENT_PY_THROW, "_pythrow_callback"}, {PY_MONITORING_EVENT_PY_RETURN, "_pyreturn_callback"}, {PY_MONITORING_EVENT_PY_YIELD, "_pyreturn_callback"}, {PY_MONITORING_EVENT_PY_UNWIND, "_pyreturn_callback"}, @@ -759,6 +786,7 @@ static const struct { /*[clinic input] +@critical_section _lsprof.Profiler.enable subcalls: bool = True @@ -775,7 +803,7 @@ Start collecting profiling information. static PyObject * _lsprof_Profiler_enable_impl(ProfilerObject *self, int subcalls, int builtins) -/*[clinic end generated code: output=1e747f9dc1edd571 input=9ab81405107ab7f1]*/ +/*[clinic end generated code: output=1e747f9dc1edd571 input=0b88115b1c796173]*/ { int all_events = 0; if (setSubcalls(self, subcalls) < 0 || setBuiltins(self, builtins) < 0) { @@ -848,6 +876,7 @@ flush_unmatched(ProfilerObject *pObj) /*[clinic input] +@critical_section _lsprof.Profiler.disable Stop collecting profiling information. @@ -855,7 +884,7 @@ Stop collecting profiling information. static PyObject * _lsprof_Profiler_disable_impl(ProfilerObject *self) -/*[clinic end generated code: output=838cffef7f651870 input=05700b3fc68d1f50]*/ +/*[clinic end generated code: output=838cffef7f651870 input=f7e4787cae20f7f6]*/ { if (self->flags & POF_EXT_TIMER) { PyErr_SetString(PyExc_RuntimeError, @@ -907,6 +936,7 @@ _lsprof_Profiler_disable_impl(ProfilerObject *self) } /*[clinic input] +@critical_section _lsprof.Profiler.clear Clear all profiling information collected so far. @@ -914,7 +944,7 @@ Clear all profiling information collected so far. static PyObject * _lsprof_Profiler_clear_impl(ProfilerObject *self) -/*[clinic end generated code: output=dd1c668fb84b1335 input=fbe1f88c28be4f98]*/ +/*[clinic end generated code: output=dd1c668fb84b1335 input=4aab219d5d7a9bec]*/ { if (self->flags & POF_EXT_TIMER) { PyErr_SetString(PyExc_RuntimeError, @@ -1002,6 +1032,7 @@ static PyMethodDef profiler_methods[] = { _LSPROF_PROFILER_DISABLE_METHODDEF _LSPROF_PROFILER_CLEAR_METHODDEF _LSPROF_PROFILER__PYSTART_CALLBACK_METHODDEF + _LSPROF_PROFILER__PYTHROW_CALLBACK_METHODDEF _LSPROF_PROFILER__PYRETURN_CALLBACK_METHODDEF _LSPROF_PROFILER__CCALL_CALLBACK_METHODDEF _LSPROF_PROFILER__CRETURN_CALLBACK_METHODDEF diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index f9b4c2a170e9c5..462c2181fa6036 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -17,6 +17,7 @@ #include <lzma.h> +#include "pycore_long.h" // _PyLong_UInt32_Converter() // Blocks output buffer wrappers #include "pycore_blocks_output_buffer.h" @@ -223,8 +224,6 @@ FUNCNAME(PyObject *obj, void *ptr) \ return 1; \ } -INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter) -INT_TYPE_CONVERTER_FUNC(uint64_t, uint64_converter) INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter) INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter) INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter) @@ -254,7 +253,7 @@ parse_filter_spec_lzma(_lzma_state *state, PyObject *spec) return NULL; } if (preset_obj != NULL) { - int ok = uint32_converter(preset_obj, &preset); + int ok = _PyLong_UInt32_Converter(preset_obj, &preset); Py_DECREF(preset_obj); if (!ok) { return NULL; @@ -275,14 +274,14 @@ parse_filter_spec_lzma(_lzma_state *state, PyObject *spec) if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OOO&O&O&O&O&O&O&O&", optnames, &id, &preset_obj, - uint32_converter, &options->dict_size, - uint32_converter, &options->lc, - uint32_converter, &options->lp, - uint32_converter, &options->pb, + _PyLong_UInt32_Converter, &options->dict_size, + _PyLong_UInt32_Converter, &options->lc, + _PyLong_UInt32_Converter, &options->lp, + _PyLong_UInt32_Converter, &options->pb, lzma_mode_converter, &options->mode, - uint32_converter, &options->nice_len, + _PyLong_UInt32_Converter, &options->nice_len, lzma_mf_converter, &options->mf, - uint32_converter, &options->depth)) { + _PyLong_UInt32_Converter, &options->depth)) { PyErr_SetString(PyExc_ValueError, "Invalid filter specifier for LZMA filter"); PyMem_Free(options); @@ -301,7 +300,7 @@ parse_filter_spec_delta(_lzma_state *state, PyObject *spec) lzma_options_delta *options; if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames, - &id, uint32_converter, &dist)) { + &id, _PyLong_UInt32_Converter, &dist)) { PyErr_SetString(PyExc_ValueError, "Invalid filter specifier for delta filter"); return NULL; @@ -325,7 +324,7 @@ parse_filter_spec_bcj(_lzma_state *state, PyObject *spec) lzma_options_bcj *options; if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames, - &id, uint32_converter, &start_offset)) { + &id, _PyLong_UInt32_Converter, &start_offset)) { PyErr_SetString(PyExc_ValueError, "Invalid filter specifier for BCJ filter"); return NULL; @@ -806,7 +805,7 @@ Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) return NULL; } - if (preset_obj != Py_None && !uint32_converter(preset_obj, &preset)) { + if (preset_obj != Py_None && !_PyLong_UInt32_Converter(preset_obj, &preset)) { return NULL; } @@ -1226,7 +1225,7 @@ _lzma_LZMADecompressor_impl(PyTypeObject *type, int format, "Cannot specify memory limit with FORMAT_RAW"); return NULL; } - if (!uint64_converter(memlimit, &memlimit_)) { + if (!_PyLong_UInt64_Converter(memlimit, &memlimit_)) { return NULL; } } diff --git a/Modules/_opcode.c b/Modules/_opcode.c index c295f7b3152577..ef271b6ef566b9 100644 --- a/Modules/_opcode.c +++ b/Modules/_opcode.c @@ -5,7 +5,7 @@ #include "Python.h" #include "compile.h" #include "opcode.h" -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL_MAX #include "pycore_code.h" #include "pycore_compile.h" #include "pycore_intrinsics.h" diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d260f1a68f8c70..2e74d5688629ff 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -5539,17 +5539,16 @@ static int load_counted_binstring(PickleState *st, UnpicklerObject *self, int nbytes) { PyObject *obj; - Py_ssize_t size; + long size; char *s; if (_Unpickler_Read(self, st, &s, nbytes) < 0) return -1; - size = calc_binsize(s, nbytes); + size = calc_binint(s, nbytes); if (size < 0) { - PyErr_Format(st->UnpicklingError, - "BINSTRING exceeds system's maximum size of %zd bytes", - PY_SSIZE_T_MAX); + PyErr_SetString(st->UnpicklingError, + "BINSTRING pickle has negative byte count"); return -1; } diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c index b542f86b6fe8da..e0a95318afe988 100644 --- a/Modules/_posixsubprocess.c +++ b/Modules/_posixsubprocess.c @@ -514,7 +514,13 @@ _close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep, proc_fd_dir = NULL; else #endif +#if defined(_AIX) + char fd_path[PATH_MAX]; + snprintf(fd_path, sizeof(fd_path), "/proc/%ld/fd", (long)getpid()); + proc_fd_dir = opendir(fd_path); +#else proc_fd_dir = opendir(FD_DIR); +#endif if (!proc_fd_dir) { /* No way to get a list of open fds. */ _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len, @@ -630,7 +636,7 @@ reset_signal_handlers(const sigset_t *child_sigmask) * (v)fork to set things up and call exec(). * * All of the code in this function must only use async-signal-safe functions, - * listed at `man 7 signal` or + * listed at `man 7 signal-safety` or * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. * * This restriction is documented at diff --git a/Modules/_queuemodule.c b/Modules/_queuemodule.c index 3ee14b61b821d6..01235c77bd7db8 100644 --- a/Modules/_queuemodule.c +++ b/Modules/_queuemodule.c @@ -7,6 +7,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_parking_lot.h" #include "pycore_time.h" // _PyTime_FromSecondsObject() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stdbool.h> #include <stddef.h> // offsetof() @@ -221,9 +222,7 @@ simplequeue_dealloc(PyObject *op) PyObject_GC_UnTrack(self); (void)simplequeue_clear(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); tp->tp_free(self); Py_DECREF(tp); } diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index d5bac2f5b78120..3ffeff32c0dac1 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -497,34 +497,32 @@ _random_Random_setstate_impl(RandomObject *self, PyObject *state) _random.Random.getrandbits self: self(type="RandomObject *") - k: int + k: uint64 / getrandbits(k) -> x. Generates an int with k random bits. [clinic start generated code]*/ static PyObject * -_random_Random_getrandbits_impl(RandomObject *self, int k) -/*[clinic end generated code: output=b402f82a2158887f input=87603cd60f79f730]*/ +_random_Random_getrandbits_impl(RandomObject *self, uint64_t k) +/*[clinic end generated code: output=c30ef8435f3433cf input=64226ac13bb4d2a3]*/ { - int i, words; + Py_ssize_t i, words; uint32_t r; uint32_t *wordarray; PyObject *result; - if (k < 0) { - PyErr_SetString(PyExc_ValueError, - "number of bits must be non-negative"); - return NULL; - } - if (k == 0) return PyLong_FromLong(0); if (k <= 32) /* Fast path */ return PyLong_FromUnsignedLong(genrand_uint32(self) >> (32 - k)); - words = (k - 1) / 32 + 1; + if ((k - 1u) / 32u + 1u > PY_SSIZE_T_MAX / 4u) { + PyErr_NoMemory(); + return NULL; + } + words = (Py_ssize_t)((k - 1u) / 32u + 1u); wordarray = (uint32_t *)PyMem_Malloc(words * 4); if (wordarray == NULL) { PyErr_NoMemory(); diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 9314ddd9bed5d7..b46538b76df16e 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -1,12 +1,15 @@ + /****************************************************************************** + * Python Remote Debugging Module + * + * This module provides functionality to debug Python processes remotely by + * reading their memory and reconstructing stack traces and asyncio task states. + ******************************************************************************/ + #define _GNU_SOURCE -#include <errno.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> +/* ============================================================================ + * HEADERS AND INCLUDES + * ============================================================================ */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 @@ -19,10 +22,66 @@ #include <internal/pycore_stackref.h> // Py_TAG_BITS #include "../Python/remote_debug.h" +// gh-141784: Python.h header must be included first, before system headers. +// Otherwise, some types such as ino_t can be defined differently, causing ABI +// issues. +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + #ifndef HAVE_PROCESS_VM_READV # define HAVE_PROCESS_VM_READV 0 #endif +/* ============================================================================ + * TYPE DEFINITIONS AND STRUCTURES + * ============================================================================ */ + +#define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset))) +#define CLEAR_PTR_TAG(ptr) (((uintptr_t)(ptr) & ~Py_TAG_BITS)) +#define GET_MEMBER_NO_TAG(type, obj, offset) (type)(CLEAR_PTR_TAG(*(type*)((char*)(obj) + (offset)))) + +/* Size macros for opaque buffers */ +#define SIZEOF_BYTES_OBJ sizeof(PyBytesObject) +#define SIZEOF_CODE_OBJ sizeof(PyCodeObject) +#define SIZEOF_GEN_OBJ sizeof(PyGenObject) +#define SIZEOF_INTERP_FRAME sizeof(_PyInterpreterFrame) +#define SIZEOF_LLIST_NODE sizeof(struct llist_node) +#define SIZEOF_PAGE_CACHE_ENTRY sizeof(page_cache_entry_t) +#define SIZEOF_PYOBJECT sizeof(PyObject) +#define SIZEOF_SET_OBJ sizeof(PySetObject) +#define SIZEOF_TASK_OBJ 4096 +#define SIZEOF_THREAD_STATE sizeof(PyThreadState) +#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject) +#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject) +#define SIZEOF_LONG_OBJ sizeof(PyLongObject) + +// Calculate the minimum buffer size needed to read interpreter state fields +// We need to read code_object_generation and potentially tlbc_generation +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifdef Py_GIL_DISABLED +#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)) +#else +#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)) +#endif +#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) + + + +// Copied from Modules/_asynciomodule.c because it's not exported + struct _Py_AsyncioModuleDebugOffsets { struct _asyncio_task_object { uint64_t size; @@ -45,147 +104,539 @@ struct _Py_AsyncioModuleDebugOffsets { } asyncio_thread_state; }; -// Helper to chain exceptions and avoid repetitions +/* ============================================================================ + * STRUCTSEQ TYPE DEFINITIONS + * ============================================================================ */ + +// TaskInfo structseq type - replaces 4-tuple (task_id, task_name, coroutine_stack, awaited_by) +static PyStructSequence_Field TaskInfo_fields[] = { + {"task_id", "Task ID (memory address)"}, + {"task_name", "Task name"}, + {"coroutine_stack", "Coroutine call stack"}, + {"awaited_by", "Tasks awaiting this task"}, + {NULL} +}; + +static PyStructSequence_Desc TaskInfo_desc = { + "_remote_debugging.TaskInfo", + "Information about an asyncio task", + TaskInfo_fields, + 4 +}; + +// FrameInfo structseq type - replaces 3-tuple (filename, lineno, funcname) +static PyStructSequence_Field FrameInfo_fields[] = { + {"filename", "Source code filename"}, + {"lineno", "Line number"}, + {"funcname", "Function name"}, + {NULL} +}; + +static PyStructSequence_Desc FrameInfo_desc = { + "_remote_debugging.FrameInfo", + "Information about a frame", + FrameInfo_fields, + 3 +}; + +// CoroInfo structseq type - replaces 2-tuple (call_stack, task_name) +static PyStructSequence_Field CoroInfo_fields[] = { + {"call_stack", "Coroutine call stack"}, + {"task_name", "Task name"}, + {NULL} +}; + +static PyStructSequence_Desc CoroInfo_desc = { + "_remote_debugging.CoroInfo", + "Information about a coroutine", + CoroInfo_fields, + 2 +}; + +// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info) +static PyStructSequence_Field ThreadInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"frame_info", "Frame information"}, + {NULL} +}; + +static PyStructSequence_Desc ThreadInfo_desc = { + "_remote_debugging.ThreadInfo", + "Information about a thread", + ThreadInfo_fields, + 2 +}; + +// AwaitedInfo structseq type - replaces 2-tuple (tid, awaited_by_list) +static PyStructSequence_Field AwaitedInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"awaited_by", "List of tasks awaited by this thread"}, + {NULL} +}; + +static PyStructSequence_Desc AwaitedInfo_desc = { + "_remote_debugging.AwaitedInfo", + "Information about what a thread is awaiting", + AwaitedInfo_fields, + 2 +}; + +typedef struct { + PyObject *func_name; + PyObject *file_name; + int first_lineno; + PyObject *linetable; // bytes + uintptr_t addr_code_adaptive; +} CachedCodeMetadata; + +typedef struct { + /* Types */ + PyTypeObject *RemoteDebugging_Type; + PyTypeObject *TaskInfo_Type; + PyTypeObject *FrameInfo_Type; + PyTypeObject *CoroInfo_Type; + PyTypeObject *ThreadInfo_Type; + PyTypeObject *AwaitedInfo_Type; +} RemoteDebuggingState; + +typedef struct { + PyObject_HEAD + proc_handle_t handle; + uintptr_t runtime_start_address; + struct _Py_DebugOffsets debug_offsets; + int async_debug_offsets_available; + struct _Py_AsyncioModuleDebugOffsets async_debug_offsets; + uintptr_t interpreter_addr; + uintptr_t tstate_addr; + uint64_t code_object_generation; + _Py_hashtable_t *code_object_cache; + int debug; + int only_active_thread; + RemoteDebuggingState *cached_state; // Cached module state +#ifdef Py_GIL_DISABLED + // TLBC cache invalidation tracking + uint32_t tlbc_generation; // Track TLBC index pool changes + _Py_hashtable_t *tlbc_cache; // Cache of TLBC arrays by code object address +#endif +} RemoteUnwinderObject; + +#define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op)) + +typedef struct +{ + int lineno; + int end_lineno; + int column; + int end_column; +} LocationInfo; + +typedef struct { + uintptr_t remote_addr; + size_t size; + void *local_copy; +} StackChunkInfo; + +typedef struct { + StackChunkInfo *chunks; + size_t count; +} StackChunkList; + +#include "clinic/_remote_debugging_module.c.h" + +/*[clinic input] +module _remote_debugging +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=5f507d5b2e76a7f7]*/ + + +/* ============================================================================ + * FORWARD DECLARATIONS + * ============================================================================ */ + +static inline int +is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr +); + +typedef int (*thread_processor_func)( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +); + +typedef int (*set_entry_processor_func)( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +); + + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to +); + +static int +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +); + +/* Forward declarations for task parsing functions */ +static int parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* address_of_code_object, + uintptr_t* previous_frame +); + +static int +parse_async_frame_chain( + RemoteUnwinderObject *unwinder, + PyObject *calls, + uintptr_t address_of_thread, + uintptr_t running_task_code_obj +); + +static int read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr); +static int read_Py_ssize_t(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t *size); + +static int process_task_and_waiters(RemoteUnwinderObject *unwinder, uintptr_t task_addr, PyObject *result); +static int process_task_awaited_by(RemoteUnwinderObject *unwinder, uintptr_t task_address, set_entry_processor_func processor, void *context); +static int find_running_task_in_thread(RemoteUnwinderObject *unwinder, uintptr_t thread_state_addr, uintptr_t *running_task_addr); +static int get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr); +static int append_awaited_by(RemoteUnwinderObject *unwinder, unsigned long tid, uintptr_t head_addr, PyObject *result); + +/* ============================================================================ + * UTILITY FUNCTIONS AND HELPERS + * ============================================================================ */ + +#define set_exception_cause(unwinder, exc_type, message) \ + if (unwinder->debug) { \ + _set_debug_exception_cause(exc_type, message); \ + } + static void -chain_exceptions(PyObject *exception, const char *string) +cached_code_metadata_destroy(void *ptr) { - PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(exception, string); - _PyErr_ChainExceptions1(exc); + CachedCodeMetadata *meta = (CachedCodeMetadata *)ptr; + Py_DECREF(meta->func_name); + Py_DECREF(meta->file_name); + Py_DECREF(meta->linetable); + PyMem_RawFree(meta); } -// Get the PyAsyncioDebug section address for any platform -static uintptr_t -_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) +static inline RemoteDebuggingState * +RemoteDebugging_GetState(PyObject *module) { - uintptr_t address; - -#ifdef MS_WINDOWS - // On Windows, search for asyncio debug in executable or DLL - address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); -#elif defined(__linux__) - // On Linux, search for asyncio debug in executable or DLL - address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); -#elif defined(__APPLE__) && TARGET_OS_OSX - // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - if (address == 0) { - PyErr_Clear(); - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - } -#else - Py_UNREACHABLE(); -#endif + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (RemoteDebuggingState *)state; +} - return address; +static inline RemoteDebuggingState * +RemoteDebugging_GetStateFromType(PyTypeObject *type) +{ + PyObject *module = PyType_GetModule(type); + assert(module != NULL); + return RemoteDebugging_GetState(module); } -static inline int -read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) +static inline RemoteDebuggingState * +RemoteDebugging_GetStateFromObject(PyObject *obj) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); - if (result < 0) { - return -1; + RemoteUnwinderObject *unwinder = (RemoteUnwinderObject *)obj; + if (unwinder->cached_state == NULL) { + unwinder->cached_state = RemoteDebugging_GetStateFromType(Py_TYPE(obj)); } - return 0; + return unwinder->cached_state; } static inline int -read_Py_ssize_t(proc_handle_t *handle, uintptr_t address, Py_ssize_t *size) +RemoteDebugging_InitState(RemoteDebuggingState *st) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); - if (result < 0) { - return -1; - } return 0; } static int -read_py_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) +is_prerelease_version(uint64_t version) { - if (read_ptr(handle, address, ptr_addr)) { - return -1; - } - *ptr_addr &= ~Py_TAG_BITS; - return 0; + return (version & 0xF0) != 0xF0; } -static int -read_char(proc_handle_t *handle, uintptr_t address, char *result) +static inline int +validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets) { - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(char), result); - if (res < 0) { + if (memcmp(debug_offsets->cookie, _Py_Debug_Cookie, sizeof(debug_offsets->cookie)) != 0) { + // The remote is probably running a Python version predating debug offsets. + PyErr_SetString( + PyExc_RuntimeError, + "Can't determine the Python version of the remote process"); + return -1; + } + + // Assume debug offsets could change from one pre-release version to another, + // or one minor version to another, but are stable across patch versions. + if (is_prerelease_version(Py_Version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach from a pre-release Python interpreter" + " to a process running a different Python version"); + return -1; + } + + if (is_prerelease_version(debug_offsets->version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach to a pre-release Python interpreter" + " from a process running a different Python version"); + return -1; + } + + unsigned int remote_major = (debug_offsets->version >> 24) & 0xFF; + unsigned int remote_minor = (debug_offsets->version >> 16) & 0xFF; + + if (PY_MAJOR_VERSION != remote_major || PY_MINOR_VERSION != remote_minor) { + PyErr_Format( + PyExc_RuntimeError, + "Can't attach from a Python %d.%d process to a Python %d.%d process", + PY_MAJOR_VERSION, PY_MINOR_VERSION, remote_major, remote_minor); + return -1; + } + + // The debug offsets differ between free threaded and non-free threaded builds. + if (_Py_Debug_Free_Threaded && !debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach from a free-threaded Python process" + " to a process running a non-free-threaded version"); return -1; } + + if (!_Py_Debug_Free_Threaded && debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach to a free-threaded Python process" + " from a process running a non-free-threaded version"); + return -1; + } + return 0; } +// Generic function to iterate through all threads static int -read_sized_int(proc_handle_t *handle, uintptr_t address, void *result, size_t size) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, size, result); - if (res < 0) { +iterate_threads( + RemoteUnwinderObject *unwinder, + thread_processor_func processor, + void *context +) { + uintptr_t thread_state_addr; + unsigned long tid = 0; + + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + unwinder->interpreter_addr + (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state"); return -1; } + + while (thread_state_addr != 0) { + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.native_thread_id, + sizeof(tid), + &tid)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread ID"); + return -1; + } + + // Call the processor function for this thread + if (processor(unwinder, thread_state_addr, tid, context) < 0) { + return -1; + } + + // Move to next thread + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.next, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next thread state"); + return -1; + } + } + return 0; } +// Generic function to iterate through set entries static int -read_unsigned_long(proc_handle_t *handle, uintptr_t address, unsigned long *result) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(unsigned long), result); - if (res < 0) { +iterate_set_entries( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + set_entry_processor_func processor, + void *context +) { + char set_object[SIZEOF_SET_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, set_addr, + SIZEOF_SET_OBJ, set_object) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set object"); return -1; } + + Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); + Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; + uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + + Py_ssize_t i = 0; + Py_ssize_t els = 0; + while (i < set_len && els < num_els) { + uintptr_t key_addr; + if (read_py_ptr(unwinder, table_ptr, &key_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry key"); + return -1; + } + + if ((void*)key_addr != NULL) { + Py_ssize_t ref_cnt; + if (read_Py_ssize_t(unwinder, table_ptr, &ref_cnt) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry ref count"); + return -1; + } + + if (ref_cnt) { + // Process this valid set entry + if (processor(unwinder, key_addr, context) < 0) { + return -1; + } + els++; + } + } + table_ptr += sizeof(void*) * 2; + i++; + } + return 0; } +// Processor function for task waiters +static int +process_waiter_task( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *result = (PyObject *)context; + return process_task_and_waiters(unwinder, key_addr, result); +} + +// Processor function for parsing tasks in sets +static int +process_task_parser( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *awaited_by = (PyObject *)context; + return parse_task(unwinder, key_addr, awaited_by); +} + +/* ============================================================================ + * MEMORY READING FUNCTIONS + * ============================================================================ */ + +#define DEFINE_MEMORY_READER(type_name, c_type, error_msg) \ +static inline int \ +read_##type_name(RemoteUnwinderObject *unwinder, uintptr_t address, c_type *result) \ +{ \ + int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(c_type), result); \ + if (res < 0) { \ + set_exception_cause(unwinder, PyExc_RuntimeError, error_msg); \ + return -1; \ + } \ + return 0; \ +} + +DEFINE_MEMORY_READER(ptr, uintptr_t, "Failed to read pointer from remote memory") +DEFINE_MEMORY_READER(Py_ssize_t, Py_ssize_t, "Failed to read Py_ssize_t from remote memory") +DEFINE_MEMORY_READER(char, char, "Failed to read char from remote memory") + static int -read_pyobj(proc_handle_t *handle, uintptr_t address, PyObject *ptr_addr) +read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) { - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(PyObject), ptr_addr); - if (res < 0) { + if (read_ptr(unwinder, address, ptr_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Python pointer"); return -1; } + *ptr_addr &= ~Py_TAG_BITS; return 0; } +/* ============================================================================ + * PYTHON OBJECT READING FUNCTIONS + * ============================================================================ */ + static PyObject * read_py_str( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, + RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->unicode_object.length, - sizeof(Py_ssize_t), - &len + // Read the entire PyUnicodeObject at once + char unicode_obj[SIZEOF_UNICODE_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_UNICODE_OBJ, + unicode_obj ); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyUnicodeObject"); goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid string length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid string length in remote Unicode object"); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for string reading"); return NULL; } - size_t offset = debug_offsets->unicode_object.asciiobject_size; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = (size_t)unwinder->debug_offsets.unicode_object.asciiobject_size; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read string data from remote memory"); goto err; } buf[len] = '\0'; result = PyUnicode_FromStringAndSize(buf, len); if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyUnicode from remote string data"); goto err; } @@ -202,39 +653,52 @@ read_py_str( static PyObject * read_py_bytes( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, - uintptr_t address + RemoteUnwinderObject *unwinder, + uintptr_t address, + Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->bytes_object.ob_size, - sizeof(Py_ssize_t), - &len + // Read the entire PyBytesObject at once + char bytes_obj[SIZEOF_BYTES_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_BYTES_OBJ, + bytes_obj ); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyBytesObject"); goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid bytes length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid bytes length in remote bytes object"); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for bytes reading"); return NULL; } - size_t offset = debug_offsets->bytes_object.ob_sval; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = (size_t)unwinder->debug_offsets.bytes_object.ob_sval; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read bytes data from remote memory"); goto err; } buf[len] = '\0'; result = PyBytes_FromStringAndSize(buf, len); if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyBytes from remote bytes data"); goto err; } @@ -249,45 +713,64 @@ read_py_bytes( return NULL; } - - static long -read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address) +read_py_long( + RemoteUnwinderObject *unwinder, + uintptr_t address +) { unsigned int shift = PYLONG_BITS_IN_DIGIT; - Py_ssize_t size; - uintptr_t lv_tag; - - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->long_object.lv_tag, - sizeof(uintptr_t), - &lv_tag); + // Read the entire PyLongObject at once + char long_obj[SIZEOF_LONG_OBJ]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + (size_t)unwinder->debug_offsets.long_object.size, + long_obj); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLongObject"); return -1; } + uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag); int negative = (lv_tag & 3) == 2; - size = lv_tag >> 3; + Py_ssize_t size = lv_tag >> 3; if (size == 0) { return 0; } - digit *digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); - if (!digits) { - PyErr_NoMemory(); - return -1; - } + // If the long object has inline digits, use them directly + digit *digits; + if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { + // For small integers, digits are inline in the long_value.ob_digit array + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); + return -1; + } + memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); + } else { + // For larger integers, we need to read the digits separately + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for large PyLong"); + return -1; + } - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->long_object.ob_digit, - sizeof(digit) * size, - digits - ); - if (bytes_read < 0) { - goto error; + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address + (uintptr_t)unwinder->debug_offsets.long_object.ob_digit, + sizeof(digit) * size, + digits + ); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLong digits from remote memory"); + goto error; + } } long long value = 0; @@ -310,485 +793,840 @@ read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address return -1; } -static PyObject * -parse_task_name( - proc_handle_t *handle, - _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address -) { - uintptr_t task_name_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_name, - &task_name_addr); - if (err) { - return NULL; - } - - // The task name can be a long or a string so we need to check the type +/* ============================================================================ + * ASYNCIO DEBUG FUNCTIONS + * ============================================================================ */ - PyObject task_name_obj; - err = read_pyobj( - handle, - task_name_addr, - &task_name_obj); - if (err) { +// Get the PyAsyncioDebug section address for any platform +static uintptr_t +_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) +{ + uintptr_t address; + +#ifdef MS_WINDOWS + // On Windows, search for asyncio debug in executable or DLL + address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__linux__) + // On Linux, search for asyncio debug in executable or DLL + address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__APPLE__) && TARGET_OS_OSX + // On macOS, try libpython first, then fall back to python + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + PyErr_Clear(); + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + } + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#else + Py_UNREACHABLE(); +#endif + + return address; +} + +static int +read_async_debug( + RemoteUnwinderObject *unwinder +) { + uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle); + if (!async_debug_addr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to get AsyncioDebug address"); + return -1; + } + + size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets); + if (result < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read AsyncioDebug offsets"); + } + return result; +} + +/* ============================================================================ + * ASYNCIO TASK PARSING FUNCTIONS + * ============================================================================ */ + +static PyObject * +parse_task_name( + RemoteUnwinderObject *unwinder, + uintptr_t task_address +) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_address, + (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); return NULL; } - unsigned long flags; - err = read_unsigned_long( - handle, - (uintptr_t)task_name_obj.ob_type + offsets->type_object.tp_flags, - &flags); - if (err) { + uintptr_t task_name_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); + + // The task name can be a long or a string so we need to check the type + char task_name_obj[SIZEOF_PYOBJECT]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_name_addr, + SIZEOF_PYOBJECT, + task_name_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name object"); + return NULL; + } + + // Now read the type object to get the flags + char type_obj[SIZEOF_TYPE_OBJ]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + GET_MEMBER(uintptr_t, task_name_obj, unwinder->debug_offsets.pyobject.ob_type), + SIZEOF_TYPE_OBJ, + type_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name type object"); return NULL; } - if ((flags & Py_TPFLAGS_LONG_SUBCLASS)) { - long res = read_py_long(handle, offsets, task_name_addr); + if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) { + long res = read_py_long(unwinder, task_name_addr); if (res == -1) { - chain_exceptions(PyExc_RuntimeError, "Failed to get task name"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed"); return NULL; } return PyUnicode_FromFormat("Task-%d", res); } - if(!(flags & Py_TPFLAGS_UNICODE_SUBCLASS)) { + if(!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) { PyErr_SetString(PyExc_RuntimeError, "Invalid task name object"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name object is neither long nor unicode"); return NULL; } return read_py_str( - handle, - offsets, + unwinder, task_name_addr, 255 ); } -static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame -); +static int parse_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *awaited_by +) { + return process_task_awaited_by(unwinder, task_address, process_task_parser, awaited_by); +} static int -parse_coro_chain( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t coro_address, +handle_yield_from_frame( + RemoteUnwinderObject *unwinder, + uintptr_t gi_iframe_addr, + uintptr_t gen_type_addr, PyObject *render_to ) { - assert((void*)coro_address != NULL); - - uintptr_t gen_type_addr; - int err = read_ptr( - handle, - coro_address + offsets->pyobject.ob_type, - &gen_type_addr); - if (err) { - return -1; - } - - PyObject* name = NULL; - uintptr_t prev_frame; - if (parse_frame_object( - handle, - &name, - offsets, - coro_address + offsets->gen_object.gi_iframe, - &prev_frame) - < 0) - { - return -1; - } - - if (PyList_Append(render_to, name)) { - Py_DECREF(name); + // Read the entire interpreter frame at once + char iframe[SIZEOF_INTERP_FRAME]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + gi_iframe_addr, + SIZEOF_INTERP_FRAME, + iframe); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame in yield_from handler"); return -1; } - Py_DECREF(name); - int8_t gi_frame_state; - err = read_sized_int( - handle, - coro_address + offsets->gen_object.gi_frame_state, - &gi_frame_state, - sizeof(int8_t) - ); - if (err) { + if (GET_MEMBER(char, iframe, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR) { + PyErr_SetString( + PyExc_RuntimeError, + "generator doesn't own its frame \\_o_/"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame ownership mismatch in yield_from"); return -1; } - if (gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { - char owner; - err = read_char( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.owner, - &owner - ); - if (err) { - return -1; - } - if (owner != FRAME_OWNED_BY_GENERATOR) { - PyErr_SetString( - PyExc_RuntimeError, - "generator doesn't own its frame \\_o_/"); - return -1; - } + uintptr_t stackpointer_addr = GET_MEMBER_NO_TAG(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); - uintptr_t stackpointer_addr; + if ((void*)stackpointer_addr != NULL) { + uintptr_t gi_await_addr; err = read_py_ptr( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.stackpointer, - &stackpointer_addr); + unwinder, + stackpointer_addr - sizeof(void*), + &gi_await_addr); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await address"); return -1; } - if ((void*)stackpointer_addr != NULL) { - uintptr_t gi_await_addr; - err = read_py_ptr( - handle, - stackpointer_addr - sizeof(void*), - &gi_await_addr); + if ((void*)gi_await_addr != NULL) { + uintptr_t gi_await_addr_type_addr; + err = read_ptr( + unwinder, + gi_await_addr + (uintptr_t)unwinder->debug_offsets.pyobject.ob_type, + &gi_await_addr_type_addr); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await type address"); return -1; } - if ((void*)gi_await_addr != NULL) { - uintptr_t gi_await_addr_type_addr; - int err = read_ptr( - handle, - gi_await_addr + offsets->pyobject.ob_type, - &gi_await_addr_type_addr); + if (gen_type_addr == gi_await_addr_type_addr) { + /* This needs an explanation. We always start with parsing + native coroutine / generator frames. Ultimately they + are awaiting on something. That something can be + a native coroutine frame or... an iterator. + If it's the latter -- we can't continue building + our chain. So the condition to bail out of this is + to do that when the type of the current coroutine + doesn't match the type of whatever it points to + in its cr_await. + */ + err = parse_coro_chain(unwinder, gi_await_addr, render_to); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain in yield_from"); return -1; } - - if (gen_type_addr == gi_await_addr_type_addr) { - /* This needs an explanation. We always start with parsing - native coroutine / generator frames. Ultimately they - are awaiting on something. That something can be - a native coroutine frame or... an iterator. - If it's the latter -- we can't continue building - our chain. So the condition to bail out of this is - to do that when the type of the current coroutine - doesn't match the type of whatever it points to - in its cr_await. - */ - err = parse_coro_chain( - handle, - offsets, - async_offsets, - gi_await_addr, - render_to - ); - if (err) { - return -1; - } - } } } - } return 0; } - static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *awaited_by, - int recurse_task -); +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +) { + assert((void*)coro_address != NULL); + // Read the entire generator object at once + char gen_object[SIZEOF_GEN_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + coro_address, + SIZEOF_GEN_OBJ, + gen_object); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read generator object in coro chain"); + return -1; + } -static int -parse_task( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *render_to, - int recurse_task -) { - char is_task; - int err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_is_task, - &is_task); - if (err) { + int8_t frame_state = GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state); + if (frame_state == FRAME_CLEARED) { + return 0; + } + + uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type); + + PyObject* name = NULL; + + // Parse the previous frame using the gi_iframe from local copy + uintptr_t prev_frame; + uintptr_t gi_iframe_addr = coro_address + (uintptr_t)unwinder->debug_offsets.gen_object.gi_iframe; + uintptr_t address_of_code_object = 0; + if (parse_frame_object(unwinder, &name, gi_iframe_addr, &address_of_code_object, &prev_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in coro chain"); return -1; } - PyObject* result = PyList_New(0); - if (result == NULL) { + if (!name) { + return 0; + } + + if (PyList_Append(render_to, name)) { + Py_DECREF(name); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to coro chain"); return -1; } + Py_DECREF(name); + + if (frame_state == FRAME_SUSPENDED_YIELD_FROM) { + return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to); + } - PyObject *call_stack = PyList_New(0); + return 0; +} + +static PyObject* +create_task_result( + RemoteUnwinderObject *unwinder, + uintptr_t task_address +) { + PyObject* result = NULL; + PyObject *call_stack = NULL; + PyObject *tn = NULL; + char task_obj[SIZEOF_TASK_OBJ]; + uintptr_t coro_addr; + + // Create call_stack first since it's the first tuple element + call_stack = PyList_New(0); if (call_stack == NULL) { - goto err; + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create call stack list"); + goto error; } - if (PyList_Append(result, call_stack)) { - Py_DECREF(call_stack); - goto err; + + // Create task name/address for second tuple element + tn = PyLong_FromUnsignedLongLong(task_address); + if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name/address"); + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ - Py_DECREF(call_stack); - if (is_task) { - PyObject *tn = NULL; - if (recurse_task) { - tn = parse_task_name( - handle, offsets, async_offsets, task_address); - } else { - tn = PyLong_FromUnsignedLongLong(task_address); - } - if (tn == NULL) { - goto err; - } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); - goto err; - } - Py_DECREF(tn); + // Parse coroutine chain + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object for coro chain"); + goto error; + } - uintptr_t coro_addr; - err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_coro, - &coro_addr); - if (err) { - goto err; - } + coro_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); - if ((void*)coro_addr != NULL) { - err = parse_coro_chain( - handle, - offsets, - async_offsets, - coro_addr, - call_stack - ); - if (err) { - goto err; - } + if ((void*)coro_addr != NULL) { + if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain"); + goto error; + } - if (PyList_Reverse(call_stack)) { - goto err; - } + if (PyList_Reverse(call_stack)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reverse call stack"); + goto error; } } - if (PyList_Append(render_to, result)) { - goto err; + // Create final CoroInfo result + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create CoroInfo"); + goto error; } - if (recurse_task) { - PyObject *awaited_by = PyList_New(0); - if (awaited_by == NULL) { - goto err; + // PyStructSequence_SetItem steals references, so we don't need to DECREF on success + PyStructSequence_SetItem(result, 0, call_stack); // This steals the reference + PyStructSequence_SetItem(result, 1, tn); // This steals the reference + + return result; + +error: + Py_XDECREF(result); + Py_XDECREF(call_stack); + Py_XDECREF(tn); + return NULL; +} + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to +) { + char is_task; + PyObject* result = NULL; + int err; + + err = read_char( + unwinder, + task_address + (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_is_task, + &is_task); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read is_task flag"); + goto error; + } + + if (is_task) { + result = create_task_result(unwinder, task_address); + if (!result) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task result"); + goto error; } - if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto err; + } else { + // Create an empty CoroInfo for non-task objects + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty CoroInfo"); + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ - Py_DECREF(awaited_by); - - if (parse_task_awaited_by(handle, offsets, async_offsets, - task_address, awaited_by, 1) - ) { - goto err; + PyObject *empty_list = PyList_New(0); + if (empty_list == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty list"); + goto error; } + PyObject *task_name = PyLong_FromUnsignedLongLong(task_address); + if (task_name == NULL) { + Py_DECREF(empty_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name"); + goto error; + } + PyStructSequence_SetItem(result, 0, empty_list); // This steals the reference + PyStructSequence_SetItem(result, 1, task_name); // This steals the reference + } + if (PyList_Append(render_to, result)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); + goto error; } - Py_DECREF(result); + Py_DECREF(result); return 0; -err: - Py_DECREF(result); +error: + Py_XDECREF(result); return -1; } static int -parse_tasks_in_set( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t set_addr, - PyObject *awaited_by, - int recurse_task +process_single_task_node( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject **task_info, + PyObject *result ) { - uintptr_t set_obj; - if (read_py_ptr( - handle, - set_addr, - &set_obj) - ) { - return -1; + PyObject *tn = NULL; + PyObject *current_awaited_by = NULL; + PyObject *task_id = NULL; + PyObject *result_item = NULL; + PyObject *coroutine_stack = NULL; + + tn = parse_task_name(unwinder, task_addr); + if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name in single task node"); + goto error; } - Py_ssize_t num_els; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.used, - &num_els) - ) { - return -1; + current_awaited_by = PyList_New(0); + if (current_awaited_by == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list in single task node"); + goto error; } - Py_ssize_t set_len; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.mask, - &set_len) - ) { - return -1; + // Extract the coroutine stack for this task + coroutine_stack = PyList_New(0); + if (coroutine_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coroutine stack list in single task node"); + goto error; } - set_len++; // The set contains the `mask+1` element slots. - uintptr_t table_ptr; - if (read_ptr( - handle, - set_obj + offsets->set_object.table, - &table_ptr) - ) { - return -1; + if (parse_task(unwinder, task_addr, coroutine_stack) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task coroutine stack in single task node"); + goto error; } - Py_ssize_t i = 0; - Py_ssize_t els = 0; - while (i < set_len) { - uintptr_t key_addr; - if (read_py_ptr(handle, table_ptr, &key_addr)) { - return -1; - } + task_id = PyLong_FromUnsignedLongLong(task_addr); + if (task_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); + goto error; + } - if ((void*)key_addr != NULL) { - Py_ssize_t ref_cnt; - if (read_Py_ssize_t(handle, table_ptr, &ref_cnt)) { - return -1; - } + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result_item = PyStructSequence_New(state->TaskInfo_Type); + if (result_item == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create TaskInfo in single task node"); + goto error; + } - if (ref_cnt) { - // if 'ref_cnt=0' it's a set dummy marker - - if (parse_task( - handle, - offsets, - async_offsets, - key_addr, - awaited_by, - recurse_task - ) - ) { - return -1; - } + PyStructSequence_SetItem(result_item, 0, task_id); // steals ref + PyStructSequence_SetItem(result_item, 1, tn); // steals ref + PyStructSequence_SetItem(result_item, 2, coroutine_stack); // steals ref + PyStructSequence_SetItem(result_item, 3, current_awaited_by); // steals ref - if (++els == num_els) { - break; - } - } - } + // References transferred to tuple + task_id = NULL; + tn = NULL; + coroutine_stack = NULL; + current_awaited_by = NULL; - table_ptr += sizeof(void*) * 2; - i++; + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append result item in single task node"); + return -1; + } + if (task_info != NULL) { + *task_info = result_item; + } + Py_DECREF(result_item); + + // Get back current_awaited_by reference for parse_task_awaited_by + current_awaited_by = PyStructSequence_GetItem(result_item, 3); + if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); + // No cleanup needed here since all references were transferred to result_item + // and result_item was already added to result list and decreffed + return -1; } + return 0; + +error: + Py_XDECREF(tn); + Py_XDECREF(current_awaited_by); + Py_XDECREF(task_id); + Py_XDECREF(result_item); + Py_XDECREF(coroutine_stack); + return -1; } +// Thread processor for get_all_awaited_by +static int +process_thread_for_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + uintptr_t head_addr = thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; + return append_awaited_by(unwinder, tid, head_addr, result); +} +// Generic function to process task awaited_by static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, +process_task_awaited_by( + RemoteUnwinderObject *unwinder, uintptr_t task_address, - PyObject *awaited_by, - int recurse_task + set_entry_processor_func processor, + void *context ) { - uintptr_t task_ab_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &task_ab_addr); - if (err) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); return -1; } + uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); if ((void*)task_ab_addr == NULL) { - return 0; + return 0; // No tasks waiting for this one } - char awaited_by_is_a_set; - err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by_is_set, - &awaited_by_is_a_set); - if (err) { - return -1; - } + char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); if (awaited_by_is_a_set) { - if (parse_tasks_in_set( - handle, - offsets, - async_offsets, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - awaited_by, - recurse_task - ) - ) { - return -1; - } + return iterate_set_entries(unwinder, task_ab_addr, processor, context); } else { - uintptr_t sub_task; - if (read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &sub_task) - ) { - return -1; + // Single task waiting + return processor(unwinder, task_ab_addr, context); + } +} + +static int +process_running_task_chain( + RemoteUnwinderObject *unwinder, + uintptr_t running_task_addr, + uintptr_t thread_state_addr, + PyObject *result +) { + uintptr_t running_task_code_obj = 0; + if(get_task_code_object(unwinder, running_task_addr, &running_task_code_obj) < 0) { + return -1; + } + + // First, add this task to the result + PyObject *task_info = NULL; + if (process_single_task_node(unwinder, running_task_addr, &task_info, result) < 0) { + return -1; + } + + // Get the chain from the current frame to this task + PyObject *coro_chain = PyStructSequence_GET_ITEM(task_info, 2); + assert(coro_chain != NULL); + if (PyList_GET_SIZE(coro_chain) != 1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Coro chain is not a single item"); + return -1; + } + PyObject *coro_info = PyList_GET_ITEM(coro_chain, 0); + assert(coro_info != NULL); + PyObject *frame_chain = PyStructSequence_GET_ITEM(coro_info, 0); + assert(frame_chain != NULL); + + // Clear the coro_chain + if (PyList_Clear(frame_chain) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to clear coroutine chain"); + return -1; + } + + // Add the chain from the current frame to this task + if (parse_async_frame_chain(unwinder, frame_chain, thread_state_addr, running_task_code_obj) < 0) { + return -1; + } + + // Now find all tasks that are waiting for this task and process them + if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, result) < 0) { + return -1; + } + + return 0; +} + +// Thread processor for get_async_stack_trace +static int +process_thread_for_async_stack_trace( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + + // Find running task in this thread + uintptr_t running_task_addr; + if (find_running_task_in_thread(unwinder, thread_state_addr, &running_task_addr) < 0) { + return 0; + } + + // If we found a running task, process it and its waiters + if ((void*)running_task_addr != NULL) { + PyObject *task_list = PyList_New(0); + if (task_list == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create task list for thread"); + return -1; + } + + if (process_running_task_chain(unwinder, running_task_addr, thread_state_addr, task_list) < 0) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process running task chain"); + return -1; + } + + // Create AwaitedInfo structure for this thread + PyObject *tid_py = PyLong_FromUnsignedLong(tid); + if (tid_py == NULL) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); + return -1; + } + + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *awaited_info = PyStructSequence_New(state->AwaitedInfo_Type); + if (awaited_info == NULL) { + Py_DECREF(tid_py); + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); + return -1; } - if (parse_task( - handle, - offsets, - async_offsets, - sub_task, - awaited_by, - recurse_task - ) - ) { + PyStructSequence_SetItem(awaited_info, 0, tid_py); // steals ref + PyStructSequence_SetItem(awaited_info, 1, task_list); // steals ref + + if (PyList_Append(result, awaited_info)) { + Py_DECREF(awaited_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append AwaitedInfo to result"); return -1; } + Py_DECREF(awaited_info); } return 0; } -typedef struct +static int +process_task_and_waiters( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +) { + // First, add this task to the result + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { + return -1; + } + + // Now find all tasks that are waiting for this task and process them + return process_task_awaited_by(unwinder, task_addr, process_waiter_task, result); +} + +static int +find_running_task_in_thread( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + uintptr_t *running_task_addr +) { + *running_task_addr = (uintptr_t)NULL; + + uintptr_t address_of_running_loop; + int bytes_read = read_py_ptr( + unwinder, + thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, + &address_of_running_loop); + if (bytes_read == -1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running loop address"); + return -1; + } + + // no asyncio loop is now running + if ((void*)address_of_running_loop == NULL) { + return 0; + } + + int err = read_ptr( + unwinder, + thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, + running_task_addr); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task address"); + return -1; + } + + return 0; +} + +static int +get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr) { + uintptr_t running_coro_addr = 0; + + if(read_py_ptr( + unwinder, + task_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_coro, + &running_coro_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro read failed"); + return -1; + } + + if (running_coro_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro address is NULL"); + return -1; + } + + // note: genobject's gi_iframe is an embedded struct so the address to + // the offset leads directly to its first field: f_executable + if (read_py_ptr( + unwinder, + running_coro_addr + (uintptr_t)unwinder->debug_offsets.gen_object.gi_iframe, code_obj_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task code object"); + return -1; + } + + if (*code_obj_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task code object address is NULL"); + return -1; + } + + return 0; +} + +/* ============================================================================ + * TLBC CACHING FUNCTIONS + * ============================================================================ */ + +#ifdef Py_GIL_DISABLED + +typedef struct { + void *tlbc_array; // Local copy of the TLBC array + Py_ssize_t tlbc_array_size; // Size of the TLBC array + uint32_t generation; // Generation when this was cached +} TLBCCacheEntry; + +static void +tlbc_cache_entry_destroy(void *ptr) { - int lineno; - int end_lineno; - int column; - int end_column; -} LocationInfo; + TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr; + if (entry->tlbc_array) { + PyMem_RawFree(entry->tlbc_array); + } + PyMem_RawFree(entry); +} + +static TLBCCacheEntry * +get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation) +{ + void *key = (void *)code_addr; + TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key); + + if (entry && entry->generation != current_generation) { + // Entry is stale, remove it by setting to NULL + _Py_hashtable_set(self->tlbc_cache, key, NULL); + entry = NULL; + } + + return entry; +} + +static int +cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation) +{ + uintptr_t tlbc_array_ptr; + void *tlbc_array = NULL; + TLBCCacheEntry *entry = NULL; + + // Read the TLBC array pointer + if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0 || tlbc_array_ptr == 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer"); + return 0; // No TLBC array + } + + // Read the TLBC array size + Py_ssize_t tlbc_size; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0 || tlbc_size <= 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size"); + return 0; // Invalid size + } + + // Allocate and read the entire TLBC array + size_t array_data_size = tlbc_size * sizeof(void*); + tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size); + if (!tlbc_array) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC array"); + return 0; // Memory error + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { + PyMem_RawFree(tlbc_array); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array data"); + return 0; // Read error + } + + // Create cache entry + entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry)); + if (!entry) { + PyMem_RawFree(tlbc_array); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC cache entry"); + return 0; // Memory error + } + + entry->tlbc_array = tlbc_array; + entry->tlbc_array_size = tlbc_size; + entry->generation = generation; + + // Store in cache + void *key = (void *)code_addr; + if (_Py_hashtable_set(unwinder->tlbc_cache, key, entry) < 0) { + tlbc_cache_entry_destroy(entry); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to store TLBC entry in cache"); + return 0; // Cache error + } + + return 1; // Success +} + + + +#endif + +/* ============================================================================ + * LINE TABLE PARSING FUNCTIONS + * ============================================================================ */ static int scan_varint(const uint8_t **ptr) @@ -818,12 +1656,11 @@ scan_signed_varint(const uint8_t **ptr) } } - static bool parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) { const uint8_t* ptr = (const uint8_t*)(linetable); - uint64_t addr = 0; + uintptr_t addr = 0; info->lineno = firstlineno; while (*ptr != '\0') { @@ -863,7 +1700,9 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L } default: { uint8_t second_byte = *(ptr++); - assert((second_byte & 128) == 0); + if ((second_byte & 128) != 0) { + return false; + } info->column = code << 3 | (second_byte >> 4); info->end_column = info->column + (second_byte & 15); break; @@ -877,270 +1716,410 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L return false; } +/* ============================================================================ + * CODE OBJECT AND FRAME PARSING FUNCTIONS + * ============================================================================ */ + static int -read_remote_pointer(proc_handle_t *handle, uintptr_t address, uintptr_t *out_ptr, const char *error_message) +parse_code_object(RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t instruction_pointer, + uintptr_t *previous_frame, + int32_t tlbc_index) { - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void *), out_ptr); - if (bytes_read < 0) { - return -1; - } + void *key = (void *)address; + CachedCodeMetadata *meta = NULL; + PyObject *func = NULL; + PyObject *file = NULL; + PyObject *linetable = NULL; + PyObject *lineno = NULL; + PyObject *tuple = NULL; - if ((void *)(*out_ptr) == NULL) { - PyErr_SetString(PyExc_RuntimeError, error_message); - return -1; +#ifdef Py_GIL_DISABLED + // In free threading builds, code object addresses might have the low bit set + // as a flag, so we need to mask it off to get the real address + uintptr_t real_address = address & (~1); +#else + uintptr_t real_address = address; +#endif + + if (unwinder && unwinder->code_object_cache != NULL) { + meta = _Py_hashtable_get(unwinder->code_object_cache, key); } - return 0; -} + if (meta == NULL) { + char code_object[SIZEOF_CODE_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read code object"); + goto error; + } -static int -read_instruction_ptr(proc_handle_t *handle, struct _Py_DebugOffsets *offsets, - uintptr_t current_frame, uintptr_t *instruction_ptr) -{ - return read_remote_pointer( - handle, - current_frame + offsets->interpreter_frame.instr_ptr, - instruction_ptr, - "No instruction ptr found" - ); -} + func = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024); + if (!func) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read function name from code object"); + goto error; + } -static int -parse_code_object(proc_handle_t *handle, - PyObject **result, - struct _Py_DebugOffsets *offsets, - uintptr_t address, - uintptr_t current_frame, - uintptr_t *previous_frame) -{ - uintptr_t addr_func_name, addr_file_name, addr_linetable, instruction_ptr; + file = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024); + if (!file) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read filename from code object"); + goto error; + } - if (read_remote_pointer(handle, address + offsets->code_object.qualname, &addr_func_name, "No function name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.filename, &addr_file_name, "No file name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.linetable, &addr_linetable, "No linetable found") < 0 || - read_instruction_ptr(handle, offsets, current_frame, &instruction_ptr) < 0) { - return -1; - } + linetable = read_py_bytes(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096); + if (!linetable) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read linetable from code object"); + goto error; + } - int firstlineno; - if (_Py_RemoteDebug_ReadRemoteMemory(handle, - address + offsets->code_object.firstlineno, - sizeof(int), - &firstlineno) < 0) { - return -1; + meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata)); + if (!meta) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate cached code metadata"); + goto error; + } + + meta->func_name = func; + meta->file_name = file; + meta->linetable = linetable; + meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno); + meta->addr_code_adaptive = real_address + (uintptr_t)unwinder->debug_offsets.code_object.co_code_adaptive; + + if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) { + cached_code_metadata_destroy(meta); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache code metadata"); + goto error; + } + + // Ownership transferred to meta + func = NULL; + file = NULL; + linetable = NULL; } - PyObject *py_linetable = read_py_bytes(handle, offsets, addr_linetable); - if (!py_linetable) { - return -1; + uintptr_t ip = instruction_pointer; + ptrdiff_t addrq; + +#ifdef Py_GIL_DISABLED + // Handle thread-local bytecode (TLBC) in free threading builds + if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) { + // No TLBC or no unwinder - use main bytecode directly + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + goto done_tlbc; } - uintptr_t addr_code_adaptive = address + offsets->code_object.co_code_adaptive; - ptrdiff_t addrq = (uint16_t *)instruction_ptr - (uint16_t *)addr_code_adaptive; + // Try to get TLBC data from cache (we'll get generation from the caller) + TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); - LocationInfo info; - parse_linetable(addrq, PyBytes_AS_STRING(py_linetable), firstlineno, &info); - Py_DECREF(py_linetable); // Done with linetable + if (!tlbc_entry) { + // Cache miss - try to read and cache TLBC array + if (!cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache TLBC array"); + goto error; + } + tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + } - PyObject *py_line = PyLong_FromLong(info.lineno); - if (!py_line) { - return -1; + if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { + // Use cached TLBC data + uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); + uintptr_t tlbc_bytecode_addr = entries[tlbc_index]; + + if (tlbc_bytecode_addr != 0) { + // Calculate offset from TLBC bytecode + addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr; + goto done_tlbc; + } } - PyObject *py_func_name = read_py_str(handle, offsets, addr_func_name, 256); - if (!py_func_name) { - Py_DECREF(py_line); - return -1; + // Fall back to main bytecode + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + +done_tlbc: +#else + // Non-free-threaded build, always use the main bytecode + (void)tlbc_index; // Suppress unused parameter warning + (void)unwinder; // Suppress unused parameter warning + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; +#endif + ; // Empty statement to avoid C23 extension warning + LocationInfo info = {0}; + bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), + meta->first_lineno, &info); + if (!ok) { + info.lineno = -1; } - PyObject *py_file_name = read_py_str(handle, offsets, addr_file_name, 256); - if (!py_file_name) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - return -1; + lineno = PyLong_FromLong(info.lineno); + if (!lineno) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); + goto error; } - PyObject *result_tuple = PyTuple_New(3); - if (!result_tuple) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - Py_DECREF(py_file_name); - return -1; + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + tuple = PyStructSequence_New(state->FrameInfo_Type); + if (!tuple) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo for code object"); + goto error; } - PyTuple_SET_ITEM(result_tuple, 0, py_func_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 1, py_file_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 2, py_line); // steals ref + Py_INCREF(meta->func_name); + Py_INCREF(meta->file_name); + PyStructSequence_SetItem(tuple, 0, meta->file_name); + PyStructSequence_SetItem(tuple, 1, lineno); + PyStructSequence_SetItem(tuple, 2, meta->func_name); - *result = result_tuple; + *result = tuple; return 0; + +error: + Py_XDECREF(func); + Py_XDECREF(file); + Py_XDECREF(linetable); + Py_XDECREF(lineno); + Py_XDECREF(tuple); + return -1; +} + +/* ============================================================================ + * STACK CHUNK MANAGEMENT FUNCTIONS + * ============================================================================ */ + +static void +cleanup_stack_chunks(StackChunkList *chunks) +{ + for (size_t i = 0; i < chunks->count; ++i) { + PyMem_RawFree(chunks->chunks[i].local_copy); + } + PyMem_RawFree(chunks->chunks); } static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame +process_single_stack_chunk( + RemoteUnwinderObject *unwinder, + uintptr_t chunk_addr, + StackChunkInfo *chunk_info ) { - int err; + // Start with default size assumption + size_t current_size = _PY_DATA_STACK_CHUNK_SIZE; - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { + char *this_chunk = PyMem_RawMalloc(current_size); + if (!this_chunk) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunk buffer"); return -1; } - char owner; - if (read_char(handle, address + offsets->interpreter_frame.owner, &owner)) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, current_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read stack chunk"); return -1; } - if (owner >= FRAME_OWNED_BY_INTERPRETER) { - return 0; - } - - uintptr_t address_of_code_object; - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - &address_of_code_object - ); - if (err) { - return -1; - } + // Check actual size and reread if necessary + size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); + if (actual_size != current_size) { + this_chunk = PyMem_RawRealloc(this_chunk, actual_size); + if (!this_chunk) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to reallocate stack chunk buffer"); + return -1; + } - if ((void*)address_of_code_object == NULL) { - return 0; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, actual_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reread stack chunk with correct size"); + return -1; + } + current_size = actual_size; } - return parse_code_object( - handle, result, offsets, address_of_code_object, address, previous_frame); + chunk_info->remote_addr = chunk_addr; + chunk_info->size = current_size; + chunk_info->local_copy = this_chunk; + return 0; } static int -parse_async_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame, - uintptr_t* code_object -) { - int err; +copy_stack_chunks(RemoteUnwinderObject *unwinder, + uintptr_t tstate_addr, + StackChunkList *out_chunks) +{ + uintptr_t chunk_addr; + StackChunkInfo *chunks = NULL; + size_t count = 0; + size_t max_chunks = 16; - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { + if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address"); return -1; } - char owner; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->interpreter_frame.owner, sizeof(char), &owner); - if (bytes_read < 0) { + chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo)); + if (!chunks) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array"); return -1; } - if (owner == FRAME_OWNED_BY_CSTACK || owner == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame - } + while (chunk_addr != 0) { + // Grow array if needed + if (count >= max_chunks) { + max_chunks *= 2; + StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo)); + if (!new_chunks) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array"); + goto error; + } + chunks = new_chunks; + } - if (owner != FRAME_OWNED_BY_GENERATOR - && owner != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner); - return -1; - } + // Process this chunk + if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk"); + goto error; + } - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - code_object - ); - if (err) { - return -1; + // Get next chunk address and increment count + chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous)); + count++; } - assert(code_object != NULL); - if ((void*)*code_object == NULL) { - return 0; - } + out_chunks->chunks = chunks; + out_chunks->count = count; + return 0; - if (parse_code_object( - handle, result, offsets, *code_object, address, previous_frame)) { - return -1; +error: + for (size_t i = 0; i < count; ++i) { + PyMem_RawFree(chunks[i].local_copy); } + PyMem_RawFree(chunks); + return -1; +} - return 1; +static void * +find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) +{ + for (size_t i = 0; i < chunks->count; ++i) { + uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); + size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); + + if (remote_ptr >= base && remote_ptr < base + payload) { + return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr); + } + } + return NULL; } static int -read_async_debug( - proc_handle_t *handle, - struct _Py_AsyncioModuleDebugOffsets* async_debug +parse_frame_from_chunks( + RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t *previous_frame, + StackChunkList *chunks ) { - uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(handle); - if (!async_debug_addr) { + void *frame_ptr = find_frame_in_chunks(chunks, address); + if (!frame_ptr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame not found in stack chunks"); return -1; } - size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, async_debug_addr, size, async_debug); - return result; + char *frame = (char *)frame_ptr; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); + if (frame_valid != 1) { + return frame_valid; + } + + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); + } +#endif + + return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); } +/* ============================================================================ + * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS + * ============================================================================ */ + static int -find_running_frame( - proc_handle_t *handle, +populate_initial_state_data( + int all_threads, + RemoteUnwinderObject *unwinder, uintptr_t runtime_start_address, - _Py_DebugOffsets* local_debug_offsets, - uintptr_t *frame + uintptr_t *interpreter_state, + uintptr_t *tstate ) { - uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + uintptr_t interpreter_state_list_head = + (uintptr_t)unwinder->debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state address"); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL"); return -1; } - uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_main, + *interpreter_state = address_of_interpreter_state; + + if (all_threads) { + *tstate = 0; + return 0; + } + + uintptr_t address_of_thread = address_of_interpreter_state + + (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main; + + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address_of_thread, sizeof(void*), - &address_of_thread); - if (bytes_read < 0) { + tstate) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state address"); return -1; } - // No Python frames are available for us (can happen at tear-down). + return 0; +} + + +static int +find_running_frame( + RemoteUnwinderObject *unwinder, + uintptr_t address_of_thread, + uintptr_t *frame +) { if ((void*)address_of_thread != NULL) { int err = read_ptr( - handle, - address_of_thread + local_debug_offsets->thread_state.current_frame, + unwinder, + address_of_thread + (uintptr_t)unwinder->debug_offsets.thread_state.current_frame, frame); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read current frame pointer"); return -1; } return 0; @@ -1150,166 +2129,179 @@ find_running_frame( return 0; } -static int -find_running_task( - proc_handle_t *handle, - uintptr_t runtime_start_address, - _Py_DebugOffsets *local_debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, - uintptr_t *running_task_addr +/* ============================================================================ + * FRAME PARSING FUNCTIONS + * ============================================================================ */ + +static inline int +is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr ) { - *running_task_addr = (uintptr_t)NULL; + if ((void*)code_object_addr == NULL) { + return 0; + } - uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + void* frame = (void*)frame_addr; - uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_address + interpreter_state_list_head, - sizeof(void*), - &address_of_interpreter_state); - if (bytes_read < 0) { - return -1; + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { + return 0; // C frame } - if (address_of_interpreter_state == 0) { - PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR + && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); return -1; } + return 1; +} - uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_head, - sizeof(void*), - &address_of_thread); +static int +parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* address_of_code_object, + uintptr_t* previous_frame +) { + char frame[SIZEOF_INTERP_FRAME]; + *address_of_code_object = 0; + + Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_INTERP_FRAME, + frame + ); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame"); return -1; } - uintptr_t address_of_running_loop; - // No Python frames are available for us (can happen at tear-down). - if ((void*)address_of_thread == NULL) { - return 0; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); + if (frame_valid != 1) { + return frame_valid; } - bytes_read = read_py_ptr( - handle, - address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_loop, - &address_of_running_loop); - if (bytes_read == -1) { - return -1; - } + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); - // no asyncio loop is now running - if ((void*)address_of_running_loop == NULL) { - return 0; + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif - int err = read_ptr( - handle, - address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_task, - running_task_addr); - if (err) { + *address_of_code_object = code_object; + return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); +} + +static int + parse_async_frame_chain( + RemoteUnwinderObject *unwinder, + PyObject *calls, + uintptr_t address_of_thread, + uintptr_t running_task_code_obj +) { + uintptr_t address_of_current_frame; + if (find_running_frame(unwinder, address_of_thread, &address_of_current_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running frame search failed in async chain"); return -1; } + while ((void*)address_of_current_frame != NULL) { + PyObject* frame_info = NULL; + uintptr_t address_of_code_object; + int res = parse_frame_object( + unwinder, + &frame_info, + address_of_current_frame, + &address_of_code_object, + &address_of_current_frame + ); + + if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Async frame object parsing failed in chain"); + return -1; + } + + if (!frame_info) { + continue; + } + + if (PyList_Append(calls, frame_info) == -1) { + Py_DECREF(frame_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame info to async chain"); + return -1; + } + + Py_DECREF(frame_info); + + if (address_of_code_object == running_task_code_obj) { + break; + } + } + return 0; } +/* ============================================================================ + * AWAITED BY PARSING FUNCTIONS + * ============================================================================ */ + static int append_awaited_by_for_thread( - proc_handle_t *handle, + RemoteUnwinderObject *unwinder, uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, PyObject *result ) { - struct llist_node task_node; + char task_node[SIZEOF_LLIST_NODE]; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - head_addr, - sizeof(task_node), - &task_node)) - { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr, + sizeof(task_node), task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task node head"); return -1; } size_t iteration_count = 0; const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound - while ((uintptr_t)task_node.next != head_addr) { + + while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) { if (++iteration_count > MAX_ITERATIONS) { PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task list iteration limit exceeded"); return -1; } - if (task_node.next == NULL) { - PyErr_SetString( - PyExc_RuntimeError, - "Invalid linked list structure reading remote memory"); + if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid linked list structure reading remote memory"); + set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list"); return -1; } - uintptr_t task_addr = (uintptr_t)task_node.next - - async_offsets->asyncio_task_object.task_node; - - PyObject *tn = parse_task_name( - handle, - debug_offsets, - async_offsets, - task_addr); - if (tn == NULL) { - return -1; - } + uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) + - (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; - PyObject *current_awaited_by = PyList_New(0); - if (current_awaited_by == NULL) { - Py_DECREF(tn); + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); return -1; } - PyObject* task_id = PyLong_FromUnsignedLongLong(task_addr); - if (task_id == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - return -1; - } - - PyObject *result_item = PyTuple_New(3); - if (result_item == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - Py_DECREF(task_id); - return -1; - } - - PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref - PyTuple_SET_ITEM(result_item, 1, tn); // steals ref - PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); - return -1; - } - Py_DECREF(result_item); - - if (parse_task_awaited_by(handle, debug_offsets, async_offsets, - task_addr, current_awaited_by, 0)) - { - return -1; - } - - // onto the next one... - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - (uintptr_t)task_node.next, - sizeof(task_node), - &task_node)) - { + // Read next node + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), + sizeof(task_node), + task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by"); return -1; } } @@ -1319,482 +2311,798 @@ append_awaited_by_for_thread( static int append_awaited_by( - proc_handle_t *handle, + RemoteUnwinderObject *unwinder, unsigned long tid, uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, PyObject *result) { PyObject *tid_py = PyLong_FromUnsignedLong(tid); if (tid_py == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID object"); return -1; } - PyObject *result_item = PyTuple_New(2); - if (result_item == NULL) { + PyObject* awaited_by_for_thread = PyList_New(0); + if (awaited_by_for_thread == NULL) { Py_DECREF(tid_py); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by thread list"); return -1; } - PyObject* awaited_by_for_thread = PyList_New(0); - if (awaited_by_for_thread == NULL) { + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *result_item = PyStructSequence_New(state->AwaitedInfo_Type); + if (result_item == NULL) { Py_DECREF(tid_py); - Py_DECREF(result_item); + Py_DECREF(awaited_by_for_thread); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); return -1; } - PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref - PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref + PyStructSequence_SetItem(result_item, 0, tid_py); // steals ref + PyStructSequence_SetItem(result_item, 1, awaited_by_for_thread); // steals ref if (PyList_Append(result, result_item)) { Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by result item"); return -1; } Py_DECREF(result_item); - if (append_awaited_by_for_thread( - handle, - head_addr, - debug_offsets, - async_offsets, - awaited_by_for_thread)) + if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by for thread"); return -1; } return 0; } -static PyObject* -get_all_awaited_by(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_all_awaited_by is not implemented on this platform"); - return NULL; -#endif +/* ============================================================================ + * STACK UNWINDING FUNCTIONS + * ============================================================================ */ - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; - } +static int +process_frame_chain( + RemoteUnwinderObject *unwinder, + uintptr_t initial_frame_addr, + StackChunkList *chunks, + PyObject *frame_info +) { + uintptr_t frame_addr = initial_frame_addr; + uintptr_t prev_frame_addr = 0; + const size_t MAX_FRAMES = 1024; + size_t frame_count = 0; + + while ((void*)frame_addr != NULL) { + PyObject *frame = NULL; + uintptr_t next_frame_addr = 0; + + if (++frame_count > MAX_FRAMES) { + PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded"); + return -1; + } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; - } + // Try chunks first, fallback to direct memory read + if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) { + PyErr_Clear(); + uintptr_t address_of_code_object = 0; + if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain"); + return -1; + } + } - PyObject *result = NULL; + if (!frame) { + break; + } - uintptr_t runtime_start_addr = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_addr == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); + if (prev_frame_addr && frame_addr != prev_frame_addr) { + PyErr_Format(PyExc_RuntimeError, + "Broken frame chain: expected frame at 0x%lx, got 0x%lx", + prev_frame_addr, frame_addr); + Py_DECREF(frame); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed"); + return -1; } - goto result_err; - } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_addr, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; - } + if (PyList_Append(frame_info, frame) == -1) { + Py_DECREF(frame); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to frame info list"); + return -1; + } + Py_DECREF(frame); - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; + prev_frame_addr = next_frame_addr; + frame_addr = next_frame_addr; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; - } + return 0; +} - uint64_t interpreter_state_list_head = - local_debug_offsets.runtime_state.interpreters_head; +static PyObject* +unwind_stack_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t *current_tstate +) { + PyObject *frame_info = NULL; + PyObject *thread_id = NULL; + PyObject *result = NULL; + StackChunkList chunks = {0}; - uintptr_t interpreter_state_addr; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_addr + interpreter_state_list_head, - sizeof(void*), - &interpreter_state_addr)) - { - goto result_err; + char ts[SIZEOF_THREAD_STATE]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, *current_tstate, (size_t)unwinder->debug_offsets.thread_state.size, ts); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread state"); + goto error; } - uintptr_t thread_state_addr; - unsigned long tid = 0; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - interpreter_state_addr - + local_debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; + uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); + + frame_info = PyList_New(0); + if (!frame_info) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frame info list"); + goto error; } - uintptr_t head_addr; - while (thread_state_addr != 0) { - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr - + local_debug_offsets.thread_state.native_thread_id, - sizeof(tid), - &tid)) - { - goto result_err; - } + if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks"); + goto error; + } - head_addr = thread_state_addr - + local_async_debug.asyncio_thread_state.asyncio_tasks_head; + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); + goto error; + } - if (append_awaited_by(handle, tid, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; - } + *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr + local_debug_offsets.thread_state.next, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; - } + thread_id = PyLong_FromLongLong( + GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id)); + if (thread_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); + goto error; } - head_addr = interpreter_state_addr - + local_async_debug.asyncio_interpreter_state.asyncio_tasks_head; - - // On top of a per-thread task lists used by default by asyncio to avoid - // contention, there is also a fallback per-interpreter list of tasks; - // any tasks still pending when a thread is destroyed will be moved to the - // per-interpreter task list. It's unlikely we'll find anything here, but - // interesting for debugging. - if (append_awaited_by(handle, 0, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->ThreadInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create ThreadInfo"); + goto error; } - _Py_RemoteDebug_CleanupProcHandle(handle); + PyStructSequence_SetItem(result, 0, thread_id); // Steals reference + PyStructSequence_SetItem(result, 1, frame_info); // Steals reference + + cleanup_stack_chunks(&chunks); return result; -result_err: +error: + Py_XDECREF(frame_info); + Py_XDECREF(thread_id); Py_XDECREF(result); - _Py_RemoteDebug_CleanupProcHandle(handle); + cleanup_stack_chunks(&chunks); return NULL; } -static PyObject* -get_stack_trace(PyObject* self, PyObject* args) + +/* ============================================================================ + * REMOTEUNWINDER CLASS IMPLEMENTATION + * ============================================================================ */ + +/*[clinic input] +class _remote_debugging.RemoteUnwinder "RemoteUnwinderObject *" "&RemoteUnwinder_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=55f164d8803318be]*/ + +/*[clinic input] +_remote_debugging.RemoteUnwinder.__init__ + pid: int + * + all_threads: bool = False + only_active_thread: bool = False + debug: bool = False + +Initialize a new RemoteUnwinder object for debugging a remote Python process. + +Args: + pid: Process ID of the target Python process to debug + all_threads: If True, initialize state for all threads in the process. + If False, only initialize for the main thread. + only_active_thread: If True, only sample the thread holding the GIL. + Cannot be used together with all_threads=True. + debug: If True, chain exceptions to explain the sequence of events that + lead to the exception. + +The RemoteUnwinder provides functionality to inspect and debug a running Python +process, including examining thread states, stack frames and other runtime data. + +Raises: + PermissionError: If access to the target process is denied + OSError: If unable to attach to the target process or access its memory + RuntimeError: If unable to read debug information from the target process + ValueError: If both all_threads and only_active_thread are True +[clinic start generated code]*/ + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads, + int only_active_thread, + int debug) +/*[clinic end generated code: output=13ba77598ecdcbe1 input=8f8f12504e17da04]*/ { -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; + // Validate that all_threads and only_active_thread are not both True + if (all_threads && only_active_thread) { + PyErr_SetString(PyExc_ValueError, + "all_threads and only_active_thread cannot both be True"); + return -1; + } + +#ifdef Py_GIL_DISABLED + if (only_active_thread) { + PyErr_SetString(PyExc_ValueError, + "only_active_thread is not supported when Py_GIL_DISABLED is not defined"); + return -1; + } #endif - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; + self->debug = debug; + self->only_active_thread = only_active_thread; + self->cached_state = NULL; + if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); + return -1; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; + self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle); + if (self->runtime_start_address == 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to get Python runtime address"); + return -1; } - PyObject* result = NULL; + if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle, + &self->runtime_start_address, + &self->debug_offsets) < 0) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read debug offsets"); + return -1; + } - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; + // Validate that the debug offsets are valid + if(validate_debug_offsets(&self->debug_offsets) == -1) { + set_exception_cause(self, PyExc_RuntimeError, "Invalid debug offsets found"); + return -1; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + // Try to read async debug offsets, but don't fail if they're not available + self->async_debug_offsets_available = 1; + if (read_async_debug(self) < 0) { + PyErr_Clear(); + memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets)); + self->async_debug_offsets_available = 0; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - goto result_err; + if (populate_initial_state_data(all_threads, self, self->runtime_start_address, + &self->interpreter_addr ,&self->tstate_addr) < 0) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to populate initial state data"); + return -1; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; + self->code_object_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + cached_code_metadata_destroy, + NULL + ); + if (self->code_object_cache == NULL) { + PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create code object cache"); + return -1; } - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - if (parse_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame) - < 0) - { - Py_DECREF(result); - goto result_err; +#ifdef Py_GIL_DISABLED + // Initialize TLBC cache + self->tlbc_generation = 0; + self->tlbc_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + tlbc_cache_entry_destroy, + NULL + ); + if (self->tlbc_cache == NULL) { + _Py_hashtable_destroy(self->code_object_cache); + PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create TLBC cache"); + return -1; + } +#endif + + return 0; +} + +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_stack_trace + +Returns a list of stack traces for threads in the target process. + +Each element in the returned list is a tuple of (thread_id, frame_list), where: +- thread_id is the OS thread identifier +- frame_list is a list of tuples (function_name, filename, line_number) representing + the Python stack frames for that thread, ordered from most recent to oldest + +The threads returned depend on the initialization parameters: +- If only_active_thread was True: returns only the thread holding the GIL +- If all_threads was True: returns all threads +- Otherwise: returns only the main thread + +Example: + [ + (1234, [ + ('process_data', 'worker.py', 127), + ('run_worker', 'worker.py', 45), + ('main', 'app.py', 23) + ]), + (1235, [ + ('handle_request', 'server.py', 89), + ('serve_forever', 'server.py', 52) + ]) + ] + +Raises: + RuntimeError: If there is an error copying memory from the target process + OSError: If there is an error accessing the target process + PermissionError: If access to the target process is denied + UnicodeDecodeError: If there is an error decoding strings from the target process + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=666192b90c69d567 input=f756f341206f9116]*/ +{ + PyObject* result = NULL; + // Read interpreter state into opaque buffer + char interp_state_buffer[INTERP_STATE_BUFFER_SIZE]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + self->interpreter_addr, + INTERP_STATE_BUFFER_SIZE, + interp_state_buffer) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read interpreter state buffer"); + goto exit; + } + + // Get code object generation from buffer + uint64_t code_object_generation = GET_MEMBER(uint64_t, interp_state_buffer, + self->debug_offsets.interpreter_state.code_object_generation); + + if (code_object_generation != self->code_object_generation) { + self->code_object_generation = code_object_generation; + _Py_hashtable_clear(self->code_object_cache); + } + + // If only_active_thread is true, we need to determine which thread holds the GIL + PyThreadState* gil_holder = NULL; + if (self->only_active_thread) { + // The GIL state is already in interp_state_buffer, just read from there + // Check if GIL is locked + int gil_locked = GET_MEMBER(int, interp_state_buffer, + self->debug_offsets.interpreter_state.gil_runtime_state_locked); + + if (gil_locked) { + // Get the last holder (current holder when GIL is locked) + gil_holder = GET_MEMBER(PyThreadState*, interp_state_buffer, + self->debug_offsets.interpreter_state.gil_runtime_state_holder); + } else { + // GIL is not locked, return empty list + result = PyList_New(0); + if (!result) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create empty result list"); + } + goto exit; } + } + +#ifdef Py_GIL_DISABLED + // Check TLBC generation and invalidate cache if needed + uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer, + self->debug_offsets.interpreter_state.tlbc_generation); + if (current_tlbc_generation != self->tlbc_generation) { + self->tlbc_generation = current_tlbc_generation; + _Py_hashtable_clear(self->tlbc_cache); + } +#endif + uintptr_t current_tstate; + if (self->only_active_thread && gil_holder != NULL) { + // We have the GIL holder, process only that thread + current_tstate = (uintptr_t)gil_holder; + } else if (self->tstate_addr == 0) { + // Get threads head from buffer + current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer, + self->debug_offsets.interpreter_state.threads_head); + } else { + current_tstate = self->tstate_addr; + } + + result = PyList_New(0); + if (!result) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create stack trace result list"); + goto exit; + } + + while (current_tstate != 0) { + PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate); if (!frame_info) { - continue; + Py_CLEAR(result); + set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread"); + goto exit; } if (PyList_Append(result, frame_info) == -1) { - Py_DECREF(result); - goto result_err; + Py_DECREF(frame_info); + Py_CLEAR(result); + set_exception_cause(self, PyExc_RuntimeError, "Failed to append thread frame info"); + goto exit; } - Py_DECREF(frame_info); - frame_info = NULL; + // We are targeting a single tstate, break here + if (self->tstate_addr) { + break; + } + + // If we're only processing the GIL holder, we're done after one iteration + if (self->only_active_thread && gil_holder != NULL) { + break; + } } -result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); +exit: + _Py_RemoteDebug_ClearCache(&self->handle); return result; } -static PyObject* -get_async_stack_trace(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; -#endif - int pid; +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_all_awaited_by + +Get all tasks and their awaited_by relationships from the remote process. + +This provides a tree structure showing which tasks are waiting for other tasks. + +For each task, returns: +1. The call stack frames leading to where the task is currently executing +2. The name of the task +3. A list of tasks that this task is waiting for, with their own frames/names/etc + +Returns a list of [frames, task_name, subtasks] where: +- frames: List of (func_name, filename, lineno) showing the call stack +- task_name: String identifier for the task +- subtasks: List of tasks being awaited by this task, in same format + +Raises: + RuntimeError: If AsyncioDebug section is not available in the remote process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output: +[ + # Task c2_root waiting for two subtasks + [ + # Call stack of c2_root + [("c5", "script.py", 10), ("c4", "script.py", 14)], + "c2_root", + [ + # First subtask (sub_main_2) and what it's waiting for + [ + [("c1", "script.py", 23)], + "sub_main_2", + [...] + ], + # Second subtask and its waiters + [...] + ] + ] +] +[clinic start generated code]*/ - if (!PyArg_ParseTuple(args, "i", &pid)) { +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6a49cd345e8aec53 input=a452c652bb00701a]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_all_awaited_by"); return NULL; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; - } - - PyObject *result = NULL; - - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } + PyObject *result = PyList_New(0); + if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create awaited_by result list"); goto result_err; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); + // Process all threads + if (iterate_threads(self, process_thread_for_awaited_by, result) < 0) { goto result_err; } - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; - } + uintptr_t head_addr = self->interpreter_addr + + (uintptr_t)self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head; - result = PyList_New(1); - if (result == NULL) { - goto result_err; - } - PyObject* calls = PyList_New(0); - if (calls == NULL) { - goto result_err; - } - if (PyList_SetItem(result, 0, calls)) { /* steals ref to 'calls' */ - Py_DECREF(calls); + // On top of a per-thread task lists used by default by asyncio to avoid + // contention, there is also a fallback per-interpreter list of tasks; + // any tasks still pending when a thread is destroyed will be moved to the + // per-interpreter task list. It's unlikely we'll find anything here, but + // interesting for debugging. + if (append_awaited_by(self, 0, head_addr, result)) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to append interpreter awaited_by in get_all_awaited_by"); goto result_err; } - uintptr_t running_task_addr = (uintptr_t)NULL; - if (find_running_task( - handle, runtime_start_address, &local_debug_offsets, &local_async_debug, - &running_task_addr) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running task"); - goto result_err; - } + _Py_RemoteDebug_ClearCache(&self->handle); + return result; - if ((void*)running_task_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "No running task found"); - goto result_err; - } +result_err: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} - uintptr_t running_coro_addr; - if (read_py_ptr( - handle, - running_task_addr + local_async_debug.asyncio_task_object.task_coro, - &running_coro_addr - )) { - chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro"); - goto result_err; - } +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_async_stack_trace + +Get the currently running async tasks and their dependency graphs from the remote process. + +This returns information about running tasks and all tasks that are waiting for them, +forming a complete dependency graph for each thread's active task. + +For each thread with a running task, returns the running task plus all tasks that +transitively depend on it (tasks waiting for the running task, tasks waiting for +those tasks, etc.). + +Returns a list of per-thread results, where each thread result contains: +- Thread ID +- List of task information for the running task and all its waiters + +Each task info contains: +- Task ID (memory address) +- Task name +- Call stack frames: List of (func_name, filename, lineno) +- List of tasks waiting for this task (recursive structure) + +Raises: + RuntimeError: If AsyncioDebug section is not available in the target process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output (similar structure to get_all_awaited_by but only for running tasks): +[ + # Thread 140234 results + (140234, [ + # Running task and its complete waiter dependency graph + (4345585712, 'main_task', + [("run_server", "server.py", 127), ("main", "app.py", 23)], + [ + # Tasks waiting for main_task + (4345585800, 'worker_1', [...], [...]), + (4345585900, 'worker_2', [...], [...]) + ]) + ]) +] + +[clinic start generated code]*/ - if ((void*)running_coro_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); - goto result_err; +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6433d52b55e87bbe input=8744b47c9ec2220a]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_async_stack_trace"); + return NULL; } - // note: genobject's gi_iframe is an embedded struct so the address to - // the offset leads directly to its first field: f_executable - uintptr_t address_of_running_task_code_obj; - if (read_py_ptr( - handle, - running_coro_addr + local_debug_offsets.gen_object.gi_iframe, - &address_of_running_task_code_obj - )) { - goto result_err; + PyObject *result = PyList_New(0); + if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create result list in get_async_stack_trace"); + return NULL; } - if ((void*)address_of_running_task_code_obj == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); + // Process all threads + if (iterate_threads(self, process_thread_for_async_stack_trace, result) < 0) { goto result_err; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running frame"); - goto result_err; + _Py_RemoteDebug_ClearCache(&self->handle); + return result; +result_err: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} + +static PyMethodDef RemoteUnwinder_methods[] = { + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF + {NULL, NULL} +}; + +static void +RemoteUnwinder_dealloc(PyObject *op) +{ + RemoteUnwinderObject *self = RemoteUnwinder_CAST(op); + PyTypeObject *tp = Py_TYPE(self); + if (self->code_object_cache) { + _Py_hashtable_destroy(self->code_object_cache); } +#ifdef Py_GIL_DISABLED + if (self->tlbc_cache) { + _Py_hashtable_destroy(self->tlbc_cache); + } +#endif + if (self->handle.pid != 0) { + _Py_RemoteDebug_ClearCache(&self->handle); + _Py_RemoteDebug_CleanupProcHandle(&self->handle); + } + PyObject_Del(self); + Py_DECREF(tp); +} - uintptr_t address_of_code_object; - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - int res = parse_async_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame, - &address_of_code_object - ); +static PyType_Slot RemoteUnwinder_slots[] = { + {Py_tp_doc, (void *)"RemoteUnwinder(pid): Inspect stack of a remote Python process."}, + {Py_tp_methods, RemoteUnwinder_methods}, + {Py_tp_init, _remote_debugging_RemoteUnwinder___init__}, + {Py_tp_dealloc, RemoteUnwinder_dealloc}, + {0, NULL} +}; - if (res < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object"); - goto result_err; - } +static PyType_Spec RemoteUnwinder_spec = { + .name = "_remote_debugging.RemoteUnwinder", + .basicsize = sizeof(RemoteUnwinderObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = RemoteUnwinder_slots, +}; - if (!frame_info) { - continue; - } +/* ============================================================================ + * MODULE INITIALIZATION + * ============================================================================ */ - if (PyList_Append(calls, frame_info) == -1) { - Py_DECREF(calls); - goto result_err; - } +static int +_remote_debugging_exec(PyObject *m) +{ + RemoteDebuggingState *st = RemoteDebugging_GetState(m); +#define CREATE_TYPE(mod, type, spec) \ + do { \ + type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \ + if (type == NULL) { \ + return -1; \ + } \ + } while (0) + + CREATE_TYPE(m, st->RemoteDebugging_Type, &RemoteUnwinder_spec); + + if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) { + return -1; + } - Py_DECREF(frame_info); - frame_info = NULL; + // Initialize structseq types + st->TaskInfo_Type = PyStructSequence_NewType(&TaskInfo_desc); + if (st->TaskInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->TaskInfo_Type) < 0) { + return -1; + } - if (address_of_code_object == address_of_running_task_code_obj) { - break; - } + st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); + if (st->FrameInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->FrameInfo_Type) < 0) { + return -1; } - PyObject *tn = parse_task_name( - handle, &local_debug_offsets, &local_async_debug, running_task_addr); - if (tn == NULL) { - goto result_err; + st->CoroInfo_Type = PyStructSequence_NewType(&CoroInfo_desc); + if (st->CoroInfo_Type == NULL) { + return -1; } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); - goto result_err; + if (PyModule_AddType(m, st->CoroInfo_Type) < 0) { + return -1; } - Py_DECREF(tn); - PyObject* awaited_by = PyList_New(0); - if (awaited_by == NULL) { - goto result_err; + st->ThreadInfo_Type = PyStructSequence_NewType(&ThreadInfo_desc); + if (st->ThreadInfo_Type == NULL) { + return -1; } - if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto result_err; + if (PyModule_AddType(m, st->ThreadInfo_Type) < 0) { + return -1; } - Py_DECREF(awaited_by); - if (parse_task_awaited_by( - handle, &local_debug_offsets, &local_async_debug, - running_task_addr, awaited_by, 1) - ) { - goto result_err; + st->AwaitedInfo_Type = PyStructSequence_NewType(&AwaitedInfo_desc); + if (st->AwaitedInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->AwaitedInfo_Type) < 0) { + return -1; + } +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); +#endif + int rc = PyModule_AddIntConstant(m, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); + if (rc < 0) { + return -1; + } + if (RemoteDebugging_InitState(st) < 0) { + return -1; } + return 0; +} - _Py_RemoteDebug_CleanupProcHandle(handle); - return result; +static int +remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_VISIT(state->RemoteDebugging_Type); + Py_VISIT(state->TaskInfo_Type); + Py_VISIT(state->FrameInfo_Type); + Py_VISIT(state->CoroInfo_Type); + Py_VISIT(state->ThreadInfo_Type); + Py_VISIT(state->AwaitedInfo_Type); + return 0; +} -result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); - Py_XDECREF(result); - return NULL; +static int +remote_debugging_clear(PyObject *mod) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_CLEAR(state->RemoteDebugging_Type); + Py_CLEAR(state->TaskInfo_Type); + Py_CLEAR(state->FrameInfo_Type); + Py_CLEAR(state->CoroInfo_Type); + Py_CLEAR(state->ThreadInfo_Type); + Py_CLEAR(state->AwaitedInfo_Type); + return 0; } +static void +remote_debugging_free(void *mod) +{ + (void)remote_debugging_clear((PyObject *)mod); +} -static PyMethodDef methods[] = { - {"get_stack_trace", get_stack_trace, METH_VARARGS, - "Get the Python stack from a given pod"}, - {"get_async_stack_trace", get_async_stack_trace, METH_VARARGS, - "Get the asyncio stack from a given pid"}, - {"get_all_awaited_by", get_all_awaited_by, METH_VARARGS, - "Get all tasks and their awaited_by from a given pid"}, +static PyModuleDef_Slot remote_debugging_slots[] = { + {Py_mod_exec, _remote_debugging_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static PyMethodDef remote_debugging_methods[] = { {NULL, NULL, 0, NULL}, }; -static struct PyModuleDef module = { - .m_base = PyModuleDef_HEAD_INIT, +static struct PyModuleDef remote_debugging_module = { + PyModuleDef_HEAD_INIT, .m_name = "_remote_debugging", - .m_size = -1, - .m_methods = methods, + .m_size = sizeof(RemoteDebuggingState), + .m_methods = remote_debugging_methods, + .m_slots = remote_debugging_slots, + .m_traverse = remote_debugging_traverse, + .m_clear = remote_debugging_clear, + .m_free = remote_debugging_free, }; PyMODINIT_FUNC PyInit__remote_debugging(void) { - PyObject* mod = PyModule_Create(&module); - if (mod == NULL) { - return NULL; - } -#ifdef Py_GIL_DISABLED - PyUnstable_Module_SetGIL(mod, Py_MOD_GIL_NOT_USED); -#endif - int rc = PyModule_AddIntConstant( - mod, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); - if (rc < 0) { - Py_DECREF(mod); - return NULL; - } - return mod; + return PyModuleDef_Init(&remote_debugging_module); } diff --git a/Modules/_sqlite/blob.c b/Modules/_sqlite/blob.c index 35d090e3ca2dce..aafefbf316e03d 100644 --- a/Modules/_sqlite/blob.c +++ b/Modules/_sqlite/blob.c @@ -4,6 +4,7 @@ #include "blob.h" #include "util.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #define clinic_state() (pysqlite_get_state_by_type(Py_TYPE(self))) #include "clinic/blob.c.h" @@ -56,9 +57,7 @@ blob_dealloc(PyObject *op) close_blob(self); - if (self->in_weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->in_weakreflist); (void)tp->tp_clear(op); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h index c8e1d0b7a738d3..b4415a9bc8fa55 100644 --- a/Modules/_sqlite/clinic/connection.c.h +++ b/Modules/_sqlite/clinic/connection.c.h @@ -235,7 +235,7 @@ pysqlite_connection_cursor(PyObject *self, PyObject *const *args, Py_ssize_t nar } PyDoc_STRVAR(blobopen__doc__, -"blobopen($self, table, column, row, /, *, readonly=False, name=\'main\')\n" +"blobopen($self, table, column, rowid, /, *, readonly=False, name=\'main\')\n" "--\n" "\n" "Open and return a BLOB object.\n" @@ -244,8 +244,8 @@ PyDoc_STRVAR(blobopen__doc__, " Table name.\n" " column\n" " Column name.\n" -" row\n" -" Row index.\n" +" rowid\n" +" Row id.\n" " readonly\n" " Open the BLOB without write permissions.\n" " name\n" @@ -1921,4 +1921,4 @@ getconfig(PyObject *self, PyObject *arg) #ifndef DESERIALIZE_METHODDEF #define DESERIALIZE_METHODDEF #endif /* !defined(DESERIALIZE_METHODDEF) */ -/*[clinic end generated code: output=2f325c2444b4bb47 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7b502667df24ee41 input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/clinic/cursor.c.h b/Modules/_sqlite/clinic/cursor.c.h index 350577f488df4b..3cad9f3aef5ecd 100644 --- a/Modules/_sqlite/clinic/cursor.c.h +++ b/Modules/_sqlite/clinic/cursor.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_long.h" // _PyLong_UInt32_Converter() #include "pycore_modsupport.h" // _PyArg_CheckPositional() static int @@ -181,7 +182,7 @@ PyDoc_STRVAR(pysqlite_cursor_fetchmany__doc__, {"fetchmany", _PyCFunction_CAST(pysqlite_cursor_fetchmany), METH_FASTCALL|METH_KEYWORDS, pysqlite_cursor_fetchmany__doc__}, static PyObject * -pysqlite_cursor_fetchmany_impl(pysqlite_Cursor *self, int maxrows); +pysqlite_cursor_fetchmany_impl(pysqlite_Cursor *self, uint32_t maxrows); static PyObject * pysqlite_cursor_fetchmany(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -216,7 +217,7 @@ pysqlite_cursor_fetchmany(PyObject *self, PyObject *const *args, Py_ssize_t narg #undef KWTUPLE PyObject *argsbuf[1]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - int maxrows = ((pysqlite_Cursor *)self)->arraysize; + uint32_t maxrows = ((pysqlite_Cursor *)self)->arraysize; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -226,8 +227,7 @@ pysqlite_cursor_fetchmany(PyObject *self, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_pos; } - maxrows = PyLong_AsInt(args[0]); - if (maxrows == -1 && PyErr_Occurred()) { + if (!_PyLong_UInt32_Converter(args[0], &maxrows)) { goto exit; } skip_optional_pos: @@ -329,4 +329,46 @@ pysqlite_cursor_close(PyObject *self, PyObject *Py_UNUSED(ignored)) { return pysqlite_cursor_close_impl((pysqlite_Cursor *)self); } -/*[clinic end generated code: output=d05c7cbbc8bcab26 input=a9049054013a1b77]*/ + +#if !defined(_sqlite3_Cursor_arraysize_DOCSTR) +# define _sqlite3_Cursor_arraysize_DOCSTR NULL +#endif +#if defined(_SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF) +# undef _SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF +# define _SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF {"arraysize", (getter)_sqlite3_Cursor_arraysize_get, (setter)_sqlite3_Cursor_arraysize_set, _sqlite3_Cursor_arraysize_DOCSTR}, +#else +# define _SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF {"arraysize", (getter)_sqlite3_Cursor_arraysize_get, NULL, _sqlite3_Cursor_arraysize_DOCSTR}, +#endif + +static PyObject * +_sqlite3_Cursor_arraysize_get_impl(pysqlite_Cursor *self); + +static PyObject * +_sqlite3_Cursor_arraysize_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _sqlite3_Cursor_arraysize_get_impl((pysqlite_Cursor *)self); +} + +#if !defined(_sqlite3_Cursor_arraysize_DOCSTR) +# define _sqlite3_Cursor_arraysize_DOCSTR NULL +#endif +#if defined(_SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF) +# undef _SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF +# define _SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF {"arraysize", (getter)_sqlite3_Cursor_arraysize_get, (setter)_sqlite3_Cursor_arraysize_set, _sqlite3_Cursor_arraysize_DOCSTR}, +#else +# define _SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF {"arraysize", NULL, (setter)_sqlite3_Cursor_arraysize_set, NULL}, +#endif + +static int +_sqlite3_Cursor_arraysize_set_impl(pysqlite_Cursor *self, PyObject *value); + +static int +_sqlite3_Cursor_arraysize_set(PyObject *self, PyObject *value, void *Py_UNUSED(context)) +{ + int return_value; + + return_value = _sqlite3_Cursor_arraysize_set_impl((pysqlite_Cursor *)self, value); + + return return_value; +} +/*[clinic end generated code: output=a0e3ebba9e4d0ece input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c index 2a184f787542ec..679e987aa75dbf 100644 --- a/Modules/_sqlite/connection.c +++ b/Modules/_sqlite/connection.c @@ -144,7 +144,7 @@ class _sqlite3.Connection "pysqlite_Connection *" "clinic_state()->ConnectionTyp [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=67369db2faf80891]*/ -static void _pysqlite_drop_unused_cursor_references(pysqlite_Connection* self); +static int _pysqlite_drop_unused_cursor_references(pysqlite_Connection* self); static void free_callback_context(callback_context *ctx); static void set_callback_context(callback_context **ctx_pp, callback_context *ctx); @@ -562,7 +562,10 @@ pysqlite_connection_cursor_impl(pysqlite_Connection *self, PyObject *factory) return NULL; } - _pysqlite_drop_unused_cursor_references(self); + if (_pysqlite_drop_unused_cursor_references(self) < 0) { + Py_DECREF(cursor); + return NULL; + } if (cursor && self->row_factory != Py_None) { Py_INCREF(self->row_factory); @@ -579,8 +582,8 @@ _sqlite3.Connection.blobopen as blobopen Table name. column as col: str Column name. - row: sqlite3_int64 - Row index. + rowid as row: sqlite3_int64 + Row id. / * readonly: bool = False @@ -594,7 +597,7 @@ Open and return a BLOB object. static PyObject * blobopen_impl(pysqlite_Connection *self, const char *table, const char *col, sqlite3_int64 row, int readonly, const char *name) -/*[clinic end generated code: output=6a02d43efb885d1c input=23576bd1108d8774]*/ +/*[clinic end generated code: output=6a02d43efb885d1c input=cc3d4b47dac08401]*/ { if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) { return NULL; @@ -1061,32 +1064,36 @@ final_callback(sqlite3_context *context) PyGILState_Release(threadstate); } -static void _pysqlite_drop_unused_cursor_references(pysqlite_Connection* self) +static int +_pysqlite_drop_unused_cursor_references(pysqlite_Connection* self) { /* we only need to do this once in a while */ if (self->created_cursors++ < 200) { - return; + return 0; } self->created_cursors = 0; PyObject* new_list = PyList_New(0); if (!new_list) { - return; + return -1; } - for (Py_ssize_t i = 0; i < PyList_Size(self->cursors); i++) { - PyObject* weakref = PyList_GetItem(self->cursors, i); + assert(PyList_CheckExact(self->cursors)); + Py_ssize_t imax = PyList_GET_SIZE(self->cursors); + for (Py_ssize_t i = 0; i < imax; i++) { + PyObject* weakref = PyList_GET_ITEM(self->cursors, i); if (_PyWeakref_IsDead(weakref)) { continue; } if (PyList_Append(new_list, weakref) != 0) { Py_DECREF(new_list); - return; + return -1; } } Py_SETREF(self->cursors, new_list); + return 0; } /* Allocate a UDF/callback context structure. In order to ensure that the state diff --git a/Modules/_sqlite/cursor.c b/Modules/_sqlite/cursor.c index 7943bfcca3679d..cb0f9adcc45a96 100644 --- a/Modules/_sqlite/cursor.c +++ b/Modules/_sqlite/cursor.c @@ -31,6 +31,7 @@ #include "util.h" #include "pycore_pyerrors.h" // _PyErr_FormatFromCause() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() typedef enum { TYPE_LONG, @@ -185,9 +186,7 @@ cursor_dealloc(PyObject *op) pysqlite_Cursor *self = _pysqlite_Cursor_CAST(op); PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - if (self->in_weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->in_weakreflist); (void)tp->tp_clear(op); tp->tp_free(self); Py_DECREF(tp); @@ -472,6 +471,9 @@ static int check_cursor(pysqlite_Cursor* cur) return 0; } + assert(cur->connection != NULL); + assert(cur->connection->state != NULL); + if (cur->closed) { PyErr_SetString(cur->connection->state->ProgrammingError, "Cannot operate on a closed cursor."); @@ -568,43 +570,40 @@ bind_param(pysqlite_state *state, pysqlite_Statement *self, int pos, switch (paramtype) { case TYPE_LONG: { sqlite_int64 value = _pysqlite_long_as_int64(parameter); - if (value == -1 && PyErr_Occurred()) - rc = -1; - else - rc = sqlite3_bind_int64(self->st, pos, value); + rc = (value == -1 && PyErr_Occurred()) + ? SQLITE_ERROR + : sqlite3_bind_int64(self->st, pos, value); break; } case TYPE_FLOAT: { double value = PyFloat_AsDouble(parameter); - if (value == -1 && PyErr_Occurred()) { - rc = -1; - } - else { - rc = sqlite3_bind_double(self->st, pos, value); - } + rc = (value == -1 && PyErr_Occurred()) + ? SQLITE_ERROR + : sqlite3_bind_double(self->st, pos, value); break; } case TYPE_UNICODE: string = PyUnicode_AsUTF8AndSize(parameter, &buflen); - if (string == NULL) - return -1; + if (string == NULL) { + return SQLITE_ERROR; + } if (buflen > INT_MAX) { PyErr_SetString(PyExc_OverflowError, "string longer than INT_MAX bytes"); - return -1; + return SQLITE_ERROR; } rc = sqlite3_bind_text(self->st, pos, string, (int)buflen, SQLITE_TRANSIENT); break; case TYPE_BUFFER: { Py_buffer view; if (PyObject_GetBuffer(parameter, &view, PyBUF_SIMPLE) != 0) { - return -1; + return SQLITE_ERROR; } if (view.len > INT_MAX) { PyErr_SetString(PyExc_OverflowError, "BLOB longer than INT_MAX bytes"); PyBuffer_Release(&view); - return -1; + return SQLITE_ERROR; } rc = sqlite3_bind_blob(self->st, pos, view.buf, (int)view.len, SQLITE_TRANSIENT); PyBuffer_Release(&view); @@ -614,7 +613,7 @@ bind_param(pysqlite_state *state, pysqlite_Statement *self, int pos, PyErr_Format(state->ProgrammingError, "Error binding parameter %d: type '%s' is not supported", pos, Py_TYPE(parameter)->tp_name); - rc = -1; + rc = SQLITE_ERROR; } final: @@ -734,14 +733,17 @@ bind_parameters(pysqlite_state *state, pysqlite_Statement *self, } binding_name++; /* skip first char (the colon) */ - PyObject *current_param; - (void)PyMapping_GetOptionalItemString(parameters, binding_name, &current_param); - if (!current_param) { - if (!PyErr_Occurred() || PyErr_ExceptionMatches(PyExc_LookupError)) { - PyErr_Format(state->ProgrammingError, - "You did not supply a value for binding " - "parameter :%s.", binding_name); - } + PyObject *current_param = NULL; + int found = PyMapping_GetOptionalItemString(parameters, + binding_name, + &current_param); + if (found == -1) { + return; + } + else if (found == 0) { + PyErr_Format(state->ProgrammingError, + "You did not supply a value for binding " + "parameter :%s.", binding_name); return; } @@ -1160,35 +1162,31 @@ pysqlite_cursor_fetchone_impl(pysqlite_Cursor *self) /*[clinic input] _sqlite3.Cursor.fetchmany as pysqlite_cursor_fetchmany - size as maxrows: int(c_default='((pysqlite_Cursor *)self)->arraysize') = 1 + size as maxrows: uint32(c_default='((pysqlite_Cursor *)self)->arraysize') = 1 The default value is set by the Cursor.arraysize attribute. Fetches several rows from the resultset. [clinic start generated code]*/ static PyObject * -pysqlite_cursor_fetchmany_impl(pysqlite_Cursor *self, int maxrows) -/*[clinic end generated code: output=a8ef31fea64d0906 input=035dbe44a1005bf2]*/ +pysqlite_cursor_fetchmany_impl(pysqlite_Cursor *self, uint32_t maxrows) +/*[clinic end generated code: output=3325f2b477c71baf input=a509c412aa70b27e]*/ { PyObject* row; PyObject* list; - int counter = 0; list = PyList_New(0); if (!list) { return NULL; } - while ((row = pysqlite_cursor_iternext((PyObject *)self))) { - if (PyList_Append(list, row) < 0) { - Py_DECREF(row); - break; - } + while (maxrows > 0 && (row = pysqlite_cursor_iternext((PyObject *)self))) { + int rc = PyList_Append(list, row); Py_DECREF(row); - - if (++counter == maxrows) { + if (rc < 0) { break; } + maxrows--; } if (PyErr_Occurred()) { @@ -1302,6 +1300,30 @@ pysqlite_cursor_close_impl(pysqlite_Cursor *self) Py_RETURN_NONE; } +/*[clinic input] +@getter +_sqlite3.Cursor.arraysize +[clinic start generated code]*/ + +static PyObject * +_sqlite3_Cursor_arraysize_get_impl(pysqlite_Cursor *self) +/*[clinic end generated code: output=e0919d97175e6c50 input=3278f8d3ecbd90e3]*/ +{ + return PyLong_FromUInt32(self->arraysize); +} + +/*[clinic input] +@setter +_sqlite3.Cursor.arraysize +[clinic start generated code]*/ + +static int +_sqlite3_Cursor_arraysize_set_impl(pysqlite_Cursor *self, PyObject *value) +/*[clinic end generated code: output=af59a6b09f8cce6e input=ace48cb114e26060]*/ +{ + return PyLong_AsUInt32(value, &self->arraysize); +} + static PyMethodDef cursor_methods[] = { PYSQLITE_CURSOR_CLOSE_METHODDEF PYSQLITE_CURSOR_EXECUTEMANY_METHODDEF @@ -1319,7 +1341,6 @@ static struct PyMemberDef cursor_members[] = { {"connection", _Py_T_OBJECT, offsetof(pysqlite_Cursor, connection), Py_READONLY}, {"description", _Py_T_OBJECT, offsetof(pysqlite_Cursor, description), Py_READONLY}, - {"arraysize", Py_T_INT, offsetof(pysqlite_Cursor, arraysize), 0}, {"lastrowid", _Py_T_OBJECT, offsetof(pysqlite_Cursor, lastrowid), Py_READONLY}, {"rowcount", Py_T_LONG, offsetof(pysqlite_Cursor, rowcount), Py_READONLY}, {"row_factory", _Py_T_OBJECT, offsetof(pysqlite_Cursor, row_factory), 0}, @@ -1327,6 +1348,11 @@ static struct PyMemberDef cursor_members[] = {NULL} }; +static struct PyGetSetDef cursor_getsets[] = { + _SQLITE3_CURSOR_ARRAYSIZE_GETSETDEF + {NULL}, +}; + static const char cursor_doc[] = PyDoc_STR("SQLite database cursor class."); @@ -1337,6 +1363,7 @@ static PyType_Slot cursor_slots[] = { {Py_tp_iternext, pysqlite_cursor_iternext}, {Py_tp_methods, cursor_methods}, {Py_tp_members, cursor_members}, + {Py_tp_getset, cursor_getsets}, {Py_tp_init, pysqlite_cursor_init}, {Py_tp_traverse, cursor_traverse}, {Py_tp_clear, cursor_clear}, diff --git a/Modules/_sqlite/cursor.h b/Modules/_sqlite/cursor.h index 42f817af7c54ad..c840a3d7ed0d15 100644 --- a/Modules/_sqlite/cursor.h +++ b/Modules/_sqlite/cursor.h @@ -35,7 +35,7 @@ typedef struct pysqlite_Connection* connection; PyObject* description; PyObject* row_cast_map; - int arraysize; + uint32_t arraysize; PyObject* lastrowid; long rowcount; PyObject* row_factory; diff --git a/Modules/_sqlite/module.c b/Modules/_sqlite/module.c index 27e8dab92e0e67..fcc1d69c707ebe 100644 --- a/Modules/_sqlite/module.c +++ b/Modules/_sqlite/module.c @@ -745,7 +745,6 @@ module_exec(PyObject *module) return 0; error: - sqlite3_shutdown(); return -1; } diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 602d0ab8588f62..249d6fffc32cc2 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -44,6 +44,7 @@ static const char copyright[] = #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_unicodeobject.h" // _PyUnicode_Copy +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "sre.h" // SRE_CODE @@ -736,10 +737,7 @@ pattern_dealloc(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - PatternObject *obj = _PatternObject_CAST(self); - if (obj->weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, _PatternObject_CAST(self)->weakreflist); (void)pattern_clear(self); tp->tp_free(self); Py_DECREF(tp); @@ -1944,7 +1942,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) sre_match() code is robust even if they don't, and the worst you can get is nonsensical match results. */ GET_ARG; - if (arg > 2 * (size_t)groups + 1) { + if (arg >= 2 * (size_t)groups) { VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); FAIL; } @@ -2357,7 +2355,7 @@ match_getindex(MatchObject* self, PyObject* index) } // Check that i*2 cannot overflow to make static analyzers happy - assert(i <= SRE_MAXGROUPS); + assert((size_t)i <= SRE_MAXGROUPS); return i; } @@ -2837,20 +2835,25 @@ scanner_dealloc(PyObject *self) static int scanner_begin(ScannerObject* self) { - if (self->executing) { +#ifdef Py_GIL_DISABLED + int was_executing = _Py_atomic_exchange_int(&self->executing, 1); +#else + int was_executing = self->executing; + self->executing = 1; +#endif + if (was_executing) { PyErr_SetString(PyExc_ValueError, "regular expression scanner already executing"); return 0; } - self->executing = 1; return 1; } static void scanner_end(ScannerObject* self) { - assert(self->executing); - self->executing = 0; + assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing)); + FT_ATOMIC_STORE_INT(self->executing, 0); } /*[clinic input] diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 97a29f4d0e1830..417d5ca593158f 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -43,14 +43,14 @@ /* Redefined below for Windows debug builds after important #includes */ #define _PySSL_FIX_ERRNO -#define PySSL_BEGIN_ALLOW_THREADS_S(save) \ - do { (save) = PyEval_SaveThread(); } while(0) -#define PySSL_END_ALLOW_THREADS_S(save) \ - do { PyEval_RestoreThread(save); _PySSL_FIX_ERRNO; } while(0) -#define PySSL_BEGIN_ALLOW_THREADS { \ +#define PySSL_BEGIN_ALLOW_THREADS_S(save, mutex) \ + do { (save) = PyEval_SaveThread(); PyMutex_Lock(mutex); } while(0) +#define PySSL_END_ALLOW_THREADS_S(save, mutex) \ + do { PyMutex_Unlock(mutex); PyEval_RestoreThread(save); _PySSL_FIX_ERRNO; } while(0) +#define PySSL_BEGIN_ALLOW_THREADS(self) { \ PyThreadState *_save = NULL; \ - PySSL_BEGIN_ALLOW_THREADS_S(_save); -#define PySSL_END_ALLOW_THREADS PySSL_END_ALLOW_THREADS_S(_save); } + PySSL_BEGIN_ALLOW_THREADS_S(_save, &self->tstate_mutex); +#define PySSL_END_ALLOW_THREADS(self) PySSL_END_ALLOW_THREADS_S(_save, &self->tstate_mutex); } #if defined(HAVE_POLL_H) #include <poll.h> @@ -123,7 +123,7 @@ static void _PySSLFixErrno(void) { /* Include generated data (error codes) */ /* See make_ssl_data.h for notes on adding a new version. */ #if (OPENSSL_VERSION_NUMBER >= 0x30401000L) -#include "_ssl_data_34.h" +#include "_ssl_data_35.h" #elif (OPENSSL_VERSION_NUMBER >= 0x30100000L) #include "_ssl_data_340.h" #elif (OPENSSL_VERSION_NUMBER >= 0x30000000L) @@ -309,6 +309,9 @@ typedef struct { PyObject *psk_client_callback; PyObject *psk_server_callback; #endif + /* Lock to synchronize calls when the thread state is detached. + See also gh-134698. */ + PyMutex tstate_mutex; } PySSLContext; #define PySSLContext_CAST(op) ((PySSLContext *)(op)) @@ -563,7 +566,7 @@ fill_and_set_sslerror(_sslmodulestate *state, goto fail; } } - if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) { goto fail; } } @@ -889,9 +892,9 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, /* Make sure the SSL error state is initialized */ ERR_clear_error(); - PySSL_BEGIN_ALLOW_THREADS + PySSL_BEGIN_ALLOW_THREADS(sslctx) self->ssl = SSL_new(ctx); - PySSL_END_ALLOW_THREADS + PySSL_END_ALLOW_THREADS(sslctx) if (self->ssl == NULL) { Py_DECREF(self); _setSSLError(get_state_ctx(self), NULL, 0, __FILE__, __LINE__); @@ -907,7 +910,7 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, } /* bpo43522 and OpenSSL < 1.1.1l: copy hostflags manually */ -#if OPENSSL_VERSION < 0x101010cf +#if OPENSSL_VERSION_NUMBER < 0x101010cf X509_VERIFY_PARAM *ssl_verification_params = SSL_get0_param(self->ssl); X509_VERIFY_PARAM *ssl_ctx_verification_params = SSL_CTX_get0_param(ctx); @@ -960,12 +963,12 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, BIO_set_nbio(SSL_get_wbio(self->ssl), 1); } - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS; if (socket_type == PY_SSL_CLIENT) SSL_set_connect_state(self->ssl); else SSL_set_accept_state(self->ssl); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS; self->socket_type = socket_type; if (sock != NULL) { @@ -1034,10 +1037,11 @@ _ssl__SSLSocket_do_handshake_impl(PySSLSocket *self) /* Actually negotiate SSL connection */ /* XXX If SSL_do_handshake() returns 0, it's also a failure. */ do { - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS ret = SSL_do_handshake(self->ssl); err = _PySSL_errno(ret < 1, self->ssl, ret); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS; + _PySSL_FIX_ERRNO; self->err = err; if (PyErr_CheckSignals()) @@ -1406,14 +1410,14 @@ _get_peer_alt_names (_sslmodulestate *state, X509 *certificate) { } PyTuple_SET_ITEM(t, 0, v); - if (name->d.ip->length == 4) { - unsigned char *p = name->d.ip->data; + if (ASN1_STRING_length(name->d.ip) == 4) { + const unsigned char *p = ASN1_STRING_get0_data(name->d.ip); v = PyUnicode_FromFormat( "%d.%d.%d.%d", p[0], p[1], p[2], p[3] ); - } else if (name->d.ip->length == 16) { - unsigned char *p = name->d.ip->data; + } else if (ASN1_STRING_length(name->d.ip) == 16) { + const unsigned char *p = ASN1_STRING_get0_data(name->d.ip); v = PyUnicode_FromFormat( "%X:%X:%X:%X:%X:%X:%X:%X", p[0] << 8 | p[1], @@ -1544,8 +1548,9 @@ _get_aia_uri(X509 *certificate, int nid) { continue; } uri = ad->location->d.uniformResourceIdentifier; - ostr = PyUnicode_FromStringAndSize((char *)uri->data, - uri->length); + ostr = PyUnicode_FromStringAndSize( + (const char *)ASN1_STRING_get0_data(uri), + ASN1_STRING_length(uri)); if (ostr == NULL) { goto fail; } @@ -1611,8 +1616,9 @@ _get_crl_dp(X509 *certificate) { continue; } uri = gn->d.uniformResourceIdentifier; - ouri = PyUnicode_FromStringAndSize((char *)uri->data, - uri->length); + ouri = PyUnicode_FromStringAndSize( + (const char *)ASN1_STRING_get0_data(uri), + ASN1_STRING_length(uri)); if (ouri == NULL) goto done; @@ -1827,14 +1833,14 @@ _certificate_to_der(_sslmodulestate *state, X509 *certificate) /*[clinic input] _ssl._test_decode_cert - path: object(converter="PyUnicode_FSConverter") + path: unicode_fs_encoded / [clinic start generated code]*/ static PyObject * _ssl__test_decode_cert_impl(PyObject *module, PyObject *path) -/*[clinic end generated code: output=96becb9abb23c091 input=cdeaaf02d4346628]*/ +/*[clinic end generated code: output=96becb9abb23c091 input=cb4988d5e651a4f8]*/ { PyObject *retval = NULL; X509 *x=NULL; @@ -1864,7 +1870,6 @@ _ssl__test_decode_cert_impl(PyObject *module, PyObject *path) X509_free(x); fail0: - Py_DECREF(path); if (cert != NULL) BIO_free(cert); return retval; } @@ -2414,9 +2419,10 @@ PySSL_select(PySocketSockObject *s, int writing, PyTime_t timeout) ms = (int)_PyTime_AsMilliseconds(timeout, _PyTime_ROUND_CEILING); assert(ms <= INT_MAX); - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS rc = poll(&pollfd, 1, (int)ms); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; #else /* Guard against socket too large for select*/ if (!_PyIsSelectable_fd(s->sock_fd)) @@ -2428,13 +2434,14 @@ PySSL_select(PySocketSockObject *s, int writing, PyTime_t timeout) FD_SET(s->sock_fd, &fds); /* Wait until the socket becomes ready */ - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS nfds = Py_SAFE_DOWNCAST(s->sock_fd+1, SOCKET_T, int); if (writing) rc = select(nfds, NULL, &fds, NULL, &tv); else rc = select(nfds, &fds, NULL, NULL, &tv); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; #endif /* Return SOCKET_TIMED_OUT on timeout, SOCKET_OPERATION_OK otherwise @@ -2505,10 +2512,11 @@ _ssl__SSLSocket_write_impl(PySSLSocket *self, Py_buffer *b) } do { - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS; retval = SSL_write_ex(self->ssl, b->buf, (size_t)b->len, &count); err = _PySSL_errno(retval == 0, self->ssl, retval); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS; + _PySSL_FIX_ERRNO; self->err = err; if (PyErr_CheckSignals()) @@ -2566,10 +2574,11 @@ _ssl__SSLSocket_pending_impl(PySSLSocket *self) int count = 0; _PySSLError err; - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS; count = SSL_pending(self->ssl); err = _PySSL_errno(count < 0, self->ssl, count); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS; + _PySSL_FIX_ERRNO; self->err = err; if (count < 0) @@ -2660,10 +2669,11 @@ _ssl__SSLSocket_read_impl(PySSLSocket *self, Py_ssize_t len, deadline = _PyDeadline_Init(timeout); do { - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS; retval = SSL_read_ex(self->ssl, mem, (size_t)len, &count); err = _PySSL_errno(retval == 0, self->ssl, retval); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS; + _PySSL_FIX_ERRNO; self->err = err; if (PyErr_CheckSignals()) @@ -2762,7 +2772,7 @@ _ssl__SSLSocket_shutdown_impl(PySSLSocket *self) } while (1) { - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS; /* Disable read-ahead so that unwrap can work correctly. * Otherwise OpenSSL might read in too much data, * eating clear text data that happens to be @@ -2775,7 +2785,8 @@ _ssl__SSLSocket_shutdown_impl(PySSLSocket *self) SSL_set_read_ahead(self->ssl, 0); ret = SSL_shutdown(self->ssl); err = _PySSL_errno(ret < 0, self->ssl, ret); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS; + _PySSL_FIX_ERRNO; self->err = err; /* If err == 1, a secure shutdown with SSL_shutdown() is complete */ @@ -3167,9 +3178,10 @@ _ssl__SSLContext_impl(PyTypeObject *type, int proto_version) // no other thread can be touching this object yet. // (Technically, we can't even lock if we wanted to, as the // lock hasn't been initialized yet.) - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS ctx = SSL_CTX_new(method); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; if (ctx == NULL) { _setSSLError(get_ssl_state(module), NULL, 0, __FILE__, __LINE__); @@ -3194,6 +3206,7 @@ _ssl__SSLContext_impl(PyTypeObject *type, int proto_version) self->psk_client_callback = NULL; self->psk_server_callback = NULL; #endif + self->tstate_mutex = (PyMutex){0}; /* Don't check host name by default */ if (proto_version == PY_SSL_VERSION_TLS_CLIENT) { @@ -3312,9 +3325,10 @@ context_clear(PyObject *op) Py_CLEAR(self->psk_server_callback); #endif if (self->keylog_bio != NULL) { - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS BIO_free_all(self->keylog_bio); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; self->keylog_bio = NULL; } return 0; @@ -4037,7 +4051,8 @@ _password_callback(char *buf, int size, int rwflag, void *userdata) _PySSLPasswordInfo *pw_info = (_PySSLPasswordInfo*) userdata; PyObject *fn_ret = NULL; - PySSL_END_ALLOW_THREADS_S(pw_info->thread_state); + pw_info->thread_state = PyThreadState_Swap(pw_info->thread_state); + _PySSL_FIX_ERRNO; if (pw_info->error) { /* already failed previously. OpenSSL 3.0.0-alpha14 invokes the @@ -4067,13 +4082,13 @@ _password_callback(char *buf, int size, int rwflag, void *userdata) goto error; } - PySSL_BEGIN_ALLOW_THREADS_S(pw_info->thread_state); + pw_info->thread_state = PyThreadState_Swap(pw_info->thread_state); memcpy(buf, pw_info->password, pw_info->size); return pw_info->size; error: Py_XDECREF(fn_ret); - PySSL_BEGIN_ALLOW_THREADS_S(pw_info->thread_state); + pw_info->thread_state = PyThreadState_Swap(pw_info->thread_state); pw_info->error = 1; return -1; } @@ -4126,10 +4141,10 @@ _ssl__SSLContext_load_cert_chain_impl(PySSLContext *self, PyObject *certfile, SSL_CTX_set_default_passwd_cb(self->ctx, _password_callback); SSL_CTX_set_default_passwd_cb_userdata(self->ctx, &pw_info); } - PySSL_BEGIN_ALLOW_THREADS_S(pw_info.thread_state); + PySSL_BEGIN_ALLOW_THREADS_S(pw_info.thread_state, &self->tstate_mutex); r = SSL_CTX_use_certificate_chain_file(self->ctx, PyBytes_AS_STRING(certfile_bytes)); - PySSL_END_ALLOW_THREADS_S(pw_info.thread_state); + PySSL_END_ALLOW_THREADS_S(pw_info.thread_state, &self->tstate_mutex); if (r != 1) { if (pw_info.error) { ERR_clear_error(); @@ -4144,11 +4159,11 @@ _ssl__SSLContext_load_cert_chain_impl(PySSLContext *self, PyObject *certfile, } goto error; } - PySSL_BEGIN_ALLOW_THREADS_S(pw_info.thread_state); + PySSL_BEGIN_ALLOW_THREADS_S(pw_info.thread_state, &self->tstate_mutex); r = SSL_CTX_use_PrivateKey_file(self->ctx, PyBytes_AS_STRING(keyfile ? keyfile_bytes : certfile_bytes), SSL_FILETYPE_PEM); - PySSL_END_ALLOW_THREADS_S(pw_info.thread_state); + PySSL_END_ALLOW_THREADS_S(pw_info.thread_state, &self->tstate_mutex); Py_CLEAR(keyfile_bytes); Py_CLEAR(certfile_bytes); if (r != 1) { @@ -4165,9 +4180,9 @@ _ssl__SSLContext_load_cert_chain_impl(PySSLContext *self, PyObject *certfile, } goto error; } - PySSL_BEGIN_ALLOW_THREADS_S(pw_info.thread_state); + PySSL_BEGIN_ALLOW_THREADS_S(pw_info.thread_state, &self->tstate_mutex); r = SSL_CTX_check_private_key(self->ctx); - PySSL_END_ALLOW_THREADS_S(pw_info.thread_state); + PySSL_END_ALLOW_THREADS_S(pw_info.thread_state, &self->tstate_mutex); if (r != 1) { _setSSLError(get_state_ctx(self), NULL, 0, __FILE__, __LINE__); goto error; @@ -4384,9 +4399,9 @@ _ssl__SSLContext_load_verify_locations_impl(PySSLContext *self, cafile_buf = PyBytes_AS_STRING(cafile_bytes); if (capath) capath_buf = PyBytes_AS_STRING(capath_bytes); - PySSL_BEGIN_ALLOW_THREADS + PySSL_BEGIN_ALLOW_THREADS(self) r = SSL_CTX_load_verify_locations(self->ctx, cafile_buf, capath_buf); - PySSL_END_ALLOW_THREADS + PySSL_END_ALLOW_THREADS(self) if (r != 1) { if (errno != 0) { PyErr_SetFromErrno(PyExc_OSError); @@ -4438,10 +4453,11 @@ _ssl__SSLContext_load_dh_params_impl(PySSLContext *self, PyObject *filepath) return NULL; errno = 0; - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS dh = PEM_read_DHparams(f, NULL, NULL, NULL); fclose(f); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; if (dh == NULL) { if (errno != 0) { PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, filepath); @@ -4593,6 +4609,7 @@ _ssl__SSLContext_set_default_verify_paths_impl(PySSLContext *self) Py_BEGIN_ALLOW_THREADS rc = SSL_CTX_set_default_verify_paths(self->ctx); Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; if (!rc) { _setSSLError(get_state_ctx(self), NULL, 0, __FILE__, __LINE__); return NULL; diff --git a/Modules/_ssl/debughelpers.c b/Modules/_ssl/debughelpers.c index 7c0b4876f4353a..608ef07b5c5e43 100644 --- a/Modules/_ssl/debughelpers.c +++ b/Modules/_ssl/debughelpers.c @@ -140,13 +140,14 @@ _PySSL_keylog_callback(const SSL *ssl, const char *line) * critical debug helper. */ - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS PyThread_acquire_lock(lock, 1); res = BIO_printf(ssl_obj->ctx->keylog_bio, "%s\n", line); e = errno; (void)BIO_flush(ssl_obj->ctx->keylog_bio); PyThread_release_lock(lock); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; if (res == -1) { errno = e; @@ -187,9 +188,10 @@ _PySSLContext_set_keylog_filename(PyObject *op, PyObject *arg, if (self->keylog_bio != NULL) { BIO *bio = self->keylog_bio; self->keylog_bio = NULL; - PySSL_BEGIN_ALLOW_THREADS + Py_BEGIN_ALLOW_THREADS BIO_free_all(bio); - PySSL_END_ALLOW_THREADS + Py_END_ALLOW_THREADS + _PySSL_FIX_ERRNO; } if (arg == Py_None) { @@ -211,13 +213,13 @@ _PySSLContext_set_keylog_filename(PyObject *op, PyObject *arg, self->keylog_filename = Py_NewRef(arg); /* Write a header for seekable, empty files (this excludes pipes). */ - PySSL_BEGIN_ALLOW_THREADS + PySSL_BEGIN_ALLOW_THREADS(self) if (BIO_tell(self->keylog_bio) == 0) { BIO_puts(self->keylog_bio, "# TLS secrets log file, generated by OpenSSL / Python\n"); (void)BIO_flush(self->keylog_bio); } - PySSL_END_ALLOW_THREADS + PySSL_END_ALLOW_THREADS(self) SSL_CTX_set_keylog_callback(self->ctx, _PySSL_keylog_callback); return 0; } diff --git a/Modules/_ssl_data_34.h b/Modules/_ssl_data_35.h similarity index 98% rename from Modules/_ssl_data_34.h rename to Modules/_ssl_data_35.h index 99718c5e605acf..e4919b550e3a89 100644 --- a/Modules/_ssl_data_34.h +++ b/Modules/_ssl_data_35.h @@ -1,6 +1,6 @@ /* File generated by Tools/ssl/make_ssl_data.py */ -/* Generated on 2025-03-26T13:47:34.223146+00:00 */ -/* Generated from Git commit openssl-3.4.1-0-ga26d85337d */ +/* Generated on 2025-10-04T17:49:19.148321+00:00 */ +/* Generated from Git commit openssl-3.5.4-0-gc1eeb9406 */ /* generated from args.lib2errnum */ static struct py_ssl_library_code library_codes[] = { @@ -1283,6 +1283,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"FAILED_BUILDING_OWN_CHAIN", 58, 164}, #endif + #ifdef CMP_R_FAILED_EXTRACTING_CENTRAL_GEN_KEY + {"FAILED_EXTRACTING_CENTRAL_GEN_KEY", ERR_LIB_CMP, CMP_R_FAILED_EXTRACTING_CENTRAL_GEN_KEY}, + #else + {"FAILED_EXTRACTING_CENTRAL_GEN_KEY", 58, 203}, + #endif #ifdef CMP_R_FAILED_EXTRACTING_PUBKEY {"FAILED_EXTRACTING_PUBKEY", ERR_LIB_CMP, CMP_R_FAILED_EXTRACTING_PUBKEY}, #else @@ -1343,6 +1348,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"INVALID_ROOTCAKEYUPDATE", 58, 195}, #endif + #ifdef CMP_R_MISSING_CENTRAL_GEN_KEY + {"MISSING_CENTRAL_GEN_KEY", ERR_LIB_CMP, CMP_R_MISSING_CENTRAL_GEN_KEY}, + #else + {"MISSING_CENTRAL_GEN_KEY", 58, 204}, + #endif #ifdef CMP_R_MISSING_CERTID {"MISSING_CERTID", ERR_LIB_CMP, CMP_R_MISSING_CERTID}, #else @@ -1513,6 +1523,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"UNCLEAN_CTX", 58, 191}, #endif + #ifdef CMP_R_UNEXPECTED_CENTRAL_GEN_KEY + {"UNEXPECTED_CENTRAL_GEN_KEY", ERR_LIB_CMP, CMP_R_UNEXPECTED_CENTRAL_GEN_KEY}, + #else + {"UNEXPECTED_CENTRAL_GEN_KEY", 58, 205}, + #endif #ifdef CMP_R_UNEXPECTED_CERTPROFILE {"UNEXPECTED_CERTPROFILE", ERR_LIB_CMP, CMP_R_UNEXPECTED_CERTPROFILE}, #else @@ -2308,6 +2323,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"BAD_PBM_ITERATIONCOUNT", 56, 100}, #endif + #ifdef CRMF_R_CMS_NOT_SUPPORTED + {"CMS_NOT_SUPPORTED", ERR_LIB_CRMF, CRMF_R_CMS_NOT_SUPPORTED}, + #else + {"CMS_NOT_SUPPORTED", 56, 122}, + #endif #ifdef CRMF_R_CRMFERROR {"CRMFERROR", ERR_LIB_CRMF, CRMF_R_CRMFERROR}, #else @@ -2323,16 +2343,41 @@ static struct py_ssl_error_code error_codes[] = { #else {"ERROR_DECODING_CERTIFICATE", 56, 104}, #endif + #ifdef CRMF_R_ERROR_DECODING_ENCRYPTEDKEY + {"ERROR_DECODING_ENCRYPTEDKEY", ERR_LIB_CRMF, CRMF_R_ERROR_DECODING_ENCRYPTEDKEY}, + #else + {"ERROR_DECODING_ENCRYPTEDKEY", 56, 123}, + #endif #ifdef CRMF_R_ERROR_DECRYPTING_CERTIFICATE {"ERROR_DECRYPTING_CERTIFICATE", ERR_LIB_CRMF, CRMF_R_ERROR_DECRYPTING_CERTIFICATE}, #else {"ERROR_DECRYPTING_CERTIFICATE", 56, 105}, #endif + #ifdef CRMF_R_ERROR_DECRYPTING_ENCRYPTEDKEY + {"ERROR_DECRYPTING_ENCRYPTEDKEY", ERR_LIB_CRMF, CRMF_R_ERROR_DECRYPTING_ENCRYPTEDKEY}, + #else + {"ERROR_DECRYPTING_ENCRYPTEDKEY", 56, 124}, + #endif + #ifdef CRMF_R_ERROR_DECRYPTING_ENCRYPTEDVALUE + {"ERROR_DECRYPTING_ENCRYPTEDVALUE", ERR_LIB_CRMF, CRMF_R_ERROR_DECRYPTING_ENCRYPTEDVALUE}, + #else + {"ERROR_DECRYPTING_ENCRYPTEDVALUE", 56, 125}, + #endif #ifdef CRMF_R_ERROR_DECRYPTING_SYMMETRIC_KEY {"ERROR_DECRYPTING_SYMMETRIC_KEY", ERR_LIB_CRMF, CRMF_R_ERROR_DECRYPTING_SYMMETRIC_KEY}, #else {"ERROR_DECRYPTING_SYMMETRIC_KEY", 56, 106}, #endif + #ifdef CRMF_R_ERROR_SETTING_PURPOSE + {"ERROR_SETTING_PURPOSE", ERR_LIB_CRMF, CRMF_R_ERROR_SETTING_PURPOSE}, + #else + {"ERROR_SETTING_PURPOSE", 56, 126}, + #endif + #ifdef CRMF_R_ERROR_VERIFYING_ENCRYPTEDKEY + {"ERROR_VERIFYING_ENCRYPTEDKEY", ERR_LIB_CRMF, CRMF_R_ERROR_VERIFYING_ENCRYPTEDKEY}, + #else + {"ERROR_VERIFYING_ENCRYPTEDKEY", 56, 127}, + #endif #ifdef CRMF_R_FAILURE_OBTAINING_RANDOM {"FAILURE_OBTAINING_RANDOM", ERR_LIB_CRMF, CRMF_R_FAILURE_OBTAINING_RANDOM}, #else @@ -2358,6 +2403,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"POPOSKINPUT_NOT_SUPPORTED", 56, 113}, #endif + #ifdef CRMF_R_POPO_INCONSISTENT_CENTRAL_KEYGEN + {"POPO_INCONSISTENT_CENTRAL_KEYGEN", ERR_LIB_CRMF, CRMF_R_POPO_INCONSISTENT_CENTRAL_KEYGEN}, + #else + {"POPO_INCONSISTENT_CENTRAL_KEYGEN", 56, 128}, + #endif #ifdef CRMF_R_POPO_INCONSISTENT_PUBLIC_KEY {"POPO_INCONSISTENT_PUBLIC_KEY", ERR_LIB_CRMF, CRMF_R_POPO_INCONSISTENT_PUBLIC_KEY}, #else @@ -3963,6 +4013,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"PBKDF2_ERROR", 6, 181}, #endif + #ifdef EVP_R_PIPELINE_NOT_SUPPORTED + {"PIPELINE_NOT_SUPPORTED", ERR_LIB_EVP, EVP_R_PIPELINE_NOT_SUPPORTED}, + #else + {"PIPELINE_NOT_SUPPORTED", 6, 230}, + #endif #ifdef EVP_R_PKEY_APPLICATION_ASN1_METHOD_ALREADY_REGISTERED {"PKEY_APPLICATION_ASN1_METHOD_ALREADY_REGISTERED", ERR_LIB_EVP, EVP_R_PKEY_APPLICATION_ASN1_METHOD_ALREADY_REGISTERED}, #else @@ -3978,6 +4033,36 @@ static struct py_ssl_error_code error_codes[] = { #else {"PRIVATE_KEY_ENCODE_ERROR", 6, 146}, #endif + #ifdef EVP_R_PROVIDER_ASYM_CIPHER_FAILURE + {"PROVIDER_ASYM_CIPHER_FAILURE", ERR_LIB_EVP, EVP_R_PROVIDER_ASYM_CIPHER_FAILURE}, + #else + {"PROVIDER_ASYM_CIPHER_FAILURE", 6, 232}, + #endif + #ifdef EVP_R_PROVIDER_ASYM_CIPHER_NOT_SUPPORTED + {"PROVIDER_ASYM_CIPHER_NOT_SUPPORTED", ERR_LIB_EVP, EVP_R_PROVIDER_ASYM_CIPHER_NOT_SUPPORTED}, + #else + {"PROVIDER_ASYM_CIPHER_NOT_SUPPORTED", 6, 235}, + #endif + #ifdef EVP_R_PROVIDER_KEYMGMT_FAILURE + {"PROVIDER_KEYMGMT_FAILURE", ERR_LIB_EVP, EVP_R_PROVIDER_KEYMGMT_FAILURE}, + #else + {"PROVIDER_KEYMGMT_FAILURE", 6, 233}, + #endif + #ifdef EVP_R_PROVIDER_KEYMGMT_NOT_SUPPORTED + {"PROVIDER_KEYMGMT_NOT_SUPPORTED", ERR_LIB_EVP, EVP_R_PROVIDER_KEYMGMT_NOT_SUPPORTED}, + #else + {"PROVIDER_KEYMGMT_NOT_SUPPORTED", 6, 236}, + #endif + #ifdef EVP_R_PROVIDER_SIGNATURE_FAILURE + {"PROVIDER_SIGNATURE_FAILURE", ERR_LIB_EVP, EVP_R_PROVIDER_SIGNATURE_FAILURE}, + #else + {"PROVIDER_SIGNATURE_FAILURE", 6, 234}, + #endif + #ifdef EVP_R_PROVIDER_SIGNATURE_NOT_SUPPORTED + {"PROVIDER_SIGNATURE_NOT_SUPPORTED", ERR_LIB_EVP, EVP_R_PROVIDER_SIGNATURE_NOT_SUPPORTED}, + #else + {"PROVIDER_SIGNATURE_NOT_SUPPORTED", 6, 237}, + #endif #ifdef EVP_R_PUBLIC_KEY_NOT_RSA {"PUBLIC_KEY_NOT_RSA", ERR_LIB_EVP, EVP_R_PUBLIC_KEY_NOT_RSA}, #else @@ -3998,6 +4083,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"SIGNATURE_TYPE_AND_KEY_TYPE_INCOMPATIBLE", 6, 228}, #endif + #ifdef EVP_R_TOO_MANY_PIPES + {"TOO_MANY_PIPES", ERR_LIB_EVP, EVP_R_TOO_MANY_PIPES}, + #else + {"TOO_MANY_PIPES", 6, 231}, + #endif #ifdef EVP_R_TOO_MANY_RECORDS {"TOO_MANY_RECORDS", ERR_LIB_EVP, EVP_R_TOO_MANY_RECORDS}, #else @@ -4753,6 +4843,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"UNSUPPORTED_PUBLIC_KEY_TYPE", 9, 110}, #endif + #ifdef PEM_R_UNSUPPORTED_PVK_KEY_TYPE + {"UNSUPPORTED_PVK_KEY_TYPE", ERR_LIB_PEM, PEM_R_UNSUPPORTED_PVK_KEY_TYPE}, + #else + {"UNSUPPORTED_PVK_KEY_TYPE", 9, 133}, + #endif #ifdef PKCS12_R_CALLBACK_FAILED {"CALLBACK_FAILED", ERR_LIB_PKCS12, PKCS12_R_CALLBACK_FAILED}, #else @@ -5243,6 +5338,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"FIPS_MODULE_ENTERING_ERROR_STATE", 57, 224}, #endif + #ifdef PROV_R_FIPS_MODULE_IMPORT_PCT_ERROR + {"FIPS_MODULE_IMPORT_PCT_ERROR", ERR_LIB_PROV, PROV_R_FIPS_MODULE_IMPORT_PCT_ERROR}, + #else + {"FIPS_MODULE_IMPORT_PCT_ERROR", 57, 253}, + #endif #ifdef PROV_R_FIPS_MODULE_IN_ERROR_STATE {"FIPS_MODULE_IN_ERROR_STATE", ERR_LIB_PROV, PROV_R_FIPS_MODULE_IN_ERROR_STATE}, #else @@ -5543,6 +5643,16 @@ static struct py_ssl_error_code error_codes[] = { #else {"MISSING_XCGHASH", 57, 135}, #endif + #ifdef PROV_R_ML_DSA_NO_FORMAT + {"ML_DSA_NO_FORMAT", ERR_LIB_PROV, PROV_R_ML_DSA_NO_FORMAT}, + #else + {"ML_DSA_NO_FORMAT", 57, 245}, + #endif + #ifdef PROV_R_ML_KEM_NO_FORMAT + {"ML_KEM_NO_FORMAT", ERR_LIB_PROV, PROV_R_ML_KEM_NO_FORMAT}, + #else + {"ML_KEM_NO_FORMAT", 57, 246}, + #endif #ifdef PROV_R_MODULE_INTEGRITY_FAILURE {"MODULE_INTEGRITY_FAILURE", ERR_LIB_PROV, PROV_R_MODULE_INTEGRITY_FAILURE}, #else @@ -5593,6 +5703,16 @@ static struct py_ssl_error_code error_codes[] = { #else {"NO_PARAMETERS_SET", 57, 177}, #endif + #ifdef PROV_R_NULL_LENGTH_POINTER + {"NULL_LENGTH_POINTER", ERR_LIB_PROV, PROV_R_NULL_LENGTH_POINTER}, + #else + {"NULL_LENGTH_POINTER", 57, 247}, + #endif + #ifdef PROV_R_NULL_OUTPUT_BUFFER + {"NULL_OUTPUT_BUFFER", ERR_LIB_PROV, PROV_R_NULL_OUTPUT_BUFFER}, + #else + {"NULL_OUTPUT_BUFFER", 57, 248}, + #endif #ifdef PROV_R_ONESHOT_CALL_OUT_OF_ORDER {"ONESHOT_CALL_OUT_OF_ORDER", ERR_LIB_PROV, PROV_R_ONESHOT_CALL_OUT_OF_ORDER}, #else @@ -5728,6 +5848,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"UNABLE_TO_RESEED", 57, 204}, #endif + #ifdef PROV_R_UNEXPECTED_KEY_PARAMETERS + {"UNEXPECTED_KEY_PARAMETERS", ERR_LIB_PROV, PROV_R_UNEXPECTED_KEY_PARAMETERS}, + #else + {"UNEXPECTED_KEY_PARAMETERS", 57, 249}, + #endif #ifdef PROV_R_UNSUPPORTED_CEK_ALG {"UNSUPPORTED_CEK_ALG", ERR_LIB_PROV, PROV_R_UNSUPPORTED_CEK_ALG}, #else @@ -5748,6 +5873,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"UNSUPPORTED_NUMBER_OF_ROUNDS", 57, 152}, #endif + #ifdef PROV_R_UNSUPPORTED_SELECTION + {"UNSUPPORTED_SELECTION", ERR_LIB_PROV, PROV_R_UNSUPPORTED_SELECTION}, + #else + {"UNSUPPORTED_SELECTION", 57, 250}, + #endif #ifdef PROV_R_UPDATE_CALL_OUT_OF_ORDER {"UPDATE_CALL_OUT_OF_ORDER", ERR_LIB_PROV, PROV_R_UPDATE_CALL_OUT_OF_ORDER}, #else @@ -5763,6 +5893,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"VALUE_ERROR", 57, 138}, #endif + #ifdef PROV_R_WRONG_CIPHERTEXT_SIZE + {"WRONG_CIPHERTEXT_SIZE", ERR_LIB_PROV, PROV_R_WRONG_CIPHERTEXT_SIZE}, + #else + {"WRONG_CIPHERTEXT_SIZE", 57, 251}, + #endif #ifdef PROV_R_WRONG_FINAL_BLOCK_LENGTH {"WRONG_FINAL_BLOCK_LENGTH", ERR_LIB_PROV, PROV_R_WRONG_FINAL_BLOCK_LENGTH}, #else @@ -5938,6 +6073,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"PRNG_NOT_SEEDED", 36, 100}, #endif + #ifdef RAND_R_RANDOM_POOL_IS_EMPTY + {"RANDOM_POOL_IS_EMPTY", ERR_LIB_RAND, RAND_R_RANDOM_POOL_IS_EMPTY}, + #else + {"RANDOM_POOL_IS_EMPTY", 36, 142}, + #endif #ifdef RAND_R_RANDOM_POOL_OVERFLOW {"RANDOM_POOL_OVERFLOW", ERR_LIB_RAND, RAND_R_RANDOM_POOL_OVERFLOW}, #else @@ -6923,6 +7063,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"DIGEST_CHECK_FAILED", 20, 149}, #endif + #ifdef SSL_R_DOMAIN_USE_ONLY + {"DOMAIN_USE_ONLY", ERR_LIB_SSL, SSL_R_DOMAIN_USE_ONLY}, + #else + {"DOMAIN_USE_ONLY", 20, 422}, + #endif #ifdef SSL_R_DTLS_MESSAGE_TOO_BIG {"DTLS_MESSAGE_TOO_BIG", ERR_LIB_SSL, SSL_R_DTLS_MESSAGE_TOO_BIG}, #else @@ -7213,6 +7358,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"LIBRARY_HAS_NO_CIPHERS", 20, 161}, #endif + #ifdef SSL_R_LISTENER_USE_ONLY + {"LISTENER_USE_ONLY", ERR_LIB_SSL, SSL_R_LISTENER_USE_ONLY}, + #else + {"LISTENER_USE_ONLY", 20, 421}, + #endif #ifdef SSL_R_MAXIMUM_ENCRYPTED_PKTS_REACHED {"MAXIMUM_ENCRYPTED_PKTS_REACHED", ERR_LIB_SSL, SSL_R_MAXIMUM_ENCRYPTED_PKTS_REACHED}, #else @@ -7243,6 +7393,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"MISSING_PSK_KEX_MODES_EXTENSION", 20, 310}, #endif + #ifdef SSL_R_MISSING_QUIC_TLS_FUNCTIONS + {"MISSING_QUIC_TLS_FUNCTIONS", ERR_LIB_SSL, SSL_R_MISSING_QUIC_TLS_FUNCTIONS}, + #else + {"MISSING_QUIC_TLS_FUNCTIONS", 20, 423}, + #endif #ifdef SSL_R_MISSING_RSA_CERTIFICATE {"MISSING_RSA_CERTIFICATE", ERR_LIB_SSL, SSL_R_MISSING_RSA_CERTIFICATE}, #else @@ -8983,6 +9138,11 @@ static struct py_ssl_error_code error_codes[] = { #else {"POLICY_WHEN_PROXY_LANGUAGE_REQUIRES_NO_POLICY", 34, 159}, #endif + #ifdef X509V3_R_PURPOSE_NOT_UNIQUE + {"PURPOSE_NOT_UNIQUE", ERR_LIB_X509V3, X509V3_R_PURPOSE_NOT_UNIQUE}, + #else + {"PURPOSE_NOT_UNIQUE", 34, 173}, + #endif #ifdef X509V3_R_SECTION_NOT_FOUND {"SECTION_NOT_FOUND", ERR_LIB_X509V3, X509V3_R_SECTION_NOT_FOUND}, #else diff --git a/Modules/_stat.c b/Modules/_stat.c index f11ca7d23b440d..1dabf2f6d5b07f 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -57,7 +57,7 @@ typedef unsigned short mode_t; * Only the names are defined by POSIX but not their value. All common file * types seems to have the same numeric value on all platforms, though. * - * pyport.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK + * fileutils.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK */ #ifndef S_IFBLK @@ -86,7 +86,7 @@ typedef unsigned short mode_t; /* S_ISXXX() - * pyport.h defines S_ISDIR(), S_ISREG() and S_ISCHR() + * fileutils.h defines S_ISDIR(), S_ISREG() and S_ISCHR() */ #ifndef S_ISBLK diff --git a/Modules/_struct.c b/Modules/_struct.c index c36079f1eb8886..dc50c167641f5e 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -9,8 +9,10 @@ #include "Python.h" #include "pycore_bytesobject.h" // _PyBytesWriter +#include "pycore_lock.h" // _PyOnceFlag_CallOnce() #include "pycore_long.h" // _PyLong_AsByteArray() #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> // offsetof() @@ -1504,6 +1506,53 @@ static formatdef lilendian_table[] = { {0} }; +/* Ensure endian table optimization happens exactly once across all interpreters */ +static _PyOnceFlag endian_tables_init_once = {0}; + +static int +init_endian_tables(void *Py_UNUSED(arg)) +{ + const formatdef *native = native_table; + formatdef *other, *ptr; +#if PY_LITTLE_ENDIAN + other = lilendian_table; +#else + other = bigendian_table; +#endif + /* Scan through the native table, find a matching + entry in the endian table and swap in the + native implementations whenever possible + (64-bit platforms may not have "standard" sizes) */ + while (native->format != '\0' && other->format != '\0') { + ptr = other; + while (ptr->format != '\0') { + if (ptr->format == native->format) { + /* Match faster when formats are + listed in the same order */ + if (ptr == other) + other++; + /* Only use the trick if the + size matches */ + if (ptr->size != native->size) + break; + /* Skip float and double, could be + "unknown" float format */ + if (ptr->format == 'd' || ptr->format == 'f') + break; + /* Skip _Bool, semantics are different for standard size */ + if (ptr->format == '?') + break; + ptr->pack = native->pack; + ptr->unpack = native->unpack; + break; + } + ptr++; + } + native++; + } + return 0; +} + static const formatdef * whichtable(const char **pfmt) @@ -1794,9 +1843,7 @@ s_dealloc(PyObject *op) PyStructObject *s = PyStructObject_CAST(op); PyTypeObject *tp = Py_TYPE(s); PyObject_GC_UnTrack(s); - if (s->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, s->weakreflist); if (s->s_codes != NULL) { PyMem_Free(s->s_codes); } @@ -2714,47 +2761,8 @@ _structmodule_exec(PyObject *m) return -1; } - /* Check endian and swap in faster functions */ - { - const formatdef *native = native_table; - formatdef *other, *ptr; -#if PY_LITTLE_ENDIAN - other = lilendian_table; -#else - other = bigendian_table; -#endif - /* Scan through the native table, find a matching - entry in the endian table and swap in the - native implementations whenever possible - (64-bit platforms may not have "standard" sizes) */ - while (native->format != '\0' && other->format != '\0') { - ptr = other; - while (ptr->format != '\0') { - if (ptr->format == native->format) { - /* Match faster when formats are - listed in the same order */ - if (ptr == other) - other++; - /* Only use the trick if the - size matches */ - if (ptr->size != native->size) - break; - /* Skip float and double, could be - "unknown" float format */ - if (ptr->format == 'd' || ptr->format == 'f') - break; - /* Skip _Bool, semantics are different for standard size */ - if (ptr->format == '?') - break; - ptr->pack = native->pack; - ptr->unpack = native->unpack; - break; - } - ptr++; - } - native++; - } - } + /* init cannot fail */ + (void)_PyOnceFlag_CallOnce(&endian_tables_init_once, init_endian_tables, NULL); /* Add some symbolic constants to the module */ state->StructError = PyErr_NewException("struct.error", NULL, NULL); diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 42243023a45768..6313abf5485fff 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -228,7 +228,7 @@ pylongwriter_create(PyObject *module, PyObject *args) goto error; } - if (num < 0 || num >= PyLong_BASE) { + if (num < 0 || num >= (long)PyLong_BASE) { PyErr_SetString(PyExc_ValueError, "digit doesn't fit into digit"); goto error; } diff --git a/Modules/_testcapi/object.c b/Modules/_testcapi/object.c index 798ef97c495aeb..4d53b3c678a470 100644 --- a/Modules/_testcapi/object.c +++ b/Modules/_testcapi/object.c @@ -138,6 +138,15 @@ pyobject_is_unique_temporary(PyObject *self, PyObject *obj) return PyLong_FromLong(result); } +static PyObject * +pyobject_is_unique_temporary_new_object(PyObject *self, PyObject *unused) +{ + PyObject *obj = PyList_New(0); + int result = PyUnstable_Object_IsUniqueReferencedTemporary(obj); + Py_DECREF(obj); + return PyLong_FromLong(result); +} + static int MyObject_dealloc_called = 0; static void @@ -493,6 +502,7 @@ static PyMethodDef test_methods[] = { {"pyobject_clear_weakrefs_no_callbacks", pyobject_clear_weakrefs_no_callbacks, METH_O}, {"pyobject_enable_deferred_refcount", pyobject_enable_deferred_refcount, METH_O}, {"pyobject_is_unique_temporary", pyobject_is_unique_temporary, METH_O}, + {"pyobject_is_unique_temporary_new_object", pyobject_is_unique_temporary_new_object, METH_NOARGS}, {"test_py_try_inc_ref", test_py_try_inc_ref, METH_NOARGS}, {"test_xincref_doesnt_leak",test_xincref_doesnt_leak, METH_NOARGS}, {"test_incref_doesnt_leak", test_incref_doesnt_leak, METH_NOARGS}, diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index b8ecf53f4f8b9c..e70f5c68bc3b69 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args) } +static PyObject* +writer_write_ascii(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *str; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &str, &size)) { + return NULL; + } + + if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject* writer_write_widechar(PyObject *self_raw, PyObject *args) { @@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) static PyMethodDef writer_methods[] = { {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS}, {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS}, + {"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS}, {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS}, {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS}, {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS}, diff --git a/Modules/_testcapi/vectorcall.c b/Modules/_testcapi/vectorcall.c index 03aaacb328e0b6..f89dcb6c4cf03c 100644 --- a/Modules/_testcapi/vectorcall.c +++ b/Modules/_testcapi/vectorcall.c @@ -179,14 +179,14 @@ _testcapi_VectorCallClass_set_vectorcall_impl(PyObject *self, if (!PyObject_TypeCheck(self, type)) { return PyErr_Format( PyExc_TypeError, - "expected %s instance", - PyType_GetName(type)); + "expected %N instance", + type); } if (!type->tp_vectorcall_offset) { return PyErr_Format( PyExc_TypeError, - "type %s has no vectorcall offset", - PyType_GetName(type)); + "type %N has no vectorcall offset", + type); } *(vectorcallfunc*)((char*)self + type->tp_vectorcall_offset) = ( VectorCallClass_vectorcall); diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3aa6e4c9e43a26..228fe4eff5812b 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2419,12 +2419,22 @@ test_critical_sections(PyObject *module, PyObject *Py_UNUSED(args)) Py_BEGIN_CRITICAL_SECTION2(module, module); Py_END_CRITICAL_SECTION2(); +#ifdef Py_GIL_DISABLED + // avoid unused variable compiler warning on GIL-enabled build + PyMutex mut = {0}; + Py_BEGIN_CRITICAL_SECTION_MUTEX(&mut); + Py_END_CRITICAL_SECTION(); + + Py_BEGIN_CRITICAL_SECTION2_MUTEX(&mut, &mut); + Py_END_CRITICAL_SECTION2(); +#endif + Py_RETURN_NONE; } // Used by `finalize_thread_hang`. -#ifdef _POSIX_THREADS +#if defined(_POSIX_THREADS) && !defined(__wasi__) static void finalize_thread_hang_cleanup_callback(void *Py_UNUSED(arg)) { // Should not reach here. Py_FatalError("pthread thread termination was triggered unexpectedly"); @@ -3175,6 +3185,48 @@ create_manual_heap_type(void) return (PyObject *)type; } +typedef struct { + PyObject_VAR_HEAD +} ManagedDictObject; + +int ManagedDict_traverse(PyObject *self, visitproc visit, void *arg) { + PyObject_VisitManagedDict(self, visit, arg); + Py_VISIT(Py_TYPE(self)); + return 0; +} + +int ManagedDict_clear(PyObject *self) { + PyObject_ClearManagedDict(self); + return 0; +} + +static PyGetSetDef ManagedDict_getset[] = { + {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict, NULL, NULL}, + {NULL, NULL, NULL, NULL, NULL}, +}; + +static PyType_Slot ManagedDict_slots[] = { + {Py_tp_new, (void *)PyType_GenericNew}, + {Py_tp_getset, (void *)ManagedDict_getset}, + {Py_tp_traverse, (void *)ManagedDict_traverse}, + {Py_tp_clear, (void *)ManagedDict_clear}, + {0} +}; + +static PyType_Spec ManagedDict_spec = { + "_testcapi.ManagedDictType", + sizeof(ManagedDictObject), + 0, // itemsize + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_MANAGED_DICT | Py_TPFLAGS_HEAPTYPE | Py_TPFLAGS_HAVE_GC, + ManagedDict_slots +}; + +static PyObject * +create_managed_dict_type(void) +{ + return PyType_FromSpec(&ManagedDict_spec); +} + static struct PyModuleDef _testcapimodule = { PyModuleDef_HEAD_INIT, .m_name = "_testcapi", @@ -3285,6 +3337,10 @@ PyInit__testcapi(void) PyModule_AddObject(m, "INT64_MAX", PyLong_FromInt64(INT64_MAX)); PyModule_AddObject(m, "UINT64_MAX", PyLong_FromUInt64(UINT64_MAX)); + if (PyModule_AddIntMacro(m, _Py_STACK_GROWS_DOWN)) { + return NULL; + } + if (PyModule_AddIntMacro(m, Py_single_input)) { return NULL; } @@ -3315,6 +3371,13 @@ PyInit__testcapi(void) return NULL; } + PyObject *managed_dict_type = create_managed_dict_type(); + if (managed_dict_type == NULL) { + return NULL; + } + if (PyModule_Add(m, "ManagedDictType", managed_dict_type) < 0) { + return NULL; + } /* Include tests from the _testcapi/ directory */ if (_PyTestCapi_Init_Vectorcall(m) < 0) { diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index ae060c95fd5a01..ce11a81211e7e6 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -21,6 +21,7 @@ #include "pycore_fileutils.h" // _Py_normpath() #include "pycore_flowgraph.h" // _PyCompile_OptimizeCfg() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_function.h" // _PyFunction_GET_BUILTINS #include "pycore_gc.h" // PyGC_Head #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_import.h" // _PyImport_ClearExtension() @@ -120,10 +121,22 @@ get_c_recursion_remaining(PyObject *self, PyObject *Py_UNUSED(args)) PyThreadState *tstate = _PyThreadState_GET(); uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - int remaining = (int)((here_addr - _tstate->c_stack_soft_limit)/PYOS_STACK_MARGIN_BYTES * 50); + int remaining = (int)((here_addr - _tstate->c_stack_soft_limit) / _PyOS_STACK_MARGIN_BYTES * 50); return PyLong_FromLong(remaining); } +static PyObject* +get_stack_pointer(PyObject *self, PyObject *Py_UNUSED(args)) +{ + uintptr_t here_addr = _Py_get_machine_stack_pointer(); + return PyLong_FromSize_t(here_addr); +} + +static PyObject* +get_stack_margin(PyObject *self, PyObject *Py_UNUSED(args)) +{ + return PyLong_FromSize_t(_PyOS_STACK_MARGIN_BYTES); +} static PyObject* test_bswap(PyObject *self, PyObject *Py_UNUSED(args)) @@ -1022,7 +1035,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } @@ -1045,6 +1058,9 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) #define SET_COUNT(DICT, STRUCT, NAME) \ do { \ PyObject *count = PyLong_FromLong(STRUCT.NAME); \ + if (count == NULL) { \ + goto error; \ + } \ int res = PyDict_SetItemString(DICT, #NAME, count); \ Py_DECREF(count); \ if (res < 0) { \ @@ -1165,6 +1181,47 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) return NULL; } +static PyObject * +verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *codearg; + PyObject *globalnames = NULL; + PyObject *globalsns = NULL; + PyObject *builtinsns = NULL; + static char *kwlist[] = {"code", "globalnames", + "globalsns", "builtinsns", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|O!O!O!:get_code_var_counts", kwlist, + &codearg, &PySet_Type, &globalnames, + &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns)) + { + return NULL; + } + if (PyFunction_Check(codearg)) { + if (globalsns == NULL) { + globalsns = PyFunction_GET_GLOBALS(codearg); + } + if (builtinsns == NULL) { + builtinsns = _PyFunction_GET_BUILTINS(codearg); + } + codearg = PyFunction_GET_CODE(codearg); + } + else if (!PyCode_Check(codearg)) { + PyErr_SetString(PyExc_TypeError, + "argument must be a code object or a function"); + return NULL; + } + PyCodeObject *code = (PyCodeObject *)codearg; + + if (_PyCode_VerifyStateless( + tstate, code, globalnames, globalsns, builtinsns) < 0) + { + return NULL; + } + Py_RETURN_NONE; +} + #ifdef _Py_TIER2 static PyObject * @@ -1649,11 +1706,12 @@ create_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) static PyObject * destroy_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"id", NULL}; + static char *kwlist[] = {"id", "basic", NULL}; PyObject *idobj = NULL; + int basic = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "O:destroy_interpreter", kwlist, - &idobj)) + "O|p:destroy_interpreter", kwlist, + &idobj, &basic)) { return NULL; } @@ -1663,7 +1721,27 @@ destroy_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } - _PyXI_EndInterpreter(interp, NULL, NULL); + if (basic) + { + // Test the basic Py_EndInterpreter with weird out of order thread states + PyThreadState *t1, *t2; + PyThreadState *prev; + t1 = interp->threads.head; + if (t1 == NULL) { + t1 = PyThreadState_New(interp); + } + t2 = PyThreadState_New(interp); + prev = PyThreadState_Swap(t2); + PyThreadState_Clear(t1); + PyThreadState_Delete(t1); + Py_EndInterpreter(t2); + PyThreadState_Swap(prev); + } + else + { + // use the cross interpreter _PyXI_EndInterpreter normally + _PyXI_EndInterpreter(interp, NULL, NULL); + } Py_RETURN_NONE; } @@ -1723,9 +1801,9 @@ exec_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) /* To run some code in a sub-interpreter. -Generally you can use test.support.interpreters, +Generally you can use the interpreters module, but we keep this helper as a distinct implementation. -That's especially important for testing test.support.interpreters. +That's especially important for testing the interpreters module. */ static PyObject * run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) @@ -1929,7 +2007,14 @@ get_crossinterp_data(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } if (strcmp(mode, "xidata") == 0) { - if (_PyObject_GetXIData(tstate, obj, xidata) != 0) { + if (_PyObject_GetXIDataNoFallback(tstate, obj, xidata) != 0) { + goto error; + } + } + else if (strcmp(mode, "fallback") == 0) { + xidata_fallback_t fallback = _PyXIDATA_FULL_FALLBACK; + if (_PyObject_GetXIData(tstate, obj, fallback, xidata) != 0) + { goto error; } } @@ -1948,6 +2033,21 @@ get_crossinterp_data(PyObject *self, PyObject *args, PyObject *kwargs) goto error; } } + else if (strcmp(mode, "func") == 0) { + if (_PyFunction_GetXIData(tstate, obj, xidata) != 0) { + goto error; + } + } + else if (strcmp(mode, "script") == 0) { + if (_PyCode_GetScriptXIData(tstate, obj, xidata) != 0) { + goto error; + } + } + else if (strcmp(mode, "script-pure") == 0) { + if (_PyCode_GetPureScriptXIData(tstate, obj, xidata) != 0) { + goto error; + } + } else { PyErr_Format(PyExc_ValueError, "unsupported mode %R", modeobj); goto error; @@ -2258,10 +2358,114 @@ incref_decref_delayed(PyObject *self, PyObject *op) Py_RETURN_NONE; } +#ifdef __EMSCRIPTEN__ +#include "emscripten.h" + +EM_JS(int, emscripten_set_up_async_input_device_js, (void), { + let idx = 0; + const encoder = new TextEncoder(); + const bufs = [ + encoder.encode("ab\n"), + encoder.encode("fi\n"), + encoder.encode("xy\n"), + ]; + function sleep(t) { + return new Promise(res => setTimeout(res, t)); + } + FS.createAsyncInputDevice("/dev", "blah", async () => { + await sleep(5); + return bufs[(idx ++) % 3]; + }); + return !!WebAssembly.promising; +}); + +static PyObject * +emscripten_set_up_async_input_device(PyObject *self, PyObject *Py_UNUSED(ignored)) { + if (emscripten_set_up_async_input_device_js()) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} +#endif + +static PyObject * +vectorcall_nop(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_RETURN_NONE; +} + +static PyObject * +set_vectorcall_nop(PyObject *self, PyObject *func) +{ + if (!PyFunction_Check(func)) { + PyErr_SetString(PyExc_TypeError, "expected function"); + return NULL; + } + + ((PyFunctionObject*)func)->vectorcall = vectorcall_nop; + Py_RETURN_NONE; +} + + +static void +check_threadstate_set_stack_protection(PyThreadState *tstate, + void *start, size_t size) +{ + assert(PyUnstable_ThreadState_SetStackProtection(tstate, start, size) == 0); + assert(!PyErr_Occurred()); + + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + assert(ts->c_stack_top == (uintptr_t)start + size); + assert(ts->c_stack_hard_limit <= ts->c_stack_soft_limit); + assert(ts->c_stack_soft_limit < ts->c_stack_top); +} + + +static PyObject * +test_threadstate_set_stack_protection(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyThreadState *tstate = PyThreadState_GET(); + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + assert(!PyErr_Occurred()); + + uintptr_t init_base = ts->c_stack_init_base; + size_t init_top = ts->c_stack_init_top; + + // Test the minimum stack size + size_t size = _PyOS_MIN_STACK_SIZE; + void *start = (void*)(_Py_get_machine_stack_pointer() - size); + check_threadstate_set_stack_protection(tstate, start, size); + + // Test a larger size + size = 7654321; + assert(size > _PyOS_MIN_STACK_SIZE); + start = (void*)(_Py_get_machine_stack_pointer() - size); + check_threadstate_set_stack_protection(tstate, start, size); + + // Test invalid size (too small) + size = 5; + start = (void*)(_Py_get_machine_stack_pointer() - size); + assert(PyUnstable_ThreadState_SetStackProtection(tstate, start, size) == -1); + assert(PyErr_ExceptionMatches(PyExc_ValueError)); + PyErr_Clear(); + + // Test PyUnstable_ThreadState_ResetStackProtection() + PyUnstable_ThreadState_ResetStackProtection(tstate); + assert(ts->c_stack_init_base == init_base); + assert(ts->c_stack_init_top == init_top); + + Py_RETURN_NONE; +} + + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, {"get_c_recursion_remaining", get_c_recursion_remaining, METH_NOARGS}, + {"get_stack_pointer", get_stack_pointer, METH_NOARGS}, + {"get_stack_margin", get_stack_margin, METH_NOARGS}, {"test_bswap", test_bswap, METH_NOARGS}, {"test_popcount", test_popcount, METH_NOARGS}, {"test_bit_length", test_bit_length, METH_NOARGS}, @@ -2292,6 +2496,8 @@ static PyMethodDef module_functions[] = { {"get_co_localskinds", get_co_localskinds, METH_O, NULL}, {"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts), METH_VARARGS | METH_KEYWORDS, NULL}, + {"verify_stateless_code", _PyCFunction_CAST(verify_stateless_code), + METH_VARARGS | METH_KEYWORDS, NULL}, #ifdef _Py_TIER2 {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, {"invalidate_executors", invalidate_executors, METH_O, NULL}, @@ -2358,6 +2564,12 @@ static PyMethodDef module_functions[] = { {"is_static_immortal", is_static_immortal, METH_O}, {"incref_decref_delayed", incref_decref_delayed, METH_O}, GET_NEXT_DICT_KEYS_VERSION_METHODDEF +#ifdef __EMSCRIPTEN__ + {"emscripten_set_up_async_input_device", emscripten_set_up_async_input_device, METH_NOARGS}, +#endif + {"set_vectorcall_nop", set_vectorcall_nop, METH_O}, + {"test_threadstate_set_stack_protection", + test_threadstate_set_stack_protection, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c index e0ba37abcdd332..e3b2fe716d48d3 100644 --- a/Modules/_testinternalcapi/test_critical_sections.c +++ b/Modules/_testinternalcapi/test_critical_sections.c @@ -284,10 +284,111 @@ test_critical_sections_gc(PyObject *self, PyObject *Py_UNUSED(args)) #endif +#ifdef Py_GIL_DISABLED + +static PyObject * +test_critical_section1_reacquisition(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyObject *a = PyDict_New(); + assert(a != NULL); + + PyCriticalSection cs1, cs2; + // First acquisition of critical section on object locks it + PyCriticalSection_Begin(&cs1, a); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert(_PyThreadState_GET()->critical_section == (uintptr_t)&cs1); + // Attempting to re-acquire critical section on same object which + // is already locked by top-most critical section is a no-op. + PyCriticalSection_Begin(&cs2, a); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert(_PyThreadState_GET()->critical_section == (uintptr_t)&cs1); + // Releasing second critical section is a no-op. + PyCriticalSection_End(&cs2); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert(_PyThreadState_GET()->critical_section == (uintptr_t)&cs1); + // Releasing first critical section unlocks the object + PyCriticalSection_End(&cs1); + assert(!PyMutex_IsLocked(&a->ob_mutex)); + + Py_DECREF(a); + Py_RETURN_NONE; +} + +static PyObject * +test_critical_section2_reacquisition(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyObject *a = PyDict_New(); + assert(a != NULL); + PyObject *b = PyDict_New(); + assert(b != NULL); + + PyCriticalSection2 cs; + // First acquisition of critical section on objects locks them + PyCriticalSection2_Begin(&cs, a, b); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(PyMutex_IsLocked(&b->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert((_PyThreadState_GET()->critical_section & + ~_Py_CRITICAL_SECTION_MASK) == (uintptr_t)&cs); + + // Attempting to re-acquire critical section on either of two + // objects already locked by top-most critical section is a no-op. + + // Check re-acquiring on first object + PyCriticalSection a_cs; + PyCriticalSection_Begin(&a_cs, a); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(PyMutex_IsLocked(&b->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert((_PyThreadState_GET()->critical_section & + ~_Py_CRITICAL_SECTION_MASK) == (uintptr_t)&cs); + // Releasing critical section on either object is a no-op. + PyCriticalSection_End(&a_cs); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(PyMutex_IsLocked(&b->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert((_PyThreadState_GET()->critical_section & + ~_Py_CRITICAL_SECTION_MASK) == (uintptr_t)&cs); + + // Check re-acquiring on second object + PyCriticalSection b_cs; + PyCriticalSection_Begin(&b_cs, b); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(PyMutex_IsLocked(&b->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert((_PyThreadState_GET()->critical_section & + ~_Py_CRITICAL_SECTION_MASK) == (uintptr_t)&cs); + // Releasing critical section on either object is a no-op. + PyCriticalSection_End(&b_cs); + assert(PyMutex_IsLocked(&a->ob_mutex)); + assert(PyMutex_IsLocked(&b->ob_mutex)); + assert(_PyCriticalSection_IsActive(PyThreadState_GET()->critical_section)); + assert((_PyThreadState_GET()->critical_section & + ~_Py_CRITICAL_SECTION_MASK) == (uintptr_t)&cs); + + // Releasing critical section on both objects unlocks them + PyCriticalSection2_End(&cs); + assert(!PyMutex_IsLocked(&a->ob_mutex)); + assert(!PyMutex_IsLocked(&b->ob_mutex)); + + Py_DECREF(a); + Py_DECREF(b); + Py_RETURN_NONE; +} + +#endif // Py_GIL_DISABLED + static PyMethodDef test_methods[] = { {"test_critical_sections", test_critical_sections, METH_NOARGS}, {"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS}, {"test_critical_sections_suspend", test_critical_sections_suspend, METH_NOARGS}, +#ifdef Py_GIL_DISABLED + {"test_critical_section1_reacquisition", test_critical_section1_reacquisition, METH_NOARGS}, + {"test_critical_section2_reacquisition", test_critical_section2_reacquisition, METH_NOARGS}, +#endif #ifdef Py_CAN_START_THREADS {"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS}, {"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS}, diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index 8d678412fe7179..ded76ca9fe6819 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -57,7 +57,10 @@ lock_thread(void *arg) _Py_atomic_store_int(&test_data->started, 1); PyMutex_Lock(m); - assert(m->_bits == 1); + // gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set + // (m->_bits == 3) due to bucket collisions in the parking lot hash table + // between this mutex and the `test_data.done` event. + assert(m->_bits == 1 || m->_bits == 3); PyMutex_Unlock(m); assert(m->_bits == 0); @@ -88,7 +91,8 @@ test_lock_two_threads(PyObject *self, PyObject *obj) } while (v != 3 && iters < 200); // both the "locked" and the "has parked" bits should be set - assert(test_data.m._bits == 3); + v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits); + assert(v == 3); PyMutex_Unlock(&test_data.m); PyEvent_Wait(&test_data.done); diff --git a/Modules/_testlimitedcapi/set.c b/Modules/_testlimitedcapi/set.c index 35da5fa5f008e1..34ed6b1d60b5a4 100644 --- a/Modules/_testlimitedcapi/set.c +++ b/Modules/_testlimitedcapi/set.c @@ -155,6 +155,51 @@ test_frozenset_add_in_capi(PyObject *self, PyObject *Py_UNUSED(obj)) return NULL; } +static PyObject * +test_set_contains_does_not_convert_unhashable_key(PyObject *self, PyObject *Py_UNUSED(obj)) +{ + // See https://docs.python.org/3/c-api/set.html#c.PySet_Contains + PyObject *outer_set = PySet_New(NULL); + + PyObject *needle = PySet_New(NULL); + if (needle == NULL) { + Py_DECREF(outer_set); + return NULL; + } + + PyObject *num = PyLong_FromLong(42); + if (num == NULL) { + Py_DECREF(outer_set); + Py_DECREF(needle); + return NULL; + } + + if (PySet_Add(needle, num) < 0) { + Py_DECREF(outer_set); + Py_DECREF(needle); + Py_DECREF(num); + return NULL; + } + + int result = PySet_Contains(outer_set, needle); + + Py_DECREF(num); + Py_DECREF(needle); + Py_DECREF(outer_set); + + if (result < 0) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + Py_RETURN_NONE; + } + return NULL; + } + + PyErr_SetString(PyExc_AssertionError, + "PySet_Contains should have raised TypeError for unhashable key"); + return NULL; +} + static PyMethodDef test_methods[] = { {"set_check", set_check, METH_O}, {"set_checkexact", set_checkexact, METH_O}, @@ -174,6 +219,8 @@ static PyMethodDef test_methods[] = { {"set_clear", set_clear, METH_O}, {"test_frozenset_add_in_capi", test_frozenset_add_in_capi, METH_NOARGS}, + {"test_set_contains_does_not_convert_unhashable_key", + test_set_contains_does_not_convert_unhashable_key, METH_NOARGS}, {NULL}, }; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 9776a32755db68..1389a1ef2c1b23 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -281,6 +281,12 @@ _PyThread_AfterFork(struct _pythread_runtime_state *state) continue; } + // Keep handles for threads that have not been started yet. They are + // safe to start in the child process. + if (handle->state == THREAD_HANDLE_NOT_STARTED) { + continue; + } + // Mark all threads as done. Any attempts to join or detach the // underlying OS thread (if any) could crash. We are the only thread; // it's safe to set this non-atomically. @@ -661,12 +667,12 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args) PyThreadHandleObject *self = PyThreadHandleObject_CAST(op); PyObject *timeout_obj = NULL; - if (!PyArg_ParseTuple(args, "|O?:join", &timeout_obj)) { + if (!PyArg_ParseTuple(args, "|O:join", &timeout_obj)) { return NULL; } PyTime_t timeout_ns = -1; - if (timeout_obj != NULL) { + if (timeout_obj != NULL && timeout_obj != Py_None) { if (_PyTime_FromSecondsObject(&timeout_ns, timeout_obj, _PyTime_ROUND_TIMEOUT) < 0) { return NULL; @@ -684,6 +690,9 @@ PyThreadHandleObject_is_done(PyObject *op, PyObject *Py_UNUSED(dummy)) { PyThreadHandleObject *self = PyThreadHandleObject_CAST(op); if (_PyEvent_IsSet(&self->handle->thread_is_exiting)) { + if (_PyOnceFlag_CallOnce(&self->handle->once, join_thread, self->handle) == -1) { + return NULL; + } Py_RETURN_TRUE; } else { @@ -1011,6 +1020,11 @@ rlock_traverse(PyObject *self, visitproc visit, void *arg) return 0; } +static int +rlock_locked_impl(rlockobject *self) +{ + return PyMutex_IsLocked(&self->lock.mutex); +} static void rlock_dealloc(PyObject *self) @@ -1100,7 +1114,7 @@ static PyObject * rlock_locked(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = rlockobject_CAST(op); - int is_locked = _PyRecursiveMutex_IsLockedByCurrentThread(&self->lock); + int is_locked = rlock_locked_impl(self); return PyBool_FromLong(is_locked); } @@ -1202,10 +1216,17 @@ rlock_repr(PyObject *op) { rlockobject *self = rlockobject_CAST(op); PyThread_ident_t owner = self->lock.thread; - size_t count = self->lock.level + 1; + int locked = rlock_locked_impl(self); + size_t count; + if (locked) { + count = self->lock.level + 1; + } + else { + count = 0; + } return PyUnicode_FromFormat( "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%zu at %p>", - owner ? "locked" : "unlocked", + locked ? "locked" : "unlocked", Py_TYPE(self)->tp_name, owner, count, self); } @@ -1345,9 +1366,7 @@ static void localdummy_dealloc(PyObject *op) { localdummyobject *self = localdummyobject_CAST(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); PyTypeObject *tp = Py_TYPE(self); tp->tp_free(self); Py_DECREF(tp); @@ -1929,10 +1948,10 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, PyObject *func = NULL; int daemon = 1; thread_module_state *state = get_thread_state(module); - PyObject *hobj = Py_None; + PyObject *hobj = NULL; if (!PyArg_ParseTupleAndKeywords(fargs, fkwargs, - "O|O!?p:start_joinable_thread", keywords, - &func, state->thread_handle_type, &hobj, &daemon)) { + "O|Op:start_joinable_thread", keywords, + &func, &hobj, &daemon)) { return NULL; } @@ -1942,6 +1961,14 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, return NULL; } + if (hobj == NULL) { + hobj = Py_None; + } + else if (hobj != Py_None && !Py_IS_TYPE(hobj, state->thread_handle_type)) { + PyErr_SetString(PyExc_TypeError, "'handle' must be a _ThreadHandle"); + return NULL; + } + if (PySys_Audit("_thread.start_joinable_thread", "OiO", func, daemon, hobj) < 0) { return NULL; @@ -2301,7 +2328,7 @@ thread_excepthook(PyObject *module, PyObject *args) } PyDoc_STRVAR(excepthook_doc, -"_excepthook($module, (exc_type, exc_value, exc_traceback, thread), /)\n\ +"_excepthook($module, args, /)\n\ --\n\ \n\ Handle uncaught Thread.run() exception."); @@ -2447,7 +2474,9 @@ _thread__get_name_impl(PyObject *module) } #ifdef __sun - return PyUnicode_DecodeUTF8(name, strlen(name), "surrogateescape"); + // gh-138004: Decode Solaris/Illumos (e.g. OpenIndiana) thread names + // from ASCII, since OpenIndiana only supports ASCII names. + return PyUnicode_DecodeASCII(name, strlen(name), "surrogateescape"); #else return PyUnicode_DecodeFSDefault(name); #endif @@ -2485,8 +2514,9 @@ _thread_set_name_impl(PyObject *module, PyObject *name_obj) { #ifndef MS_WINDOWS #ifdef __sun - // Solaris always uses UTF-8 - const char *encoding = "utf-8"; + // gh-138004: Encode Solaris/Illumos thread names to ASCII, + // since OpenIndiana does not support non-ASCII names. + const char *encoding = "ascii"; #else // Encode the thread name to the filesystem encoding using the "replace" // error handler diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 77695401919cb7..08fb96169da6c3 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -599,8 +599,12 @@ Tkapp_New(const char *screenName, const char *className, v->interp = Tcl_CreateInterp(); v->wantobjects = wantobjects; +#if TCL_MAJOR_VERSION >= 9 + v->threaded = 1; +#else v->threaded = Tcl_GetVar2Ex(v->interp, "tcl_platform", "threaded", TCL_GLOBAL_ONLY) != NULL; +#endif v->thread_id = Tcl_GetCurrentThread(); v->dispatching = 0; v->trace = NULL; diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index c5e78b1510b5e3..c31a7e8fea5608 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -78,23 +78,47 @@ py_UuidCreate(PyObject *Py_UNUSED(context), return NULL; } +static int +py_windows_has_stable_node(void) +{ + UUID uuid; + RPC_STATUS res; + Py_BEGIN_ALLOW_THREADS + res = UuidCreateSequential(&uuid); + Py_END_ALLOW_THREADS + return res == RPC_S_OK; +} #endif /* MS_WINDOWS */ static int -uuid_exec(PyObject *module) { +uuid_exec(PyObject *module) +{ +#define ADD_INT(NAME, VALUE) \ + do { \ + if (PyModule_AddIntConstant(module, (NAME), (VALUE)) < 0) { \ + return -1; \ + } \ + } while (0) + assert(sizeof(uuid_t) == 16); #if defined(MS_WINDOWS) - int has_uuid_generate_time_safe = 0; + ADD_INT("has_uuid_generate_time_safe", 0); #elif defined(HAVE_UUID_GENERATE_TIME_SAFE) - int has_uuid_generate_time_safe = 1; + ADD_INT("has_uuid_generate_time_safe", 1); #else - int has_uuid_generate_time_safe = 0; + ADD_INT("has_uuid_generate_time_safe", 0); #endif - if (PyModule_AddIntConstant(module, "has_uuid_generate_time_safe", - has_uuid_generate_time_safe) < 0) { - return -1; - } + +#if defined(MS_WINDOWS) + ADD_INT("has_stable_extractable_node", py_windows_has_stable_node()); +#elif defined(HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC) + ADD_INT("has_stable_extractable_node", 1); +#else + ADD_INT("has_stable_extractable_node", 0); +#endif + +#undef ADD_INT return 0; } diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index abd53436b21b29..5c5383d260a040 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -7,6 +7,7 @@ #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_typeobject.h" // _PyType_GetModuleState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "datetime.h" // PyDateTime_TZInfo @@ -375,9 +376,7 @@ zoneinfo_dealloc(PyObject *obj_self) PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(obj_self); - } + FT_CLEAR_WEAKREFS(obj_self, self->weakreflist); if (self->trans_list_utc != NULL) { PyMem_Free(self->trans_list_utc); diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 4d046859a1540e..8af6156a0da575 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -1,14 +1,16 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorithm & the zstd library. */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif +#include "Python.h" + #include "_zstdmodule.h" +#include <zstd.h> // ZSTD_*() +#include <zdict.h> // ZDICT_*() + /*[clinic input] module _zstd @@ -17,52 +19,91 @@ module _zstd #include "clinic/_zstdmodule.c.h" +ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype) +{ + if (state == NULL) { + return NULL; + } + + /* Check ZstdDict */ + if (PyObject_TypeCheck(dict, state->ZstdDict_type)) { + return (ZstdDict*)dict; + } + + /* Check (ZstdDict, type) */ + if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2 + && PyObject_TypeCheck(PyTuple_GET_ITEM(dict, 0), state->ZstdDict_type) + && PyLong_Check(PyTuple_GET_ITEM(dict, 1))) + { + int type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); + if (type == -1 && PyErr_Occurred()) { + return NULL; + } + if (type == DICT_TYPE_DIGESTED + || type == DICT_TYPE_UNDIGESTED + || type == DICT_TYPE_PREFIX) + { + *ptype = type; + return (ZstdDict*)PyTuple_GET_ITEM(dict, 0); + } + } + + /* Wrong type */ + PyErr_SetString(PyExc_TypeError, + "zstd_dict argument should be a ZstdDict object."); + return NULL; +} + /* Format error message and set ZstdError. */ void -set_zstd_error(const _zstd_state* const state, - error_type type, size_t zstd_ret) +set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret) { - char *msg; + const char *msg; assert(ZSTD_isError(zstd_ret)); - switch (type) - { - case ERR_DECOMPRESS: - msg = "Unable to decompress zstd data: %s"; - break; - case ERR_COMPRESS: - msg = "Unable to compress zstd data: %s"; - break; - case ERR_SET_PLEDGED_INPUT_SIZE: - msg = "Unable to set pledged uncompressed content size: %s"; - break; - - case ERR_LOAD_D_DICT: - msg = "Unable to load zstd dictionary or prefix for decompression: %s"; - break; - case ERR_LOAD_C_DICT: - msg = "Unable to load zstd dictionary or prefix for compression: %s"; - break; - - case ERR_GET_C_BOUNDS: - msg = "Unable to get zstd compression parameter bounds: %s"; - break; - case ERR_GET_D_BOUNDS: - msg = "Unable to get zstd decompression parameter bounds: %s"; - break; - case ERR_SET_C_LEVEL: - msg = "Unable to set zstd compression level: %s"; - break; - - case ERR_TRAIN_DICT: - msg = "Unable to train zstd dictionary: %s"; - break; - case ERR_FINALIZE_DICT: - msg = "Unable to finalize zstd dictionary: %s"; - break; - - default: - Py_UNREACHABLE(); + if (state == NULL) { + return; + } + switch (type) { + case ERR_DECOMPRESS: + msg = "Unable to decompress Zstandard data: %s"; + break; + case ERR_COMPRESS: + msg = "Unable to compress Zstandard data: %s"; + break; + case ERR_SET_PLEDGED_INPUT_SIZE: + msg = "Unable to set pledged uncompressed content size: %s"; + break; + + case ERR_LOAD_D_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "decompression: %s"; + break; + case ERR_LOAD_C_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "compression: %s"; + break; + + case ERR_GET_C_BOUNDS: + msg = "Unable to get zstd compression parameter bounds: %s"; + break; + case ERR_GET_D_BOUNDS: + msg = "Unable to get zstd decompression parameter bounds: %s"; + break; + case ERR_SET_C_LEVEL: + msg = "Unable to set zstd compression level: %s"; + break; + + case ERR_TRAIN_DICT: + msg = "Unable to train the Zstandard dictionary: %s"; + break; + case ERR_FINALIZE_DICT: + msg = "Unable to finalize the Zstandard dictionary: %s"; + break; + + default: + Py_UNREACHABLE(); } PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret)); } @@ -72,8 +113,7 @@ typedef struct { char parameter_name[32]; } ParameterInfo; -static const ParameterInfo cp_list[] = -{ +static const ParameterInfo cp_list[] = { {ZSTD_c_compressionLevel, "compression_level"}, {ZSTD_c_windowLog, "window_log"}, {ZSTD_c_hashLog, "hash_log"}, @@ -98,22 +138,18 @@ static const ParameterInfo cp_list[] = {ZSTD_c_overlapLog, "overlap_log"} }; -static const ParameterInfo dp_list[] = -{ +static const ParameterInfo dp_list[] = { {ZSTD_d_windowLogMax, "window_log_max"} }; void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v) +set_parameter_error(int is_compress, int key_v, int value_v) { ParameterInfo const *list; int list_size; - char const *name; char *type; ZSTD_bounds bounds; - int i; - char pos_msg[128]; + char pos_msg[64]; if (is_compress) { list = cp_list; @@ -127,8 +163,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress, } /* Find parameter's name */ - name = NULL; - for (i = 0; i < list_size; i++) { + char const *name = NULL; + for (int i = 0; i < list_size; i++) { if (key_v == (list+i)->parameter) { name = (list+i)->parameter_name; break; @@ -150,20 +186,16 @@ set_parameter_error(const _zstd_state* const state, int is_compress, bounds = ZSTD_dParam_getBounds(key_v); } if (ZSTD_isError(bounds.error)) { - PyErr_Format(state->ZstdError, - "Zstd %s parameter \"%s\" is invalid. (zstd v%s)", - type, name, ZSTD_versionString()); + PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'", + type, name); return; } /* Error message */ - PyErr_Format(state->ZstdError, - "Error when setting zstd %s parameter \"%s\", it " - "should %d <= value <= %d, provided value is %d. " - "(zstd v%s, %d-bit build)", - type, name, - bounds.lowerBound, bounds.upperBound, value_v, - ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t)); + PyErr_Format(PyExc_ValueError, + "%s parameter '%s' received an illegal value %d; " + "the valid range is [%d, %d]", + type, name, value_v, bounds.lowerBound, bounds.upperBound); } static inline _zstd_state* @@ -174,9 +206,57 @@ get_zstd_state(PyObject *module) return (_zstd_state *)state; } +static Py_ssize_t +calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, + size_t **chunk_sizes) +{ + Py_ssize_t chunks_number; + Py_ssize_t sizes_sum; + Py_ssize_t i; + + chunks_number = PyTuple_GET_SIZE(samples_sizes); + if ((size_t) chunks_number > UINT32_MAX) { + PyErr_Format(PyExc_ValueError, + "The number of samples should be <= %u.", UINT32_MAX); + return -1; + } + + /* Prepare chunk_sizes */ + *chunk_sizes = PyMem_New(size_t, chunks_number); + if (*chunk_sizes == NULL) { + PyErr_NoMemory(); + return -1; + } + + sizes_sum = PyBytes_GET_SIZE(samples_bytes); + for (i = 0; i < chunks_number; i++) { + size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, i)); + (*chunk_sizes)[i] = size; + if (size == (size_t)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto sum_error; + } + return -1; + } + if ((size_t)sizes_sum < size) { + goto sum_error; + } + sizes_sum -= size; + } + + if (sizes_sum != 0) { +sum_error: + PyErr_SetString(PyExc_ValueError, + "The samples size tuple doesn't match the " + "concatenation's size."); + return -1; + } + return chunks_number; +} + /*[clinic input] -_zstd._train_dict +_zstd.train_dict samples_bytes: PyBytesObject Concatenation of samples. @@ -186,59 +266,30 @@ _zstd._train_dict The size of the dictionary. / -Internal function, train a zstd dictionary on sample data. +Train a Zstandard dictionary on sample data. [clinic start generated code]*/ static PyObject * -_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size) -/*[clinic end generated code: output=b5b4f36347c0addd input=2dce5b57d63923e2]*/ +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size) +/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/ { - // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict - // are pretty similar. We should see if we can refactor them to share that code. - Py_ssize_t chunks_number; - size_t *chunk_sizes = NULL; PyObject *dst_dict_bytes = NULL; + size_t *chunk_sizes = NULL; + Py_ssize_t chunks_number; size_t zstd_ret; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { goto error; } @@ -250,16 +301,16 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, /* Train the dictionary */ char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes); - char *samples_buffer = PyBytes_AS_STRING(samples_bytes); + const char *samples_buffer = PyBytes_AS_STRING(samples_bytes); Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size, samples_buffer, chunk_sizes, (uint32_t)chunks_number); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret); goto error; } @@ -280,7 +331,7 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, } /*[clinic input] -_zstd._finalize_dict +_zstd.finalize_dict custom_dict_bytes: PyBytesObject Custom dictionary content. @@ -291,63 +342,36 @@ _zstd._finalize_dict dict_size: Py_ssize_t The size of the dictionary. compression_level: int - Optimize for a specific zstd compression level, 0 means default. + Optimize for a specific Zstandard compression level, 0 means default. / -Internal function, finalize a zstd dictionary. +Finalize a Zstandard dictionary. [clinic start generated code]*/ static PyObject * -_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, - PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size, - int compression_level) -/*[clinic end generated code: output=5dc5b520fddba37f input=8afd42a249078460]*/ +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level) +/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/ { Py_ssize_t chunks_number; size_t *chunk_sizes = NULL; PyObject *dst_dict_bytes = NULL; size_t zstd_ret; ZDICT_params_t params; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { goto error; } @@ -359,7 +383,7 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, /* Parameters */ - /* Optimize for a specific zstd compression level, 0 means default. */ + /* Optimize for a specific Zstandard compression level, 0 means default. */ params.compressionLevel = compression_level; /* Write log to stderr, 0 = none. */ params.notificationLevel = 0; @@ -370,14 +394,15 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_finalizeDictionary( PyBytes_AS_STRING(dst_dict_bytes), dict_size, - PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes), + PyBytes_AS_STRING(custom_dict_bytes), + Py_SIZE(custom_dict_bytes), PyBytes_AS_STRING(samples_bytes), chunk_sizes, (uint32_t)chunks_number, params); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret); goto error; } @@ -399,26 +424,25 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, /*[clinic input] -_zstd._get_param_bounds +_zstd.get_param_bounds parameter: int The parameter to get bounds. is_compress: bool True for CompressionParameter, False for DecompressionParameter. -Internal function, get CompressionParameter/DecompressionParameter bounds. +Get CompressionParameter/DecompressionParameter bounds. [clinic start generated code]*/ static PyObject * -_zstd__get_param_bounds_impl(PyObject *module, int parameter, - int is_compress) -/*[clinic end generated code: output=9892cd822f937e79 input=884cd1a01125267d]*/ +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) +/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/ { ZSTD_bounds bound; if (is_compress) { bound = ZSTD_cParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error); return NULL; } @@ -426,7 +450,7 @@ _zstd__get_param_bounds_impl(PyObject *module, int parameter, else { bound = ZSTD_dParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error); return NULL; } @@ -442,24 +466,23 @@ _zstd.get_frame_size A bytes-like object, it should start from the beginning of a frame, and contains at least one complete frame. -Get the size of a zstd frame, including frame header and 4-byte checksum if it has one. - -It will iterate all blocks' headers within a frame, to accumulate the frame size. +Get the size of a Zstandard frame, including the header and optional checksum. [clinic start generated code]*/ static PyObject * _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/ +/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/ { size_t frame_size; - frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, frame_buffer->len); + frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, + frame_buffer->len); if (ZSTD_isError(frame_size)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_Format(mod_state->ZstdError, - "Error when finding the compressed size of a zstd frame. " - "Make sure the frame_buffer argument starts from the " - "beginning of a frame, and its length not less than this " + "Error when finding the compressed size of a Zstandard frame. " + "Ensure the frame_buffer argument starts from the " + "beginning of a frame, and its length is not less than this " "complete frame. Zstd error message: %s.", ZSTD_getErrorName(frame_size)); return NULL; @@ -469,17 +492,17 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) } /*[clinic input] -_zstd._get_frame_info +_zstd.get_frame_info frame_buffer: Py_buffer - A bytes-like object, containing the header of a zstd frame. + A bytes-like object, containing the header of a Zstandard frame. -Internal function, get zstd frame infomation from a frame header. +Get Zstandard frame infomation from a frame header. [clinic start generated code]*/ static PyObject * -_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=5462855464ecdf81 input=67f1f8e4b7b89c4d]*/ +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) +/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/ { uint64_t decompressed_size; uint32_t dict_id; @@ -491,12 +514,12 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */ if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_SetString(mod_state->ZstdError, "Error when getting information from the header of " - "a zstd frame. Make sure the frame_buffer argument " + "a Zstandard frame. Ensure the frame_buffer argument " "starts from the beginning of a frame, and its length " - "not less than the frame header (6~18 bytes)."); + "is not less than the frame header (6~18 bytes)."); return NULL; } @@ -511,325 +534,169 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) } /*[clinic input] -_zstd._set_parameter_types +_zstd.set_parameter_types c_parameter_type: object(subclass_of='&PyType_Type') CompressionParameter IntEnum type object d_parameter_type: object(subclass_of='&PyType_Type') DecompressionParameter IntEnum type object -Internal function, set CompressionParameter/DecompressionParameter types for validity check. +Set CompressionParameter and DecompressionParameter types for validity check. [clinic start generated code]*/ static PyObject * -_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, - PyObject *d_parameter_type) -/*[clinic end generated code: output=a13d4890ccbd2873 input=4535545d903853d3]*/ +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type) +/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/ { - _zstd_state* const mod_state = get_zstd_state(module); - - if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) { - PyErr_SetString(PyExc_ValueError, - "The two arguments should be CompressionParameter and " - "DecompressionParameter types."); - return NULL; - } + _zstd_state* mod_state = get_zstd_state(module); - Py_XDECREF(mod_state->CParameter_type); Py_INCREF(c_parameter_type); - mod_state->CParameter_type = (PyTypeObject*)c_parameter_type; - - Py_XDECREF(mod_state->DParameter_type); + Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type); Py_INCREF(d_parameter_type); - mod_state->DParameter_type = (PyTypeObject*)d_parameter_type; + Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type); Py_RETURN_NONE; } static PyMethodDef _zstd_methods[] = { - _ZSTD__TRAIN_DICT_METHODDEF - _ZSTD__FINALIZE_DICT_METHODDEF - _ZSTD__GET_PARAM_BOUNDS_METHODDEF + _ZSTD_TRAIN_DICT_METHODDEF + _ZSTD_FINALIZE_DICT_METHODDEF + _ZSTD_GET_PARAM_BOUNDS_METHODDEF _ZSTD_GET_FRAME_SIZE_METHODDEF - _ZSTD__GET_FRAME_INFO_METHODDEF - _ZSTD__SET_PARAMETER_TYPES_METHODDEF - - {0} + _ZSTD_GET_FRAME_INFO_METHODDEF + _ZSTD_SET_PARAMETER_TYPES_METHODDEF + {NULL, NULL} }; - -#define ADD_INT_PREFIX_MACRO(module, macro) \ - do { \ - if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \ - return -1; \ - } \ - } while(0) - static int -add_parameters(PyObject *module) -{ - /* If add new parameters, please also add to cp_list/dp_list above. */ - - /* Compression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_c_compressionLevel); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_windowLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_hashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_chainLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_searchLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_minMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetLength); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_strategy); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_enableLongDistanceMatching); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmMinMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmBucketSizeLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashRateLog); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_contentSizeFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_checksumFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_dictIDFlag); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_nbWorkers); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_jobSize); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_overlapLog); - - /* Decompression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_d_windowLogMax); - - /* ZSTD_strategy enum */ - ADD_INT_PREFIX_MACRO(module, ZSTD_fast); - ADD_INT_PREFIX_MACRO(module, ZSTD_dfast); - ADD_INT_PREFIX_MACRO(module, ZSTD_greedy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btlazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btopt); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra2); - - return 0; -} - -static inline PyObject * -get_zstd_version_info(void) +_zstd_exec(PyObject *m) { - uint32_t ver = ZSTD_versionNumber(); - uint32_t major, minor, release; - - major = ver / 10000; - minor = (ver / 100) % 100; - release = ver % 100; - - return Py_BuildValue("III", major, minor, release); -} - -static inline int -add_vars_to_module(PyObject *module) -{ - PyObject *obj; - - /* zstd_version, a str. */ - if (PyModule_AddStringConstant(module, "zstd_version", - ZSTD_versionString()) < 0) { - return -1; - } - - /* zstd_version_info, a tuple. */ - obj = get_zstd_version_info(); - if (PyModule_AddObjectRef(module, "zstd_version_info", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* Add zstd parameters */ - if (add_parameters(module) < 0) { - return -1; - } - - /* _compressionLevel_values: (default, min, max) - ZSTD_defaultCLevel() was added in zstd v1.5.0 */ - obj = Py_BuildValue("iii", -#if ZSTD_VERSION_NUMBER < 10500 - ZSTD_CLEVEL_DEFAULT, -#else - ZSTD_defaultCLevel(), -#endif - ZSTD_minCLevel(), - ZSTD_maxCLevel()); - if (PyModule_AddObjectRef(module, - "_compressionLevel_values", - obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_CStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_CStreamInSize(), - (uint32_t)ZSTD_CStreamOutSize()); - if (PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_DStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_DStreamInSize(), - (uint32_t)ZSTD_DStreamOutSize()); - if (PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_CONFIG */ - obj = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c", - Py_False, - Py_True, -/* User mremap output buffer */ -#if defined(HAVE_MREMAP) - Py_True -#else - Py_False -#endif - ); - if (PyModule_AddObjectRef(module, "_ZSTD_CONFIG", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - return 0; -} - -#define ADD_STR_TO_STATE_MACRO(STR) \ - do { \ - mod_state->str_##STR = PyUnicode_FromString(#STR); \ - if (mod_state->str_##STR == NULL) { \ - return -1; \ - } \ - } while(0) - -static inline int -add_type_to_module(PyObject *module, const char *name, - PyType_Spec *type_spec, PyTypeObject **dest) -{ - PyObject *temp = PyType_FromModuleAndSpec(module, type_spec, NULL); - - if (PyModule_AddObjectRef(module, name, temp) < 0) { - Py_XDECREF(temp); - return -1; - } - - *dest = (PyTypeObject*) temp; - - return 0; -} - -static inline int -add_constant_to_type(PyTypeObject *type, const char *name, long value) -{ - PyObject *temp; - - temp = PyLong_FromLong(value); - if (temp == NULL) { - return -1; - } - - int rc = PyObject_SetAttrString((PyObject*) type, name, temp); - Py_DECREF(temp); - return rc; -} - -static int _zstd_exec(PyObject *module) { - _zstd_state* const mod_state = get_zstd_state(module); +#define ADD_TYPE(TYPE, SPEC) \ +do { \ + TYPE = (PyTypeObject *)PyType_FromModuleAndSpec(m, &(SPEC), NULL); \ + if (TYPE == NULL) { \ + return -1; \ + } \ + if (PyModule_AddType(m, TYPE) < 0) { \ + return -1; \ + } \ +} while (0) + +#define ADD_INT_MACRO(MACRO) \ + if (PyModule_AddIntConstant((m), #MACRO, (MACRO)) < 0) { \ + return -1; \ + } + +#define ADD_INT_CONST_TO_TYPE(TYPE, NAME, VALUE) \ +do { \ + PyObject *v = PyLong_FromLong((VALUE)); \ + if (v == NULL || PyObject_SetAttrString((PyObject *)(TYPE), \ + (NAME), v) < 0) { \ + Py_XDECREF(v); \ + return -1; \ + } \ + Py_DECREF(v); \ +} while (0) + + _zstd_state* mod_state = get_zstd_state(m); /* Reusable objects & variables */ - mod_state->empty_bytes = PyBytes_FromStringAndSize(NULL, 0); - if (mod_state->empty_bytes == NULL) { - return -1; - } - - mod_state->empty_readonly_memoryview = - PyMemoryView_FromMemory((char*)mod_state, 0, PyBUF_READ); - if (mod_state->empty_readonly_memoryview == NULL) { - return -1; - } - - /* Add str to module state */ - ADD_STR_TO_STATE_MACRO(read); - ADD_STR_TO_STATE_MACRO(readinto); - ADD_STR_TO_STATE_MACRO(write); - ADD_STR_TO_STATE_MACRO(flush); - mod_state->CParameter_type = NULL; mod_state->DParameter_type = NULL; - /* Add variables to module */ - if (add_vars_to_module(module) < 0) { - return -1; - } - - /* ZstdError */ + /* Create and add heap types */ + ADD_TYPE(mod_state->ZstdDict_type, zstd_dict_type_spec); + ADD_TYPE(mod_state->ZstdCompressor_type, zstd_compressor_type_spec); + ADD_TYPE(mod_state->ZstdDecompressor_type, zstd_decompressor_type_spec); mod_state->ZstdError = PyErr_NewExceptionWithDoc( - "_zstd.ZstdError", - "Call to the underlying zstd library failed.", - NULL, NULL); + "compression.zstd.ZstdError", + "An error occurred in the zstd library.", + NULL, NULL); if (mod_state->ZstdError == NULL) { return -1; } - - if (PyModule_AddObjectRef(module, "ZstdError", mod_state->ZstdError) < 0) { - Py_DECREF(mod_state->ZstdError); + if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) { return -1; } - /* ZstdDict */ - if (add_type_to_module(module, - "ZstdDict", - &zstddict_type_spec, - &mod_state->ZstdDict_type) < 0) { + /* Add constants */ + if (PyModule_AddIntConstant(m, "zstd_version_number", + ZSTD_versionNumber()) < 0) { return -1; } - // ZstdCompressor - if (add_type_to_module(module, - "ZstdCompressor", - &zstdcompressor_type_spec, - &mod_state->ZstdCompressor_type) < 0) { + if (PyModule_AddStringConstant(m, "zstd_version", + ZSTD_versionString()) < 0) { return -1; } - // Add EndDirective enum to ZstdCompressor - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "CONTINUE", - ZSTD_e_continue) < 0) { +#if ZSTD_VERSION_NUMBER >= 10500 + if (PyModule_AddIntConstant(m, "ZSTD_CLEVEL_DEFAULT", + ZSTD_defaultCLevel()) < 0) { return -1; } +#else + ADD_INT_MACRO(ZSTD_CLEVEL_DEFAULT); +#endif - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "FLUSH_BLOCK", - ZSTD_e_flush) < 0) { + if (PyModule_Add(m, "ZSTD_DStreamOutSize", + PyLong_FromSize_t(ZSTD_DStreamOutSize())) < 0) { return -1; } - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "FLUSH_FRAME", - ZSTD_e_end) < 0) { - return -1; - } - - // ZstdDecompressor - if (add_type_to_module(module, - "ZstdDecompressor", - &ZstdDecompressor_type_spec, - &mod_state->ZstdDecompressor_type) < 0) { - return -1; - } + /* Add zstd compression parameters. All should also be in cp_list. */ + ADD_INT_MACRO(ZSTD_c_compressionLevel); + ADD_INT_MACRO(ZSTD_c_windowLog); + ADD_INT_MACRO(ZSTD_c_hashLog); + ADD_INT_MACRO(ZSTD_c_chainLog); + ADD_INT_MACRO(ZSTD_c_searchLog); + ADD_INT_MACRO(ZSTD_c_minMatch); + ADD_INT_MACRO(ZSTD_c_targetLength); + ADD_INT_MACRO(ZSTD_c_strategy); + + ADD_INT_MACRO(ZSTD_c_enableLongDistanceMatching); + ADD_INT_MACRO(ZSTD_c_ldmHashLog); + ADD_INT_MACRO(ZSTD_c_ldmMinMatch); + ADD_INT_MACRO(ZSTD_c_ldmBucketSizeLog); + ADD_INT_MACRO(ZSTD_c_ldmHashRateLog); + + ADD_INT_MACRO(ZSTD_c_contentSizeFlag); + ADD_INT_MACRO(ZSTD_c_checksumFlag); + ADD_INT_MACRO(ZSTD_c_dictIDFlag); + + ADD_INT_MACRO(ZSTD_c_nbWorkers); + ADD_INT_MACRO(ZSTD_c_jobSize); + ADD_INT_MACRO(ZSTD_c_overlapLog); + + /* Add zstd decompression parameters. All should also be in dp_list. */ + ADD_INT_MACRO(ZSTD_d_windowLogMax); + + /* Add ZSTD_strategy enum members */ + ADD_INT_MACRO(ZSTD_fast); + ADD_INT_MACRO(ZSTD_dfast); + ADD_INT_MACRO(ZSTD_greedy); + ADD_INT_MACRO(ZSTD_lazy); + ADD_INT_MACRO(ZSTD_lazy2); + ADD_INT_MACRO(ZSTD_btlazy2); + ADD_INT_MACRO(ZSTD_btopt); + ADD_INT_MACRO(ZSTD_btultra); + ADD_INT_MACRO(ZSTD_btultra2); + + /* Add ZSTD_EndDirective enum members to ZstdCompressor */ + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "CONTINUE", ZSTD_e_continue); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_BLOCK", ZSTD_e_flush); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_FRAME", ZSTD_e_end); + + /* Make ZstdCompressor immutable (set Py_TPFLAGS_IMMUTABLETYPE) */ + PyType_Freeze(mod_state->ZstdCompressor_type); + +#undef ADD_TYPE +#undef ADD_INT_MACRO +#undef ADD_ZSTD_COMPRESSOR_INT_CONST return 0; } @@ -837,14 +704,7 @@ static int _zstd_exec(PyObject *module) { static int _zstd_traverse(PyObject *module, visitproc visit, void *arg) { - _zstd_state* const mod_state = get_zstd_state(module); - - Py_VISIT(mod_state->empty_bytes); - Py_VISIT(mod_state->empty_readonly_memoryview); - Py_VISIT(mod_state->str_read); - Py_VISIT(mod_state->str_readinto); - Py_VISIT(mod_state->str_write); - Py_VISIT(mod_state->str_flush); + _zstd_state* mod_state = get_zstd_state(module); Py_VISIT(mod_state->ZstdDict_type); Py_VISIT(mod_state->ZstdCompressor_type); @@ -861,14 +721,7 @@ _zstd_traverse(PyObject *module, visitproc visit, void *arg) static int _zstd_clear(PyObject *module) { - _zstd_state* const mod_state = get_zstd_state(module); - - Py_CLEAR(mod_state->empty_bytes); - Py_CLEAR(mod_state->empty_readonly_memoryview); - Py_CLEAR(mod_state->str_read); - Py_CLEAR(mod_state->str_readinto); - Py_CLEAR(mod_state->str_write); - Py_CLEAR(mod_state->str_flush); + _zstd_state* mod_state = get_zstd_state(module); Py_CLEAR(mod_state->ZstdDict_type); Py_CLEAR(mod_state->ZstdCompressor_type); @@ -890,20 +743,21 @@ _zstd_free(void *module) static struct PyModuleDef_Slot _zstd_slots[] = { {Py_mod_exec, _zstd_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, - - {0} + {0, NULL}, }; -struct PyModuleDef _zstdmodule = { - PyModuleDef_HEAD_INIT, +static struct PyModuleDef _zstdmodule = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "_zstd", + .m_doc = "Implementation module for Zstandard compression.", .m_size = sizeof(_zstd_state), .m_slots = _zstd_slots, .m_methods = _zstd_methods, .m_traverse = _zstd_traverse, .m_clear = _zstd_clear, - .m_free = _zstd_free + .m_free = _zstd_free, }; PyMODINIT_FUNC diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index 9322ee259c5124..82226ff8718e6b 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -1,138 +1,28 @@ -#pragma once -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* Declarations shared between different parts of the _zstd module*/ -#include "Python.h" +#ifndef ZSTD_MODULE_H +#define ZSTD_MODULE_H -#include "zstd.h" -#include "zdict.h" +#include "zstddict.h" +/* Type specs */ +extern PyType_Spec zstd_dict_type_spec; +extern PyType_Spec zstd_compressor_type_spec; +extern PyType_Spec zstd_decompressor_type_spec; -/* Forward declaration of module state */ -typedef struct _zstd_state _zstd_state; - -/* Forward reference of module def */ -extern PyModuleDef _zstdmodule; - -/* For clinic type calculations */ -static inline _zstd_state * -get_zstd_state_from_type(PyTypeObject *type) { - PyObject *module = PyType_GetModuleByDef(type, &_zstdmodule); - if (module == NULL) { - return NULL; - } - void *state = PyModule_GetState(module); - assert(state != NULL); - return (_zstd_state *)state; -} - -extern PyType_Spec zstddict_type_spec; -extern PyType_Spec zstdcompressor_type_spec; -extern PyType_Spec ZstdDecompressor_type_spec; - -struct _zstd_state { - PyObject *empty_bytes; - PyObject *empty_readonly_memoryview; - PyObject *str_read; - PyObject *str_readinto; - PyObject *str_write; - PyObject *str_flush; - +typedef struct { + /* Module heap types. */ PyTypeObject *ZstdDict_type; PyTypeObject *ZstdCompressor_type; PyTypeObject *ZstdDecompressor_type; PyObject *ZstdError; + /* enum types set by set_parameter_types. */ PyTypeObject *CParameter_type; PyTypeObject *DParameter_type; -}; - -typedef struct { - PyObject_HEAD - - /* Reusable compress/decompress dictionary, they are created once and - can be shared by multiple threads concurrently, since its usage is - read-only. - c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ - ZSTD_DDict *d_dict; - PyObject *c_dicts; - - /* Content of the dictionary, bytes object. */ - PyObject *dict_content; - /* Dictionary id */ - uint32_t dict_id; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdDict; - -typedef struct { - PyObject_HEAD - - /* Compression context */ - ZSTD_CCtx *cctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Last mode, initialized to ZSTD_e_end */ - int last_mode; - - /* (nbWorker >= 1) ? 1 : 0 */ - int use_multithread; - - /* Compression level */ - int compression_level; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdCompressor; - -typedef struct { - PyObject_HEAD - - /* Decompression context */ - ZSTD_DCtx *dctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Unconsumed input data */ - char *input_buffer; - size_t input_buffer_size; - size_t in_begin, in_end; - - /* Unused data */ - PyObject *unused_data; - - /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ - char needs_input; - - /* For decompress(), 0 or 1. - 1 when both input and output streams are at a frame edge, means a - frame is completely decoded and fully flushed, or the decompressor - just be initialized. */ - char at_frame_edge; - - /* For ZstdDecompressor, 0 or 1. - 1 means the end of the first frame has been reached. */ - char eof; - - /* Used for fast reset above three variables */ - char _unused_char_for_align; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdDecompressor; - -typedef enum { - TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class - TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function -} decompress_type; +} _zstd_state; typedef enum { ERR_DECOMPRESS, @@ -147,7 +37,7 @@ typedef enum { ERR_SET_C_LEVEL, ERR_TRAIN_DICT, - ERR_FINALIZE_DICT + ERR_FINALIZE_DICT, } error_type; typedef enum { @@ -156,41 +46,16 @@ typedef enum { DICT_TYPE_PREFIX = 2 } dictionary_type; -static inline int -mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) { - return in->size == in->pos && out->size != out->pos; -} +extern ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, + PyObject *dict, int *type); /* Format error message and set ZstdError. */ extern void -set_zstd_error(const _zstd_state* const state, - const error_type type, size_t zstd_ret); +set_zstd_error(const _zstd_state *state, + error_type type, size_t zstd_ret); extern void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v); - -static const char init_twice_msg[] = "__init__ method is called twice."; - -extern int -_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict); - -extern int -_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict); - -extern int -_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, - const char *arg_name, const char *arg_type); - -extern int -_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options); - -extern PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length, - Py_ssize_t initial_size, - decompress_type type); +set_parameter_error(int is_compress, int key_v, int value_v); -extern PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - ZSTD_EndDirective end_directive); +#endif // !ZSTD_MODULE_H diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h index 319b1214833fcf..0ac7bcb4ddc416 100644 --- a/Modules/_zstd/buffer.h +++ b/Modules/_zstd/buffer.h @@ -1,11 +1,12 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorithm & the zstd library. */ + +#ifndef ZSTD_BUFFER_H +#define ZSTD_BUFFER_H -#include "_zstdmodule.h" #include "pycore_blocks_output_buffer.h" +#include <zstd.h> // ZSTD_outBuffer + /* Blocks output buffer wrapper code */ /* Initialize the buffer, and grow the buffer. @@ -18,7 +19,8 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, /* Ensure .list was set to NULL */ assert(buffer->list == NULL); - Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, + &ob->dst); if (res < 0) { return -1; } @@ -33,8 +35,7 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, Return -1 on failure */ static inline int _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, - Py_ssize_t max_length, - Py_ssize_t init_size) + Py_ssize_t max_length, Py_ssize_t init_size) { Py_ssize_t block_size; @@ -49,7 +50,8 @@ _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, block_size = init_size; } - Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, + &ob->dst); if (res < 0) { return -1; } @@ -102,3 +104,5 @@ _OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) return buffer->allocated == buffer->max_length; } + +#endif // !ZSTD_BUFFER_H diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h index 2f8225389b7aea..766e1cfa776767 100644 --- a/Modules/_zstd/clinic/_zstdmodule.c.h +++ b/Modules/_zstd/clinic/_zstdmodule.c.h @@ -9,11 +9,11 @@ preserve #include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() -PyDoc_STRVAR(_zstd__train_dict__doc__, -"_train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" +PyDoc_STRVAR(_zstd_train_dict__doc__, +"train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" "--\n" "\n" -"Internal function, train a zstd dictionary on sample data.\n" +"Train a Zstandard dictionary on sample data.\n" "\n" " samples_bytes\n" " Concatenation of samples.\n" @@ -22,31 +22,31 @@ PyDoc_STRVAR(_zstd__train_dict__doc__, " dict_size\n" " The size of the dictionary."); -#define _ZSTD__TRAIN_DICT_METHODDEF \ - {"_train_dict", _PyCFunction_CAST(_zstd__train_dict), METH_FASTCALL, _zstd__train_dict__doc__}, +#define _ZSTD_TRAIN_DICT_METHODDEF \ + {"train_dict", _PyCFunction_CAST(_zstd_train_dict), METH_FASTCALL, _zstd_train_dict__doc__}, static PyObject * -_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size); +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size); static PyObject * -_zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_zstd_train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyBytesObject *samples_bytes; PyObject *samples_sizes; Py_ssize_t dict_size; - if (!_PyArg_CheckPositional("_train_dict", nargs, 3, 3)) { + if (!_PyArg_CheckPositional("train_dict", nargs, 3, 3)) { goto exit; } if (!PyBytes_Check(args[0])) { - _PyArg_BadArgument("_train_dict", "argument 1", "bytes", args[0]); + _PyArg_BadArgument("train_dict", "argument 1", "bytes", args[0]); goto exit; } samples_bytes = (PyBytesObject *)args[0]; if (!PyTuple_Check(args[1])) { - _PyArg_BadArgument("_train_dict", "argument 2", "tuple", args[1]); + _PyArg_BadArgument("train_dict", "argument 2", "tuple", args[1]); goto exit; } samples_sizes = args[1]; @@ -62,18 +62,18 @@ _zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } dict_size = ival; } - return_value = _zstd__train_dict_impl(module, samples_bytes, samples_sizes, dict_size); + return_value = _zstd_train_dict_impl(module, samples_bytes, samples_sizes, dict_size); exit: return return_value; } -PyDoc_STRVAR(_zstd__finalize_dict__doc__, -"_finalize_dict($module, custom_dict_bytes, samples_bytes,\n" -" samples_sizes, dict_size, compression_level, /)\n" +PyDoc_STRVAR(_zstd_finalize_dict__doc__, +"finalize_dict($module, custom_dict_bytes, samples_bytes, samples_sizes,\n" +" dict_size, compression_level, /)\n" "--\n" "\n" -"Internal function, finalize a zstd dictionary.\n" +"Finalize a Zstandard dictionary.\n" "\n" " custom_dict_bytes\n" " Custom dictionary content.\n" @@ -84,19 +84,19 @@ PyDoc_STRVAR(_zstd__finalize_dict__doc__, " dict_size\n" " The size of the dictionary.\n" " compression_level\n" -" Optimize for a specific zstd compression level, 0 means default."); +" Optimize for a specific Zstandard compression level, 0 means default."); -#define _ZSTD__FINALIZE_DICT_METHODDEF \ - {"_finalize_dict", _PyCFunction_CAST(_zstd__finalize_dict), METH_FASTCALL, _zstd__finalize_dict__doc__}, +#define _ZSTD_FINALIZE_DICT_METHODDEF \ + {"finalize_dict", _PyCFunction_CAST(_zstd_finalize_dict), METH_FASTCALL, _zstd_finalize_dict__doc__}, static PyObject * -_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, - PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size, - int compression_level); +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level); static PyObject * -_zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_zstd_finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyBytesObject *custom_dict_bytes; @@ -105,21 +105,21 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) Py_ssize_t dict_size; int compression_level; - if (!_PyArg_CheckPositional("_finalize_dict", nargs, 5, 5)) { + if (!_PyArg_CheckPositional("finalize_dict", nargs, 5, 5)) { goto exit; } if (!PyBytes_Check(args[0])) { - _PyArg_BadArgument("_finalize_dict", "argument 1", "bytes", args[0]); + _PyArg_BadArgument("finalize_dict", "argument 1", "bytes", args[0]); goto exit; } custom_dict_bytes = (PyBytesObject *)args[0]; if (!PyBytes_Check(args[1])) { - _PyArg_BadArgument("_finalize_dict", "argument 2", "bytes", args[1]); + _PyArg_BadArgument("finalize_dict", "argument 2", "bytes", args[1]); goto exit; } samples_bytes = (PyBytesObject *)args[1]; if (!PyTuple_Check(args[2])) { - _PyArg_BadArgument("_finalize_dict", "argument 3", "tuple", args[2]); + _PyArg_BadArgument("finalize_dict", "argument 3", "tuple", args[2]); goto exit; } samples_sizes = args[2]; @@ -139,32 +139,31 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (compression_level == -1 && PyErr_Occurred()) { goto exit; } - return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level); + return_value = _zstd_finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level); exit: return return_value; } -PyDoc_STRVAR(_zstd__get_param_bounds__doc__, -"_get_param_bounds($module, /, parameter, is_compress)\n" +PyDoc_STRVAR(_zstd_get_param_bounds__doc__, +"get_param_bounds($module, /, parameter, is_compress)\n" "--\n" "\n" -"Internal function, get CompressionParameter/DecompressionParameter bounds.\n" +"Get CompressionParameter/DecompressionParameter bounds.\n" "\n" " parameter\n" " The parameter to get bounds.\n" " is_compress\n" " True for CompressionParameter, False for DecompressionParameter."); -#define _ZSTD__GET_PARAM_BOUNDS_METHODDEF \ - {"_get_param_bounds", _PyCFunction_CAST(_zstd__get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd__get_param_bounds__doc__}, +#define _ZSTD_GET_PARAM_BOUNDS_METHODDEF \ + {"get_param_bounds", _PyCFunction_CAST(_zstd_get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd_get_param_bounds__doc__}, static PyObject * -_zstd__get_param_bounds_impl(PyObject *module, int parameter, - int is_compress); +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress); static PyObject * -_zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -190,7 +189,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg static const char * const _keywords[] = {"parameter", "is_compress", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_get_param_bounds", + .fname = "get_param_bounds", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -211,7 +210,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg if (is_compress < 0) { goto exit; } - return_value = _zstd__get_param_bounds_impl(module, parameter, is_compress); + return_value = _zstd_get_param_bounds_impl(module, parameter, is_compress); exit: return return_value; @@ -221,13 +220,11 @@ PyDoc_STRVAR(_zstd_get_frame_size__doc__, "get_frame_size($module, /, frame_buffer)\n" "--\n" "\n" -"Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.\n" +"Get the size of a Zstandard frame, including the header and optional checksum.\n" "\n" " frame_buffer\n" " A bytes-like object, it should start from the beginning of a frame,\n" -" and contains at least one complete frame.\n" -"\n" -"It will iterate all blocks\' headers within a frame, to accumulate the frame size."); +" and contains at least one complete frame."); #define _ZSTD_GET_FRAME_SIZE_METHODDEF \ {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__}, @@ -288,23 +285,23 @@ _zstd_get_frame_size(PyObject *module, PyObject *const *args, Py_ssize_t nargs, return return_value; } -PyDoc_STRVAR(_zstd__get_frame_info__doc__, -"_get_frame_info($module, /, frame_buffer)\n" +PyDoc_STRVAR(_zstd_get_frame_info__doc__, +"get_frame_info($module, /, frame_buffer)\n" "--\n" "\n" -"Internal function, get zstd frame infomation from a frame header.\n" +"Get Zstandard frame infomation from a frame header.\n" "\n" " frame_buffer\n" -" A bytes-like object, containing the header of a zstd frame."); +" A bytes-like object, containing the header of a Zstandard frame."); -#define _ZSTD__GET_FRAME_INFO_METHODDEF \ - {"_get_frame_info", _PyCFunction_CAST(_zstd__get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd__get_frame_info__doc__}, +#define _ZSTD_GET_FRAME_INFO_METHODDEF \ + {"get_frame_info", _PyCFunction_CAST(_zstd_get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_info__doc__}, static PyObject * -_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); static PyObject * -_zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -330,7 +327,7 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, static const char * const _keywords[] = {"frame_buffer", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_get_frame_info", + .fname = "get_frame_info", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -345,7 +342,7 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) { goto exit; } - return_value = _zstd__get_frame_info_impl(module, &frame_buffer); + return_value = _zstd_get_frame_info_impl(module, &frame_buffer); exit: /* Cleanup for frame_buffer */ @@ -356,26 +353,26 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, return return_value; } -PyDoc_STRVAR(_zstd__set_parameter_types__doc__, -"_set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" +PyDoc_STRVAR(_zstd_set_parameter_types__doc__, +"set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" "--\n" "\n" -"Internal function, set CompressionParameter/DecompressionParameter types for validity check.\n" +"Set CompressionParameter and DecompressionParameter types for validity check.\n" "\n" " c_parameter_type\n" " CompressionParameter IntEnum type object\n" " d_parameter_type\n" " DecompressionParameter IntEnum type object"); -#define _ZSTD__SET_PARAMETER_TYPES_METHODDEF \ - {"_set_parameter_types", _PyCFunction_CAST(_zstd__set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd__set_parameter_types__doc__}, +#define _ZSTD_SET_PARAMETER_TYPES_METHODDEF \ + {"set_parameter_types", _PyCFunction_CAST(_zstd_set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd_set_parameter_types__doc__}, static PyObject * -_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, - PyObject *d_parameter_type); +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type); static PyObject * -_zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -401,7 +398,7 @@ _zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t n static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_set_parameter_types", + .fname = "set_parameter_types", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -415,18 +412,18 @@ _zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t n goto exit; } if (!PyObject_TypeCheck(args[0], &PyType_Type)) { - _PyArg_BadArgument("_set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); + _PyArg_BadArgument("set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); goto exit; } c_parameter_type = args[0]; if (!PyObject_TypeCheck(args[1], &PyType_Type)) { - _PyArg_BadArgument("_set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); + _PyArg_BadArgument("set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); goto exit; } d_parameter_type = args[1]; - return_value = _zstd__set_parameter_types_impl(module, c_parameter_type, d_parameter_type); + return_value = _zstd_set_parameter_types_impl(module, c_parameter_type, d_parameter_type); exit: return return_value; } -/*[clinic end generated code: output=189c462236a7096c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=437b084f149e68e5 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h index d7909cdf89fcd1..4f8d93fd9e867c 100644 --- a/Modules/_zstd/clinic/compressor.c.h +++ b/Modules/_zstd/clinic/compressor.c.h @@ -8,30 +8,30 @@ preserve #endif #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdCompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, "ZstdCompressor(level=None, options=None, zstd_dict=None)\n" "--\n" "\n" "Create a compressor object for compressing data incrementally.\n" "\n" " level\n" -" The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT.\n" +" The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT.\n" " options\n" " A dict object that contains advanced compression parameters.\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" "\n" "Thread-safe at method level. For one-shot compression, use the compress()\n" "function instead."); -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict); +static PyObject * +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict); -static int -_zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 3 @@ -89,7 +89,7 @@ _zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) } zstd_dict = fastargs[2]; skip_optional_pos: - return_value = _zstd_ZstdCompressor___init___impl((ZstdCompressor *)self, level, options, zstd_dict); + return_value = _zstd_ZstdCompressor_new_impl(type, level, options, zstd_dict); exit: return return_value; @@ -189,9 +189,9 @@ PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__, " Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n" " ZstdCompressor.FLUSH_BLOCK\n" "\n" -"Flush any remaining data left in internal buffers. Since zstd data consists\n" -"of one or more independent frames, the compressor object can still be used\n" -"after this method is called."); +"Flush any remaining data left in internal buffers. Since Zstandard data\n" +"consists of one or more independent frames, the compressor object can still\n" +"be used after this method is called."); #define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \ {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__}, @@ -252,4 +252,43 @@ _zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nar exit: return return_value; } -/*[clinic end generated code: output=ef69eab155be39f6 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_zstd_ZstdCompressor_set_pledged_input_size__doc__, +"set_pledged_input_size($self, size, /)\n" +"--\n" +"\n" +"Set the uncompressed content size to be written into the frame header.\n" +"\n" +" size\n" +" The size of the uncompressed data to be provided to the compressor.\n" +"\n" +"This method can be used to ensure the header of the frame about to be written\n" +"includes the size of the data, unless the CompressionParameter.content_size_flag\n" +"is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised.\n" +"\n" +"It is important to ensure that the pledged data size matches the actual data\n" +"size. If they do not match the compressed output data may be corrupted and the\n" +"final chunk written may be lost."); + +#define _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF \ + {"set_pledged_input_size", (PyCFunction)_zstd_ZstdCompressor_set_pledged_input_size, METH_O, _zstd_ZstdCompressor_set_pledged_input_size__doc__}, + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size); + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + unsigned long long size; + + if (!zstd_contentsize_converter(arg, &size)) { + goto exit; + } + return_value = _zstd_ZstdCompressor_set_pledged_input_size_impl((ZstdCompressor *)self, size); + +exit: + return return_value; +} +/*[clinic end generated code: output=c1d5c2cf06a8becd input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h index 9359c637203f8f..c6fdae74ab0447 100644 --- a/Modules/_zstd/clinic/decompressor.c.h +++ b/Modules/_zstd/clinic/decompressor.c.h @@ -7,31 +7,30 @@ preserve # include "pycore_runtime.h" // _Py_ID() #endif #include "pycore_abstract.h" // _PyNumber_Index() -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDecompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdDecompressor_new__doc__, "ZstdDecompressor(zstd_dict=None, options=None)\n" "--\n" "\n" "Create a decompressor object for decompressing data incrementally.\n" "\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" " options\n" " A dict object that contains advanced decompression parameters.\n" "\n" "Thread-safe at method level. For one-shot decompression, use the decompress()\n" "function instead."); -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options); +static PyObject * +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options); -static int -_zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 2 @@ -82,7 +81,7 @@ _zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs } options = fastargs[1]; skip_optional_pos: - return_value = _zstd_ZstdDecompressor___init___impl((ZstdDecompressor *)self, zstd_dict, options); + return_value = _zstd_ZstdDecompressor_new_impl(type, zstd_dict, options); exit: return return_value; @@ -114,13 +113,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self); static PyObject * _zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); } PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, @@ -130,7 +123,7 @@ PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, "Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n" "\n" " data\n" -" A bytes-like object, zstd data to be decompressed.\n" +" A bytes-like object, Zstandard data to be decompressed.\n" " max_length\n" " Maximum size of returned data. When it is negative, the size of\n" " output buffer is unlimited. When it is nonnegative, returns at\n" @@ -227,4 +220,4 @@ _zstd_ZstdDecompressor_decompress(PyObject *self, PyObject *const *args, Py_ssiz return return_value; } -/*[clinic end generated code: output=ae703f0465a2906d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=30c12ef047027ede input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index 4e0f7b64172a74..166d925a542352 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -6,38 +6,35 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDict___init____doc__, -"ZstdDict(dict_content, is_raw=False)\n" +PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, +"ZstdDict(dict_content, /, *, is_raw=False)\n" "--\n" "\n" -"Represents a zstd dictionary, which can be used for compression/decompression.\n" +"Represents a Zstandard dictionary.\n" "\n" " dict_content\n" -" A bytes-like object, dictionary\'s content.\n" +" The content of a Zstandard dictionary as a bytes-like object.\n" " is_raw\n" -" This parameter is for advanced user. True means dict_content\n" -" argument is a \"raw content\" dictionary, free of any format\n" -" restriction. False means dict_content argument is an ordinary\n" -" zstd dictionary, was created by zstd functions, follow a\n" -" specified format.\n" +" If true, perform no checks on *dict_content*, useful for some\n" +" advanced cases. Otherwise, check that the content represents\n" +" a Zstandard dictionary created by the zstd library or CLI.\n" "\n" -"It\'s thread-safe, and can be shared by multiple ZstdCompressor /\n" -"ZstdDecompressor objects."); +"The dictionary can be used for compression or decompression, and can be shared\n" +"by multiple ZstdCompressor or ZstdDecompressor objects."); -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw); +static PyObject * +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw); -static int -_zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 1 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -46,7 +43,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(dict_content), &_Py_ID(is_raw), }, + .ob_item = { &_Py_ID(is_raw), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -55,7 +52,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"dict_content", "is_raw", NULL}; + static const char * const _keywords[] = {"", "is_raw", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "ZstdDict", @@ -66,33 +63,68 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; - PyObject *dict_content; + Py_buffer dict_content = {NULL, NULL}; int is_raw = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, - /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - dict_content = fastargs[0]; + if (PyObject_GetBuffer(fastargs[0], &dict_content, PyBUF_SIMPLE) != 0) { + goto exit; + } if (!noptargs) { - goto skip_optional_pos; + goto skip_optional_kwonly; } is_raw = PyObject_IsTrue(fastargs[1]); if (is_raw < 0) { goto exit; } -skip_optional_pos: - return_value = _zstd_ZstdDict___init___impl((ZstdDict *)self, dict_content, is_raw); +skip_optional_kwonly: + return_value = _zstd_ZstdDict_new_impl(type, &dict_content, is_raw); exit: + /* Cleanup for dict_content */ + if (dict_content.obj) { + PyBuffer_Release(&dict_content); + } + return return_value; } +PyDoc_STRVAR(_zstd_ZstdDict_dict_content__doc__, +"The content of a Zstandard dictionary, as a bytes object."); +#if defined(_zstd_ZstdDict_dict_content_DOCSTR) +# undef _zstd_ZstdDict_dict_content_DOCSTR +#endif +#define _zstd_ZstdDict_dict_content_DOCSTR _zstd_ZstdDict_dict_content__doc__ + +#if !defined(_zstd_ZstdDict_dict_content_DOCSTR) +# define _zstd_ZstdDict_dict_content_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, (setter)_zstd_ZstdDict_dict_content_set, _zstd_ZstdDict_dict_content_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, NULL, _zstd_ZstdDict_dict_content_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_dict_content_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_dict_content_get_impl((ZstdDict *)self); +} + PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, "Load as a digested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_digested_dict)\n" +"\n" "1. Some advanced compression parameters of compressor may be overridden\n" " by parameters of digested dictionary.\n" "2. ZstdDict has a digested dictionaries cache for each compression level.\n" @@ -120,19 +152,15 @@ _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__, "Load as an undigested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"\n" "1. The advanced compression parameters of compressor will not be overridden.\n" "2. Loading an undigested dictionary is costly. If load an undigested dictionary\n" " multiple times, consider reusing a compressor object.\n" @@ -158,23 +186,19 @@ _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__, "Load as a prefix to compressor/decompressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_prefix)\n" +"\n" "1. Prefix is compatible with long distance matching, while dictionary is not.\n" "2. It only works for the first frame, then the compressor/decompressor will\n" " return to no prefix state.\n" -"3. When decompressing, must use the same prefix as when compressing.\""); +"3. When decompressing, must use the same prefix as when compressing."); #if defined(_zstd_ZstdDict_as_prefix_DOCSTR) # undef _zstd_ZstdDict_as_prefix_DOCSTR #endif @@ -196,12 +220,6 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); } -/*[clinic end generated code: output=59257c053f74eda7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f41d9e2e2cc2928f input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index b735981e7476d5..508b136817872b 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -1,123 +1,196 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* ZstdCompressor class definitions */ /*[clinic input] module _zstd -class _zstd.ZstdCompressor "ZstdCompressor *" "clinic_state()->ZstdCompressor_type" +class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=875bf614798f80cb]*/ - +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7166021db1ef7df8]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif -#include "_zstdmodule.h" +#include "Python.h" +#include "_zstdmodule.h" #include "buffer.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Compression context */ + ZSTD_CCtx *cctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Last mode, initialized to ZSTD_e_end */ + int last_mode; + /* (nbWorker >= 1) ? 1 : 0 */ + int use_multithread; + + /* Compression level */ + int compression_level; + + /* Lock to protect the compression context */ + PyMutex lock; +} ZstdCompressor; #define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) -int -_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, - const char *arg_name, const char* arg_type) +/*[python input] + +class zstd_contentsize_converter(CConverter): + type = 'unsigned long long' + converter = 'zstd_contentsize_converter' + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=0932c350d633c7de]*/ + + +static int +zstd_contentsize_converter(PyObject *size, unsigned long long *p) { - size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; + // None means the user indicates the size is unknown. + if (size == Py_None) { + *p = ZSTD_CONTENTSIZE_UNKNOWN; } - - /* Integer compression level */ - if (PyLong_Check(level_or_options)) { - int level = PyLong_AsInt(level_or_options); - if (level == -1 && PyErr_Occurred()) { + else { + /* ZSTD_CONTENTSIZE_UNKNOWN is 0ULL - 1 + ZSTD_CONTENTSIZE_ERROR is 0ULL - 2 + Users should only pass values < ZSTD_CONTENTSIZE_ERROR */ + unsigned long long pledged_size = PyLong_AsUnsignedLongLong(size); + /* Here we check for (unsigned long long)-1 as a sign of an error in + PyLong_AsUnsignedLongLong */ + if (pledged_size == (unsigned long long)-1 && PyErr_Occurred()) { + *p = ZSTD_CONTENTSIZE_ERROR; + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + return 0; + } + if (pledged_size >= ZSTD_CONTENTSIZE_ERROR) { + *p = ZSTD_CONTENTSIZE_ERROR; PyErr_Format(PyExc_ValueError, - "Compression level should be an int value between %d and %d.", - ZSTD_minCLevel(), ZSTD_maxCLevel()); - return -1; + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; } + *p = pledged_size; + } + return 1; +} - /* Save for generating ZSTD_CDICT */ - self->compression_level = level; +#include "clinic/compressor.c.h" - /* Set compressionLevel to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, - ZSTD_c_compressionLevel, - level); +static int +_zstd_set_c_level(ZstdCompressor *self, int level) +{ + /* Set integer compression level */ + int min_level = ZSTD_minCLevel(); + int max_level = ZSTD_maxCLevel(); + if (level < min_level || level > max_level) { + PyErr_Format(PyExc_ValueError, + "illegal compression level %d; the valid range is [%d, %d]", + level, min_level, max_level); + return -1; + } - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + /* Save for generating ZSTD_CDICT */ + self->compression_level = level; + + /* Set compressionLevel to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter( + self->cctx, ZSTD_c_compressionLevel, level); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + return -1; + } + return 0; +} + +static int +_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + + if (!PyDict_Check(options)) { + PyErr_Format(PyExc_TypeError, + "ZstdCompressor() argument 'options' must be dict, not %T", + options); + return -1; + } + + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->DParameter_type) { + PyErr_SetString(PyExc_TypeError, + "compression options dictionary key must not be a " + "DecompressionParameter attribute"); return -1; } - return 0; - } - /* Options dict */ - if (PyDict_Check(level_or_options)) { - PyObject *key, *value; - Py_ssize_t pos = 0; + Py_INCREF(key); + Py_INCREF(value); + int key_v = PyLong_AsInt(key); + Py_DECREF(key); + if (key_v == -1 && PyErr_Occurred()) { + Py_DECREF(value); + return -1; + } - while (PyDict_Next(level_or_options, &pos, &key, &value)) { - /* Check key type */ - if (Py_TYPE(key) == mod_state->DParameter_type) { - PyErr_SetString(PyExc_TypeError, - "Key of compression option dict should " - "NOT be DecompressionParameter."); - return -1; - } + int value_v = PyLong_AsInt(value); + Py_DECREF(value); + if (value_v == -1 && PyErr_Occurred()) { + return -1; + } - int key_v = PyLong_AsInt(key); - if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a CompressionParameter attribute."); + if (key_v == ZSTD_c_compressionLevel) { + if (_zstd_set_c_level(self, value_v) < 0) { return -1; } - - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? - int value_v = PyLong_AsInt(value); - if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of option dict should be an int."); - return -1; + continue; + } + if (key_v == ZSTD_c_nbWorkers) { + /* From the zstd library docs: + 1. When nbWorkers >= 1, triggers asynchronous mode when + used with ZSTD_compressStream2(). + 2, Default value is `0`, aka "single-threaded mode" : no + worker is spawned, compression is performed inside + caller's thread, all invocations are blocking. */ + if (value_v != 0) { + self->use_multithread = 1; } + } - if (key_v == ZSTD_c_compressionLevel) { - /* Save for generating ZSTD_CDICT */ - self->compression_level = value_v; - } - else if (key_v == ZSTD_c_nbWorkers) { - /* From zstd library doc: - 1. When nbWorkers >= 1, triggers asynchronous mode when - used with ZSTD_compressStream2(). - 2, Default value is `0`, aka "single-threaded mode" : no - worker is spawned, compression is performed inside - caller's thread, all invocations are blocking. */ - if (value_v != 0) { - self->use_multithread = 1; - } - } + /* Set parameter to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - /* Set parameter to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 1, key_v, value_v); - return -1; - } + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(1, key_v, value_v); + return -1; } - return 0; } - PyErr_Format(PyExc_TypeError, "Invalid type for %s. Expected %s", arg_name, arg_type); - return -1; + return 0; } static void @@ -130,12 +203,12 @@ capsule_free_cdict(PyObject *capsule) ZSTD_CDict * _get_CDict(ZstdDict *self, int compressionLevel) { + assert(PyMutex_IsLocked(&self->lock)); PyObject *level = NULL; - PyObject *capsule; + PyObject *capsule = NULL; ZSTD_CDict *cdict; + int ret; - // TODO(emmatyping): refactor critical section code into a lock_held function - Py_BEGIN_CRITICAL_SECTION(self); /* int level object */ level = PyLong_FromLong(compressionLevel); @@ -144,27 +217,23 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Get PyCapsule object from self->c_dicts */ - capsule = PyDict_GetItemWithError(self->c_dicts, level); + ret = PyDict_GetItemRef(self->c_dicts, level, &capsule); + if (ret < 0) { + goto error; + } if (capsule == NULL) { - if (PyErr_Occurred()) { - goto error; - } - /* Create ZSTD_CDict instance */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - cdict = ZSTD_createCDict(dict_buffer, - dict_len, + cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len, compressionLevel); Py_END_ALLOW_THREADS if (cdict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_CDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_CDict instance from " + "Zstandard dictionary content."); } goto error; } @@ -177,11 +246,10 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Add PyCapsule object to self->c_dicts */ - if (PyDict_SetItem(self->c_dicts, level, capsule) < 0) { - Py_DECREF(capsule); + ret = PyDict_SetItem(self->c_dicts, level, capsule); + if (ret < 0) { goto error; } - Py_DECREF(capsule); } else { /* ZSTD_CDict instance already exists */ @@ -193,61 +261,15 @@ _get_CDict(ZstdDict *self, int compressionLevel) cdict = NULL; success: Py_XDECREF(level); - Py_END_CRITICAL_SECTION(); + Py_XDECREF(capsule); return cdict; } -int -_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { - +static int +_zstd_load_impl(ZstdCompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) +{ size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* When compressing, use undigested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_UNDIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: if (type == DICT_TYPE_DIGESTED) { /* Get ZSTD_CDict */ ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); @@ -256,28 +278,18 @@ _PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { } /* Reference a prepared dictionary. It overrides some compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION(self); zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); - Py_END_CRITICAL_SECTION(); } else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary. It doesn't override compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_loadDictionary( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_refPrefix( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer, + zd->dict_len); } else { Py_UNREACHABLE(); @@ -291,28 +303,57 @@ _PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { return 0; } -#define clinic_state() (get_zstd_state_from_type(type)) -#include "clinic/compressor.c.h" -#undef clinic_state +static int +_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When compressing, use undigested dictionary by default. */ + int type = DICT_TYPE_UNDIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} + +/*[clinic input] +@classmethod +_zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new + level: object = None + The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT. + options: object = None + A dict object that contains advanced compression parameters. + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + +Create a compressor object for compressing data incrementally. + +Thread-safe at method level. For one-shot compression, use the compress() +function instead. +[clinic start generated code]*/ static PyObject * -_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict) +/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/ { - ZstdCompressor *self; - self = PyObject_GC_New(ZstdCompressor, type); + ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type); if (self == NULL) { goto error; } - self->inited = 0; - self->dict = NULL; self->use_multithread = 0; - + self->dict = NULL; + self->lock = (PyMutex){0}; /* Compression context */ self->cctx = ZSTD_createCCtx(); if (self->cctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_CCtx instance."); @@ -323,12 +364,56 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject /* Last mode */ self->last_mode = ZSTD_e_end; + if (level != Py_None && options != Py_None) { + PyErr_SetString(PyExc_TypeError, + "Only one of level or options should be used."); + goto error; + } + + /* Set compression level */ + if (level != Py_None) { + if (!PyLong_Check(level)) { + PyErr_SetString(PyExc_TypeError, + "invalid type for level, expected int"); + goto error; + } + int level_v = PyLong_AsInt(level); + if (level_v == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "illegal compression level; the valid range is [%d, %d]", + ZSTD_minCLevel(), ZSTD_maxCLevel()); + } + goto error; + } + if (_zstd_set_c_level(self, level_v) < 0) { + goto error; + } + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_c_parameters(self, options) < 0) { + goto error; + } + } + + /* Load Zstandard dictionary to compression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_c_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + Py_XDECREF(self); return NULL; } @@ -340,7 +425,11 @@ ZstdCompressor_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free compression context */ - ZSTD_freeCCtx(self->cctx); + if (self->cctx) { + ZSTD_freeCCtx(self->cctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); /* Py_XDECREF the dict after free the compression context */ Py_CLEAR(self->dict); @@ -350,72 +439,11 @@ ZstdCompressor_dealloc(PyObject *ob) Py_DECREF(tp); } -/*[clinic input] -_zstd.ZstdCompressor.__init__ - - level: object = None - The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT. - options: object = None - A dict object that contains advanced compression parameters. - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - -Create a compressor object for compressing data incrementally. - -Thread-safe at method level. For one-shot compression, use the compress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict) -/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - if (level != Py_None && options != Py_None) { - PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); - return -1; - } - - /* Set compressLevel/options to compression context */ - if (level != Py_None) { - if (_PyZstd_set_c_parameters(self, level, "level", "int") < 0) { - return -1; - } - } - - if (options != Py_None) { - if (_PyZstd_set_c_parameters(self, options, "options", "dict") < 0) { - return -1; - } - } - - /* Load dictionary to compression context */ - if (zstd_dict != Py_None) { - if (_PyZstd_load_c_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - // We can only start tracking self with the GC once self->dict is set. - PyObject_GC_Track(self); - return 0; -} - -PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - ZSTD_EndDirective end_directive) +static PyObject * +compress_lock_held(ZstdCompressor *self, Py_buffer *data, + ZSTD_EndDirective end_directive) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -442,12 +470,12 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, } if (_OutputBuffer_InitWithSize(&buffer, &out, -1, - (Py_ssize_t) output_buffer_size) < 0) { + (Py_ssize_t) output_buffer_size) < 0) { goto error; } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive); @@ -455,10 +483,8 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } @@ -487,9 +513,18 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, return NULL; } +#ifndef NDEBUG +static inline int +mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) +{ + return in->size == in->pos && out->size != out->pos; +} +#endif + static PyObject * -compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) +compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -505,24 +540,25 @@ compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) goto error; } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS do { - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); - } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, + ZSTD_e_continue); + } while (out.pos != out.size + && in.pos != in.size + && !ZSTD_isError(zstd_ret)); Py_END_ALLOW_THREADS /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } - /* Like compress_impl(), output as much as possible. */ + /* Like compress_lock_held(), output as much as possible. */ if (out.pos == out.size) { if (_OutputBuffer_Grow(&buffer, &out) < 0) { goto error; @@ -581,14 +617,14 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); + PyMutex_Lock(&self->lock); /* Compress */ if (self->use_multithread && mode == ZSTD_e_continue) { - ret = compress_mt_continue_impl(self, data); + ret = compress_mt_continue_lock_held(self, data); } else { - ret = compress_impl(self, data, mode); + ret = compress_lock_held(self, data, mode); } if (ret) { @@ -600,7 +636,7 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } @@ -614,14 +650,14 @@ _zstd.ZstdCompressor.flush Finish the compression process. -Flush any remaining data left in internal buffers. Since zstd data consists -of one or more independent frames, the compressor object can still be used -after this method is called. +Flush any remaining data left in internal buffers. Since Zstandard data +consists of one or more independent frames, the compressor object can still +be used after this method is called. [clinic start generated code]*/ static PyObject * _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) -/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=a766870301932b85]*/ +/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=0ab19627f323cdbc]*/ { PyObject *ret; @@ -635,8 +671,9 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - ret = compress_impl(self, NULL, mode); + PyMutex_Lock(&self->lock); + + ret = compress_lock_held(self, NULL, mode); if (ret) { self->last_mode = mode; @@ -647,28 +684,79 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } + +/*[clinic input] +_zstd.ZstdCompressor.set_pledged_input_size + + size: zstd_contentsize + The size of the uncompressed data to be provided to the compressor. + / + +Set the uncompressed content size to be written into the frame header. + +This method can be used to ensure the header of the frame about to be written +includes the size of the data, unless the CompressionParameter.content_size_flag +is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised. + +It is important to ensure that the pledged data size matches the actual data +size. If they do not match the compressed output data may be corrupted and the +final chunk written may be lost. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size) +/*[clinic end generated code: output=3a09e55cc0e3b4f9 input=afd8a7d78cff2eb5]*/ +{ + // Error occured while converting argument, should be unreachable + assert(size != ZSTD_CONTENTSIZE_ERROR); + + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + + /* Check the current mode */ + if (self->last_mode != ZSTD_e_end) { + PyErr_SetString(PyExc_ValueError, + "set_pledged_input_size() method must be called " + "when last_mode == FLUSH_FRAME"); + PyMutex_Unlock(&self->lock); + return NULL; + } + + /* Set pledged content size */ + size_t zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self->cctx, size); + PyMutex_Unlock(&self->lock); + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret); + return NULL; + } + + Py_RETURN_NONE; +} + static PyMethodDef ZstdCompressor_methods[] = { _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF - - {0} + _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF + {NULL, NULL} }; PyDoc_STRVAR(ZstdCompressor_last_mode_doc, "The last mode used to this compressor object, its value can be .CONTINUE,\n" ".FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME.\n\n" -"It can be used to get the current state of a compressor, such as, data flushed,\n" -"a frame ended."); +"It can be used to get the current state of a compressor, such as, data\n" +"flushed, or a frame ended."); static PyMemberDef ZstdCompressor_members[] = { {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), - Py_READONLY, ZstdCompressor_last_mode_doc}, - {0} + Py_READONLY, ZstdCompressor_last_mode_doc}, + {NULL} }; static int @@ -690,18 +778,20 @@ ZstdCompressor_clear(PyObject *ob) static PyType_Slot zstdcompressor_slots[] = { {Py_tp_new, _zstd_ZstdCompressor_new}, {Py_tp_dealloc, ZstdCompressor_dealloc}, - {Py_tp_init, _zstd_ZstdCompressor___init__}, {Py_tp_methods, ZstdCompressor_methods}, {Py_tp_members, ZstdCompressor_members}, - {Py_tp_doc, (char*)_zstd_ZstdCompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdCompressor_new__doc__}, {Py_tp_traverse, ZstdCompressor_traverse}, {Py_tp_clear, ZstdCompressor_clear}, - {0} + {0, 0} }; -PyType_Spec zstdcompressor_type_spec = { - .name = "_zstd.ZstdCompressor", +PyType_Spec zstd_compressor_type_spec = { + .name = "compression.zstd.ZstdCompressor", .basicsize = sizeof(ZstdCompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + // Py_TPFLAGS_IMMUTABLETYPE is not used here as several + // associated constants need to be added to the type. + // PyType_Freeze is called later to set the flag. + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, .slots = zstdcompressor_slots, }; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index a4be180c0088fc..026d39f68291da 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -1,176 +1,142 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* ZstdDecompressor class definition */ /*[clinic input] module _zstd -class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type" +class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2969ddf48a203e0]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif -#include "_zstdmodule.h" +#include "Python.h" +#include "_zstdmodule.h" #include "buffer.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked +#include <stdbool.h> // bool #include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Decompression context */ + ZSTD_DCtx *dctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Unconsumed input data */ + char *input_buffer; + size_t input_buffer_size; + size_t in_begin, in_end; + + /* Unused data */ + PyObject *unused_data; + + /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ + bool needs_input; + + /* For ZstdDecompressor, 0 or 1. + 1 means the end of the first frame has been reached. */ + bool eof; + + /* Lock to protect the decompression context */ + PyMutex lock; +} ZstdDecompressor; #define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op) +#include "clinic/decompressor.c.h" + static inline ZSTD_DDict * _get_DDict(ZstdDict *self) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_DDict *ret; - /* Already created */ - if (self->d_dict != NULL) { - return self->d_dict; - } - - Py_BEGIN_CRITICAL_SECTION(self); if (self->d_dict == NULL) { /* Create ZSTD_DDict instance from dictionary content */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - self->d_dict = ZSTD_createDDict(dict_buffer, - dict_len); + ret = ZSTD_createDDict(self->dict_buffer, self->dict_len); Py_END_ALLOW_THREADS + self->d_dict = ret; if (self->d_dict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_DDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_DDict instance from " + "Zstandard dictionary content."); } } } - /* Don't lose any exception */ - ret = self->d_dict; - Py_END_CRITICAL_SECTION(); - - return ret; + return self->d_dict; } -/* Set decompression parameters to decompression context */ -int -_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) +static int +_zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) { - size_t zstd_ret; - PyObject *key, *value; - Py_ssize_t pos; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } if (!PyDict_Check(options)) { - PyErr_SetString(PyExc_TypeError, - "options argument should be dict object."); + PyErr_Format(PyExc_TypeError, + "ZstdDecompressor() argument 'options' must be dict, not %T", + options); return -1; } - pos = 0; + Py_ssize_t pos = 0; + PyObject *key, *value; while (PyDict_Next(options, &pos, &key, &value)) { /* Check key type */ if (Py_TYPE(key) == mod_state->CParameter_type) { PyErr_SetString(PyExc_TypeError, - "Key of decompression options dict should " - "NOT be CompressionParameter."); + "compression options dictionary key must not be a " + "CompressionParameter attribute"); return -1; } - /* Both key & value should be 32-bit signed int */ + Py_INCREF(key); + Py_INCREF(value); int key_v = PyLong_AsInt(key); + Py_DECREF(key); if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a DecompressionParameter attribute."); return -1; } - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? int value_v = PyLong_AsInt(value); + Py_DECREF(value); if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of options dict should be an int."); return -1; } /* Set parameter to compression context */ - Py_BEGIN_CRITICAL_SECTION(self); - zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); - Py_END_CRITICAL_SECTION(); + size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); /* Check error */ if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 0, key_v, value_v); + set_parameter_error(0, key_v, value_v); return -1; } } return 0; } -/* Load dictionary or prefix to decompression context */ -int -_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +static int +_zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) { size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* When decompressing, use digested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_DIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: if (type == DICT_TYPE_DIGESTED) { /* Get ZSTD_DDict */ ZSTD_DDict *d_dict = _get_DDict(zd); @@ -178,33 +144,20 @@ _PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) return -1; } /* Reference a prepared dictionary */ - Py_BEGIN_CRITICAL_SECTION(self); zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); - Py_END_CRITICAL_SECTION(); } else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_loadDictionary( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_refPrefix( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer, + zd->dict_len); } else { - /* Impossible code path */ - PyErr_SetString(PyExc_SystemError, - "load_d_dict() impossible code path"); - return -1; + Py_UNREACHABLE(); } /* Check error */ @@ -215,22 +168,31 @@ _PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) return 0; } - +/* Load dictionary or prefix to decompression context */ +static int +_zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When decompressing, use digested dictionary by default. */ + int type = DICT_TYPE_DIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} /* - Given the two types of decompressors (defined in _zstdmodule.h): - - typedef enum { - TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class - TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function - } decompress_type; - - Decompress implementation for <D>, <E>, pseudo code: + Decompress implementation in pseudo code: initialize_output_buffer while True: decompress_data - set_object_flag # .eof for <D>, .at_frame_edge for <E>. + set_object_flag # .eof if output_buffer_exhausted: if output_buffer_reached_max_length: @@ -240,76 +202,26 @@ _PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) finish ZSTD_decompressStream()'s size_t return value: - - 0 when a frame is completely decoded and fully flushed, zstd's internal - buffer has no data. + - 0 when a frame is completely decoded and fully flushed, + zstd's internal buffer has no data. - An error code, which can be tested using ZSTD_isError(). - Or any other value > 0, which means there is still some decoding or flushing to do to complete current frame. Note, decompressing "an empty input" in any case will make it > 0. - - <E> supports multiple frames, has an .at_frame_edge flag, it means both the - input and output streams are at a frame edge. The flag can be set by this - statement: - - .at_frame_edge = (zstd_ret == 0) ? 1 : 0 - - But if decompressing "an empty input" at "a frame edge", zstd_ret will be - non-zero, then .at_frame_edge will be wrongly set to false. To solve this - problem, two AFE checks are needed to ensure that: when at "a frame edge", - empty input will not be decompressed. - - // AFE check - if (self->at_frame_edge && in->pos == in->size) { - finish - } - - In <E>, if .at_frame_edge is eventually set to true, but input stream has - unconsumed data (in->pos < in->size), then the outer function - stream_decompress() will set .at_frame_edge to false. In this case, - although the output stream is at a frame edge, for the caller, the input - stream is not at a frame edge, see below diagram. This behavior does not - affect the next AFE check, since (in->pos < in->size). - - input stream: --------------|--- - ^ - output stream: ====================| - ^ */ -PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length, - Py_ssize_t initial_size, - decompress_type type) +static PyObject * +decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in, + Py_ssize_t max_length) { size_t zstd_ret; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; PyObject *ret; - /* The first AFE check for setting .at_frame_edge flag */ - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (self->at_frame_edge && in->pos == in->size) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return NULL; - } - ret = mod_state->empty_bytes; - Py_INCREF(ret); - return ret; - } - } - /* Initialize the output buffer */ - if (initial_size >= 0) { - if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) { - goto error; - } - } - else { - if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { - goto error; - } + if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { + goto error; } assert(out.pos == 0); @@ -321,33 +233,20 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); goto error; } - /* Set .eof/.af_frame_edge flag */ - if (type == TYPE_DECOMPRESSOR) { - /* ZstdDecompressor class stops when a frame is decompressed */ - if (zstd_ret == 0) { - self->eof = 1; - break; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* decompress() function supports multiple frames */ - self->at_frame_edge = (zstd_ret == 0) ? 1 : 0; - - /* The second AFE check for setting .at_frame_edge flag */ - if (self->at_frame_edge && in->pos == in->size) { - break; - } + /* Set .eof flag */ + if (zstd_ret == 0) { + /* Stop when a frame is decompressed */ + self->eof = 1; + break; } /* Need to check out before in. Maybe zstd's internal buffer still has - a few bytes can be output, grow the buffer and continue. */ + a few bytes that can be output, grow the buffer and continue. */ if (out.pos == out.size) { /* Output buffer exhausted */ @@ -380,67 +279,40 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, return NULL; } -void -decompressor_reset_session(ZstdDecompressor *self, - decompress_type type) +static void +decompressor_reset_session_lock_held(ZstdDecompressor *self) { - // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here - // and ensure lock is always held + assert(PyMutex_IsLocked(&self->lock)); /* Reset variables */ self->in_begin = 0; self->in_end = 0; - if (type == TYPE_DECOMPRESSOR) { - Py_CLEAR(self->unused_data); - } + Py_CLEAR(self->unused_data); /* Reset variables in one operation */ self->needs_input = 1; - self->at_frame_edge = 1; self->eof = 0; - self->_unused_char_for_align = 0; - /* Resetting session never fail */ + /* Resetting session is guaranteed to never fail */ ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); } -PyObject * -stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length, - decompress_type type) +static PyObject * +stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data, + Py_ssize_t max_length) { - Py_ssize_t initial_buffer_size = -1; + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; PyObject *ret = NULL; int use_input_buffer; - if (type == TYPE_DECOMPRESSOR) { - /* Check .eof flag */ - if (self->eof) { - PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); - assert(ret == NULL); - goto success; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* Fast path for the first frame */ - if (self->at_frame_edge && self->in_begin == self->in_end) { - /* Read decompressed size */ - uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len); - - /* These two zstd constants always > PY_SSIZE_T_MAX: - ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1) - ZSTD_CONTENTSIZE_ERROR is (0ULL - 2) - - Use ZSTD_findFrameCompressedSize() to check complete frame, - prevent allocating too much memory for small input chunk. */ - - if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX && - !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) ) - { - initial_buffer_size = (Py_ssize_t) decompressed_size; - } - } + /* Check .eof flag */ + if (self->eof) { + PyErr_SetString(PyExc_EOFError, + "Already at the end of a Zstandard frame."); + assert(ret == NULL); + return NULL; } /* Prepare input buffer w/wo unconsumed data */ @@ -527,30 +399,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length assert(in.pos == 0); /* Decompress */ - ret = decompress_impl(self, &in, - max_length, initial_buffer_size, - type); + ret = decompress_lock_held(self, &in, max_length); if (ret == NULL) { goto error; } /* Unconsumed input data */ if (in.pos == in.size) { - if (type == TYPE_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length || self->eof) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + if (Py_SIZE(ret) == max_length || self->eof) { + self->needs_input = 0; } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length && !self->at_frame_edge) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + else { + self->needs_input = 1; } if (use_input_buffer) { @@ -564,15 +424,11 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length self->needs_input = 0; - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - self->at_frame_edge = 0; - } - if (!use_input_buffer) { /* Discard buffer if it's too small (resizing it may needlessly copy the current contents) */ - if (self->input_buffer != NULL && - self->input_buffer_size < data_size) + if (self->input_buffer != NULL + && self->input_buffer_size < data_size) { PyMem_Free(self->input_buffer); self->input_buffer = NULL; @@ -600,47 +456,57 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length } } - goto success; + return ret; error: /* Reset decompressor's states/session */ - decompressor_reset_session(self, type); + decompressor_reset_session_lock_held(self); Py_CLEAR(ret); -success: - - return ret; + return NULL; } +/*[clinic input] +@classmethod +_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + options: object = None + A dict object that contains advanced decompression parameters. + +Create a decompressor object for decompressing data incrementally. + +Thread-safe at method level. For one-shot decompression, use the decompress() +function instead. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options) +/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/ { - ZstdDecompressor *self; - self = PyObject_GC_New(ZstdDecompressor, type); + ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type); if (self == NULL) { goto error; } - self->inited = 0; - self->dict = NULL; self->input_buffer = NULL; self->input_buffer_size = 0; self->in_begin = -1; self->in_end = -1; self->unused_data = NULL; self->eof = 0; + self->dict = NULL; + self->lock = (PyMutex){0}; /* needs_input flag */ self->needs_input = 1; - /* at_frame_edge flag */ - self->at_frame_edge = 1; - /* Decompression context */ self->dctx = ZSTD_createDCtx(); if (self->dctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_DCtx instance."); @@ -648,12 +514,29 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto error; } + /* Load Zstandard dictionary to decompression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_d_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_d_parameters(self, options) < 0) { + goto error; + } + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + Py_XDECREF(self); return NULL; } @@ -665,7 +548,11 @@ ZstdDecompressor_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free decompression context */ - ZSTD_freeDCtx(self->dctx); + if (self->dctx) { + ZSTD_freeDCtx(self->dctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); /* Py_CLEAR the dict after free decompression context */ Py_CLEAR(self->dict); @@ -682,56 +569,6 @@ ZstdDecompressor_dealloc(PyObject *ob) } /*[clinic input] -_zstd.ZstdDecompressor.__init__ - - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - options: object = None - A dict object that contains advanced decompression parameters. - -Create a decompressor object for decompressing data incrementally. - -Thread-safe at method level. For one-shot decompression, use the decompress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options) -/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Load dictionary to decompression context */ - if (zstd_dict != Py_None) { - if (_PyZstd_load_d_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - /* Set option to decompression context */ - if (options != Py_None) { - if (_PyZstd_set_d_parameters(self, options) < 0) { - return -1; - } - } - - // We can only start tracking self with the GC once self->dict is set. - PyObject_GC_Track(self); - return 0; -} - -/*[clinic input] -@critical_section @getter _zstd.ZstdDecompressor.unused_data @@ -743,20 +580,15 @@ decompressed, unused input data after the frame. Otherwise this will be b''. static PyObject * _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) -/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/ +/*[clinic end generated code: output=f3a20940f11b6b09 input=54d41ecd681a3444]*/ { PyObject *ret; - /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); + PyMutex_Lock(&self->lock); if (!self->eof) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return NULL; - } - ret = mod_state->empty_bytes; - Py_INCREF(ret); + PyMutex_Unlock(&self->lock); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else { if (self->unused_data == NULL) { @@ -772,8 +604,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) } } - Py_END_CRITICAL_SECTION(); - + PyMutex_Unlock(&self->lock); return ret; } @@ -781,7 +612,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) _zstd.ZstdDecompressor.decompress data: Py_buffer - A bytes-like object, zstd data to be decompressed. + A bytes-like object, Zstandard data to be decompressed. max_length: Py_ssize_t = -1 Maximum size of returned data. When it is negative, the size of output buffer is unlimited. When it is nonnegative, returns at @@ -807,25 +638,19 @@ static PyObject * _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) -/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/ +/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/ { PyObject *ret; /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - - ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR); - Py_END_CRITICAL_SECTION(); + PyMutex_Lock(&self->lock); + ret = stream_decompress_lock_held(self, data, max_length); + PyMutex_Unlock(&self->lock); return ret; } -#define clinic_state() (get_zstd_state_from_type(type)) -#include "clinic/decompressor.c.h" -#undef clinic_state - static PyMethodDef ZstdDecompressor_methods[] = { _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF - - {0} + {NULL, NULL} }; PyDoc_STRVAR(ZstdDecompressor_eof_doc, @@ -833,24 +658,22 @@ PyDoc_STRVAR(ZstdDecompressor_eof_doc, "after that, an EOFError exception will be raised."); PyDoc_STRVAR(ZstdDecompressor_needs_input_doc, -"If the max_length output limit in .decompress() method has been reached, and\n" -"the decompressor has (or may has) unconsumed input data, it will be set to\n" -"False. In this case, pass b'' to .decompress() method may output further data."); +"If the max_length output limit in .decompress() method has been reached,\n" +"and the decompressor has (or may has) unconsumed input data, it will be set\n" +"to False. In this case, passing b'' to the .decompress() method may output\n" +"further data."); static PyMemberDef ZstdDecompressor_members[] = { {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof), Py_READONLY, ZstdDecompressor_eof_doc}, - {"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input), Py_READONLY, ZstdDecompressor_needs_input_doc}, - - {0} + {NULL} }; static PyGetSetDef ZstdDecompressor_getset[] = { _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF - - {0} + {NULL} }; static int @@ -873,19 +696,19 @@ ZstdDecompressor_clear(PyObject *ob) static PyType_Slot ZstdDecompressor_slots[] = { {Py_tp_new, _zstd_ZstdDecompressor_new}, {Py_tp_dealloc, ZstdDecompressor_dealloc}, - {Py_tp_init, _zstd_ZstdDecompressor___init__}, {Py_tp_methods, ZstdDecompressor_methods}, {Py_tp_members, ZstdDecompressor_members}, {Py_tp_getset, ZstdDecompressor_getset}, - {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__}, {Py_tp_traverse, ZstdDecompressor_traverse}, {Py_tp_clear, ZstdDecompressor_clear}, - {0} + {0, 0} }; -PyType_Spec ZstdDecompressor_type_spec = { - .name = "_zstd.ZstdDecompressor", +PyType_Spec zstd_decompressor_type_spec = { + .name = "compression.zstd.ZstdDecompressor", .basicsize = sizeof(ZstdDecompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, .slots = ZstdDecompressor_slots, }; diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index a19224c4a6403b..76e966f5b1b52a 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -1,38 +1,67 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* ZstdDict class definitions */ /*[clinic input] module _zstd -class _zstd.ZstdDict "ZstdDict *" "clinic_state()->ZstdDict_type" +class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a5d1254c497e52ba]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dcc175ec974f81c]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif +#include "Python.h" + #include "_zstdmodule.h" +#include "clinic/zstddict.c.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked -#include <stddef.h> // offsetof() +#include <zstd.h> // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() #define ZstdDict_CAST(op) ((ZstdDict *)op) +/*[clinic input] +@classmethod +_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new + dict_content: Py_buffer + The content of a Zstandard dictionary as a bytes-like object. + / + * + is_raw: bool = False + If true, perform no checks on *dict_content*, useful for some + advanced cases. Otherwise, check that the content represents + a Zstandard dictionary created by the zstd library or CLI. + +Represents a Zstandard dictionary. + +The dictionary can be used for compression or decompression, and can be shared +by multiple ZstdCompressor or ZstdDecompressor objects. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw) +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ { - ZstdDict *self; - self = PyObject_GC_New(ZstdDict, type); + /* All dictionaries must be at least 8 bytes */ + if (dict_content->len < 8) { + PyErr_SetString(PyExc_ValueError, + "Zstandard dictionary content too short " + "(must have at least eight bytes)"); + return NULL; + } + + ZstdDict* self = PyObject_GC_New(ZstdDict, type); if (self == NULL) { - goto error; + return NULL; } - self->dict_content = NULL; - self->inited = 0; self->d_dict = NULL; + self->dict_buffer = NULL; + self->dict_id = 0; + self->lock = (PyMutex){0}; /* ZSTD_CDict dict */ self->c_dicts = PyDict_New(); @@ -40,12 +69,29 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_U goto error; } - return (PyObject*)self; + self->dict_buffer = PyMem_Malloc(dict_content->len); + if (!self->dict_buffer) { + PyErr_NoMemory(); + goto error; + } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; -error: - if (self != NULL) { - PyObject_GC_Del(self); + /* Get dict_id, 0 means "raw content" dictionary. */ + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); + + /* Check validity for ordinary dictionary */ + if (!is_raw && self->dict_id == 0) { + PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary"); + goto error; } + + PyObject_GC_Track(self); + + return (PyObject *)self; + +error: + Py_XDECREF(self); return NULL; } @@ -57,121 +103,64 @@ ZstdDict_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free ZSTD_DDict instance */ - ZSTD_freeDDict(self->d_dict); + if (self->d_dict) { + ZSTD_freeDDict(self->d_dict); + } + + assert(!PyMutex_IsLocked(&self->lock)); - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_CLEAR(self->dict_content); + /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */ + PyMem_Free(self->dict_buffer); Py_CLEAR(self->c_dicts); PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); + tp->tp_free(self); Py_DECREF(tp); } -/*[clinic input] -_zstd.ZstdDict.__init__ - - dict_content: object - A bytes-like object, dictionary's content. - is_raw: bool = False - This parameter is for advanced user. True means dict_content - argument is a "raw content" dictionary, free of any format - restriction. False means dict_content argument is an ordinary - zstd dictionary, was created by zstd functions, follow a - specified format. - -Represents a zstd dictionary, which can be used for compression/decompression. - -It's thread-safe, and can be shared by multiple ZstdCompressor / -ZstdDecompressor objects. -[clinic start generated code]*/ - -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw) -/*[clinic end generated code: output=c5f5a0d8377d037c input=e6750f62a513b3ee]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Check dict_content's type */ - self->dict_content = PyBytes_FromObject(dict_content); - if (self->dict_content == NULL) { - PyErr_SetString(PyExc_TypeError, - "dict_content argument should be bytes-like object."); - return -1; - } - - /* Both ordinary dictionary and "raw content" dictionary should - at least 8 bytes */ - if (Py_SIZE(self->dict_content) < 8) { - PyErr_SetString(PyExc_ValueError, - "Zstd dictionary content should at least 8 bytes."); - return -1; - } - - /* Get dict_id, 0 means "raw content" dictionary. */ - self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); - - /* Check validity for ordinary dictionary */ - if (!is_raw && self->dict_id == 0) { - char *msg = "The dict_content argument is not a valid zstd " - "dictionary. The first 4 bytes of a valid zstd dictionary " - "should be a magic number: b'\\x37\\xA4\\x30\\xEC'.\n" - "If you are an advanced user, and can be sure that " - "dict_content argument is a \"raw content\" zstd " - "dictionary, set is_raw parameter to True."; - PyErr_SetString(PyExc_ValueError, msg); - return -1; - } - - // Can only track self once self->dict_content is included - PyObject_GC_Track(self); - return 0; -} - -#define clinic_state() (get_zstd_state(type)) -#include "clinic/zstddict.c.h" -#undef clinic_state - PyDoc_STRVAR(ZstdDict_dictid_doc, -"ID of zstd dictionary, a 32-bit unsigned int value.\n\n" -"Non-zero means ordinary dictionary, was created by zstd functions, follow\n" -"a specified format.\n\n" -"0 means a \"raw content\" dictionary, free of any format restriction, used\n" -"for advanced user."); - -PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of zstd dictionary, a bytes object, it's the same as dict_content\n" -"argument in ZstdDict.__init__() method. It can be used with other programs."); +"The Zstandard dictionary, an int between 0 and 2**32.\n\n" +"A non-zero value represents an ordinary Zstandard dictionary,\n" +"conforming to the standardised format.\n\n" +"A value of zero indicates a 'raw content' dictionary,\n" +"without any restrictions on format or content."); static PyObject * -ZstdDict_str(PyObject *ob) +ZstdDict_repr(PyObject *ob) { ZstdDict *dict = ZstdDict_CAST(ob); return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>", - dict->dict_id, Py_SIZE(dict->dict_content)); + (unsigned int)dict->dict_id, dict->dict_len); } static PyMemberDef ZstdDict_members[] = { {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, - {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc}, - {0} + {NULL} }; /*[clinic input] -@critical_section +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + +/*[clinic input] @getter _zstd.ZstdDict.as_digested_dict Load as a digested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_digested_dict) + 1. Some advanced compression parameters of compressor may be overridden by parameters of digested dictionary. 2. ZstdDict has a digested dictionaries cache for each compression level. @@ -182,19 +171,20 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digeste static PyObject * _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=09b086e7a7320dbb input=585448c79f31f74a]*/ +/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_undigested_dict Load as an undigested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_undigested_dict) + 1. The advanced compression parameters of compressor will not be overridden. 2. Loading an undigested dictionary is costly. If load an undigested dictionary multiple times, consider reusing a compressor object. @@ -203,48 +193,46 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undiges static PyObject * _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=43c7a989e6d4253a input=022b0829ffb1c220]*/ +/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_prefix Load as a prefix to compressor/decompressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_prefix) + 1. Prefix is compatible with long distance matching, while dictionary is not. 2. It only works for the first frame, then the compressor/decompressor will return to no prefix state. -3. When decompressing, must use the same prefix as when compressing." +3. When decompressing, must use the same prefix as when compressing. [clinic start generated code]*/ static PyObject * _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) -/*[clinic end generated code: output=6f7130c356595a16 input=09fb82a6a5407e87]*/ +/*[clinic end generated code: output=6f7130c356595a16 input=45b3b6110f36d127]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); } static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF - _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF - _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF - - {0} + {NULL} }; static Py_ssize_t ZstdDict_length(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - assert(PyBytes_Check(self->dict_content)); - return Py_SIZE(self->dict_content); + return self->dict_len; } static int @@ -252,7 +240,6 @@ ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) { ZstdDict *self = ZstdDict_CAST(ob); Py_VISIT(self->c_dicts); - Py_VISIT(self->dict_content); return 0; } @@ -260,7 +247,7 @@ static int ZstdDict_clear(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - Py_CLEAR(self->dict_content); + Py_CLEAR(self->c_dicts); return 0; } @@ -269,18 +256,18 @@ static PyType_Slot zstddict_slots[] = { {Py_tp_getset, ZstdDict_getset}, {Py_tp_new, _zstd_ZstdDict_new}, {Py_tp_dealloc, ZstdDict_dealloc}, - {Py_tp_init, _zstd_ZstdDict___init__}, - {Py_tp_str, ZstdDict_str}, - {Py_tp_doc, (char*)_zstd_ZstdDict___init____doc__}, + {Py_tp_repr, ZstdDict_repr}, + {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, {Py_sq_length, ZstdDict_length}, {Py_tp_traverse, ZstdDict_traverse}, {Py_tp_clear, ZstdDict_clear}, - {0} + {0, 0} }; -PyType_Spec zstddict_type_spec = { - .name = "_zstd.ZstdDict", +PyType_Spec zstd_dict_type_spec = { + .name = "compression.zstd.ZstdDict", .basicsize = sizeof(ZstdDict), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, .slots = zstddict_slots, }; diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h new file mode 100644 index 00000000000000..e0d3f46b2b14a6 --- /dev/null +++ b/Modules/_zstd/zstddict.h @@ -0,0 +1,29 @@ +/* Low level interface to the Zstandard algorithm & the zstd library. */ + +#ifndef ZSTD_DICT_H +#define ZSTD_DICT_H + +#include <zstd.h> // ZSTD_DDict + +typedef struct { + PyObject_HEAD + + /* Reusable compress/decompress dictionary, they are created once and + can be shared by multiple threads concurrently, since its usage is + read-only. + c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ + ZSTD_DDict *d_dict; + PyObject *c_dicts; + + /* Dictionary content. */ + char *dict_buffer; + Py_ssize_t dict_len; + + /* Dictionary id */ + uint32_t dict_id; + + /* Lock to protect the digested dictionaries */ + PyMutex lock; +} ZstdDict; + +#endif // !ZSTD_DICT_H diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 401a3a7072b846..39d505c02596b8 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -13,6 +13,7 @@ #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> // offsetof() #include <stdbool.h> @@ -728,9 +729,7 @@ array_dealloc(PyObject *op) PyObject_GC_UnTrack(op); arrayobject *self = arrayobject_CAST(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); if (self->ob_item != NULL) { PyMem_Free(self->ob_item); } @@ -2839,6 +2838,9 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_SET_SIZE(self, n); self->allocated = n; } + else { + PyMem_Free(ustr); + } } else { // c == 'w' Py_ssize_t n = PyUnicode_GET_LENGTH(initial); diff --git a/Modules/blake2module.c b/Modules/blake2module.c index f9acc57f1b2fa3..ae37e2d3383f9b 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -43,12 +43,11 @@ // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't // great; but when compiling a universal2 binary, autoconf will set -// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be -// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor -// symbols. +// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64. +// If we're on macOS ARM64, we however disable these preprocessor symbols. #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif // ECX @@ -114,31 +113,32 @@ void detect_cpu_features(cpu_flags *flags) { } } -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 static inline bool has_simd128(cpu_flags *flags) { - // For now this is Intel-only, could conceivably be #ifdef'd to something + // For now this is Intel-only, could conceivably be if'd to something // else. return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov; } #endif -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 static inline bool has_simd256(cpu_flags *flags) { return flags->avx && flags->avx2; } #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b.h" #include "_hacl/Hacl_Hash_Blake2s.h" -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b_Simd256.h" #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h" #endif @@ -165,7 +165,7 @@ blake2_get_state(PyObject *module) return (Blake2State *)state; } -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) static inline Blake2State* blake2_get_state_from_type(PyTypeObject *module) { @@ -329,18 +329,18 @@ static inline bool is_blake2s(blake2_impl impl) { } static inline blake2_impl type_to_impl(PyTypeObject *type) { -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) Blake2State* st = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (has_simd256(&st->flags)) return Blake2b_256; else #endif return Blake2b; } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (has_simd128(&st->flags)) return Blake2s_128; else @@ -356,10 +356,10 @@ typedef struct { union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; #endif }; @@ -425,14 +425,14 @@ static void update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { - // These need to be ifdef'd out otherwise it's an unresolved symbol at - // link-time. -#ifdef HACL_CAN_COMPILE_SIMD256 + // blake2b_256_state and blake2s_128_state must be if'd since + // otherwise this results in an unresolved symbol at link-time. +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update,self->blake2b_256_state, buf, len); return; #endif -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update,self->blake2s_128_state, buf, len); return; @@ -468,12 +468,12 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, // Ensure that the states are NULL-initialized in case of an error. // See: py_blake2_clear() for more details. switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: self->blake2b_256_state = NULL; break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: self->blake2s_128_state = NULL; break; @@ -591,7 +591,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, }; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: { self->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&params, last_node, key->buf); if (self->blake2b_256_state == NULL) { @@ -601,7 +601,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, break; } #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: { self->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&params, last_node, key->buf); if (self->blake2s_128_state == NULL) { @@ -655,8 +655,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, /*[clinic input] @classmethod _blake2.blake2b.__new__ as py_blake2b_new - data: object(c_default="NULL") = b'' - / + data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2B_OUT_BYTES") = _blake2.blake2b.MAX_DIGEST_SIZE key: Py_buffer(c_default="NULL", py_default="b''") = None @@ -670,26 +669,31 @@ _blake2.blake2b.__new__ as py_blake2b_new inner_size: int = 0 last_node: bool = False usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new BLAKE2b hash object. [clinic start generated code]*/ static PyObject * -py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=32bfd8f043c6896f input=8fee2b7b11428b2d]*/ + int inner_size, int last_node, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=de64bd850606b6a0 input=78cf60a2922d2f90]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); } /*[clinic input] @classmethod _blake2.blake2s.__new__ as py_blake2s_new - data: object(c_default="NULL") = b'' - / + data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2S_OUT_BYTES") = _blake2.blake2s.MAX_DIGEST_SIZE key: Py_buffer(c_default="NULL", py_default="b''") = None @@ -703,18 +707,24 @@ _blake2.blake2s.__new__ as py_blake2s_new inner_size: int = 0 last_node: bool = False usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new BLAKE2s hash object. [clinic start generated code]*/ static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=556181f73905c686 input=8165a11980eac7f3]*/ + int inner_size, int last_node, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=582a0c4295cc3a3c input=6843d6332eefd295]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); } @@ -723,7 +733,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) { assert(cpy != NULL); switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: { cpy->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_copy(self->blake2b_256_state); if (cpy->blake2b_256_state == NULL) { @@ -732,7 +742,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) break; } #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: { cpy->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_copy(self->blake2s_128_state); if (cpy->blake2s_128_state == NULL) { @@ -843,12 +853,12 @@ _blake2_blake2b_digest_impl(Blake2Object *self) ENTER_HASHLIB(self); uint8_t digest_length = 0; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); break; @@ -881,12 +891,12 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self) ENTER_HASHLIB(self); uint8_t digest_length = 0; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); break; @@ -937,11 +947,11 @@ py_blake2b_get_digest_size(PyObject *op, void *Py_UNUSED(closure)) { Blake2Object *self = _Blake2Object_CAST(op); switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return PyLong_FromLong(Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state).digest_length); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return PyLong_FromLong(Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state).digest_length); #endif @@ -972,7 +982,7 @@ py_blake2_clear(PyObject *op) // it. If an error occurs in the constructor, we should only free // states that were allocated (i.e. that are not NULL). switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: if (self->blake2b_256_state != NULL) { Hacl_Hash_Blake2b_Simd256_free(self->blake2b_256_state); @@ -980,7 +990,7 @@ py_blake2_clear(PyObject *op) } break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: if (self->blake2s_128_state != NULL) { Hacl_Hash_Blake2s_Simd128_free(self->blake2s_128_state); diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index ef6faeb71274e1..b1984df2695b17 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data, return coded; case 2: /* second character of unicode pair */ - coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], - jisx0213_pair_encmap, JISX0213_ENCPAIRS); - if (coded != DBCINV) - return coded; + if (data[1] != 0) { /* Don't consume null char as part of pair */ + coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], + jisx0213_pair_encmap, JISX0213_ENCPAIRS); + if (coded != DBCINV) { + return coded; + } + } _Py_FALLTHROUGH; case -1: /* flush unterminated */ diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index f7127487aa5f59..cd77888d5514b8 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -192,8 +192,11 @@ ENCODER(euc_jis_2004) JISX0213_ENCPAIRS); if (code == DBCINV) return 1; - } else + } + else if (c2 != 0) { + /* Don't consume null char as part of pair */ insize = 2; + } } } } @@ -611,8 +614,10 @@ ENCODER(shift_jis_2004) if (code == DBCINV) return 1; } - else + else if (ch2 != 0) { + /* Don't consume null char as part of pair */ insize = 2; + } } } } diff --git a/Modules/clinic/_csv.c.h b/Modules/clinic/_csv.c.h index 416aeafccf917b..b8dd8ac35fac59 100644 --- a/Modules/clinic/_csv.c.h +++ b/Modules/clinic/_csv.c.h @@ -12,9 +12,7 @@ PyDoc_STRVAR(_csv_list_dialects__doc__, "list_dialects($module, /)\n" "--\n" "\n" -"Return a list of all known dialect names.\n" -"\n" -" names = csv.list_dialects()"); +"Return a list of all known dialect names."); #define _CSV_LIST_DIALECTS_METHODDEF \ {"list_dialects", (PyCFunction)_csv_list_dialects, METH_NOARGS, _csv_list_dialects__doc__}, @@ -32,9 +30,7 @@ PyDoc_STRVAR(_csv_unregister_dialect__doc__, "unregister_dialect($module, /, name)\n" "--\n" "\n" -"Delete the name/dialect mapping associated with a string name.\n" -"\n" -" csv.unregister_dialect(name)"); +"Delete the name/dialect mapping associated with a string name."); #define _CSV_UNREGISTER_DIALECT_METHODDEF \ {"unregister_dialect", _PyCFunction_CAST(_csv_unregister_dialect), METH_FASTCALL|METH_KEYWORDS, _csv_unregister_dialect__doc__}, @@ -92,9 +88,7 @@ PyDoc_STRVAR(_csv_get_dialect__doc__, "get_dialect($module, /, name)\n" "--\n" "\n" -"Return the dialect instance associated with name.\n" -"\n" -" dialect = csv.get_dialect(name)"); +"Return the dialect instance associated with name."); #define _CSV_GET_DIALECT_METHODDEF \ {"get_dialect", _PyCFunction_CAST(_csv_get_dialect), METH_FASTCALL|METH_KEYWORDS, _csv_get_dialect__doc__}, @@ -154,8 +148,6 @@ PyDoc_STRVAR(_csv_field_size_limit__doc__, "\n" "Sets an upper limit on parsed fields.\n" "\n" -" csv.field_size_limit([limit])\n" -"\n" "Returns old limit. If limit is not given, no new limit is set and\n" "the old limit is returned"); @@ -215,4 +207,4 @@ _csv_field_size_limit(PyObject *module, PyObject *const *args, Py_ssize_t nargs, exit: return return_value; } -/*[clinic end generated code: output=1fb09d5e7667ad0d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ed77cb69fad9f3b4 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_cursesmodule.c.h b/Modules/clinic/_cursesmodule.c.h index 552360eb80a545..8172440dc58c9d 100644 --- a/Modules/clinic/_cursesmodule.c.h +++ b/Modules/clinic/_cursesmodule.c.h @@ -301,7 +301,7 @@ PyDoc_STRVAR(_curses_window_attron__doc__, "attron($self, attr, /)\n" "--\n" "\n" -"Add attribute attr from the \"background\" set."); +"Add attribute attr to the \"background\" set."); #define _CURSES_WINDOW_ATTRON_METHODDEF \ {"attron", (PyCFunction)_curses_window_attron, METH_O, _curses_window_attron__doc__}, @@ -768,7 +768,7 @@ PyDoc_STRVAR(_curses_window_getch__doc__, #define _CURSES_WINDOW_GETCH_METHODDEF \ {"getch", (PyCFunction)_curses_window_getch, METH_VARARGS, _curses_window_getch__doc__}, -static int +static PyObject * _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x); @@ -779,7 +779,6 @@ _curses_window_getch(PyObject *self, PyObject *args) int group_right_1 = 0; int y = 0; int x = 0; - int _return_value; switch (PyTuple_GET_SIZE(args)) { case 0: @@ -794,11 +793,7 @@ _curses_window_getch(PyObject *self, PyObject *args) PyErr_SetString(PyExc_TypeError, "_curses.window.getch requires 0 to 2 arguments"); goto exit; } - _return_value = _curses_window_getch_impl((PyCursesWindowObject *)self, group_right_1, y, x); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromLong((long)_return_value); + return_value = _curses_window_getch_impl((PyCursesWindowObject *)self, group_right_1, y, x); exit: return return_value; @@ -4440,4 +4435,4 @@ _curses_has_extended_color_support(PyObject *module, PyObject *Py_UNUSED(ignored #ifndef _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #define _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #endif /* !defined(_CURSES_ASSUME_DEFAULT_COLORS_METHODDEF) */ -/*[clinic end generated code: output=42b2923d88c8d0f6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=78252f8805206d95 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_dbmmodule.c.h b/Modules/clinic/_dbmmodule.c.h index 5e503194408776..091ce9edc43d4b 100644 --- a/Modules/clinic/_dbmmodule.c.h +++ b/Modules/clinic/_dbmmodule.c.h @@ -5,6 +5,7 @@ preserve #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) # include "pycore_runtime.h" // _Py_SINGLETON() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_dbm_dbm_close__doc__, @@ -22,7 +23,13 @@ _dbm_dbm_close_impl(dbmobject *self); static PyObject * _dbm_dbm_close(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _dbm_dbm_close_impl((dbmobject *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _dbm_dbm_close_impl((dbmobject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_dbm_dbm_keys__doc__, @@ -40,11 +47,18 @@ _dbm_dbm_keys_impl(dbmobject *self, PyTypeObject *cls); static PyObject * _dbm_dbm_keys(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "keys() takes no arguments"); - return NULL; + goto exit; } - return _dbm_dbm_keys_impl((dbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _dbm_dbm_keys_impl((dbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_dbm_dbm_get__doc__, @@ -85,7 +99,9 @@ _dbm_dbm_get(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_ &key, &key_length, &default_value)) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _dbm_dbm_get_impl((dbmobject *)self, cls, key, key_length, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -131,7 +147,9 @@ _dbm_dbm_setdefault(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py &key, &key_length, &default_value)) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _dbm_dbm_setdefault_impl((dbmobject *)self, cls, key, key_length, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -152,11 +170,18 @@ _dbm_dbm_clear_impl(dbmobject *self, PyTypeObject *cls); static PyObject * _dbm_dbm_clear(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "clear() takes no arguments"); - return NULL; + goto exit; } - return _dbm_dbm_clear_impl((dbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _dbm_dbm_clear_impl((dbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(dbmopen__doc__, @@ -221,4 +246,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=3b456118f231b160 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=279511ea7cda38dd input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_gdbmmodule.c.h b/Modules/clinic/_gdbmmodule.c.h index 00950f18e53541..6fd6aa3da50335 100644 --- a/Modules/clinic/_gdbmmodule.c.h +++ b/Modules/clinic/_gdbmmodule.c.h @@ -5,6 +5,7 @@ preserve #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) # include "pycore_runtime.h" // _Py_SINGLETON() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_gdbm_gdbm_get__doc__, @@ -70,7 +71,9 @@ _gdbm_gdbm_setdefault(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } default_value = args[1]; skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _gdbm_gdbm_setdefault_impl((gdbmobject *)self, key, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -91,7 +94,13 @@ _gdbm_gdbm_close_impl(gdbmobject *self); static PyObject * _gdbm_gdbm_close(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _gdbm_gdbm_close_impl((gdbmobject *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_close_impl((gdbmobject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_keys__doc__, @@ -109,11 +118,18 @@ _gdbm_gdbm_keys_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_keys(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "keys() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_keys_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_keys_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_firstkey__doc__, @@ -135,11 +151,18 @@ _gdbm_gdbm_firstkey_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_firstkey(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "firstkey() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_firstkey_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_firstkey_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_nextkey__doc__, @@ -187,7 +210,9 @@ _gdbm_gdbm_nextkey(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ &key, &key_length)) { goto exit; } + Py_BEGIN_CRITICAL_SECTION(self); return_value = _gdbm_gdbm_nextkey_impl((gdbmobject *)self, cls, key, key_length); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -214,11 +239,18 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_reorganize(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "reorganize() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_reorganize_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_reorganize_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_sync__doc__, @@ -239,11 +271,18 @@ _gdbm_gdbm_sync_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_sync(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "sync() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_sync_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_sync_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_gdbm_gdbm_clear__doc__, @@ -261,11 +300,18 @@ _gdbm_gdbm_clear_impl(gdbmobject *self, PyTypeObject *cls); static PyObject * _gdbm_gdbm_clear(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "clear() takes no arguments"); - return NULL; + goto exit; } - return _gdbm_gdbm_clear_impl((gdbmobject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _gdbm_gdbm_clear_impl((gdbmobject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(dbmopen__doc__, @@ -343,4 +389,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=d974cb39e4ee5d67 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8bca34ce9d4493dd input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h index 59ab46ca3f0978..6d55f4617a8b08 100644 --- a/Modules/clinic/_hashopenssl.c.h +++ b/Modules/clinic/_hashopenssl.c.h @@ -232,8 +232,8 @@ EVPXOF_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje #endif /* defined(PY_OPENSSL_HAS_SHAKE) */ -PyDoc_STRVAR(EVP_new__doc__, -"new($module, /, name, string=b\'\', *, usedforsecurity=True)\n" +PyDoc_STRVAR(_hashlib_new__doc__, +"new($module, /, name, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new hash object using the named algorithm.\n" @@ -243,20 +243,20 @@ PyDoc_STRVAR(EVP_new__doc__, "\n" "The MD5 and SHA1 algorithms are always supported."); -#define EVP_NEW_METHODDEF \ - {"new", _PyCFunction_CAST(EVP_new), METH_FASTCALL|METH_KEYWORDS, EVP_new__doc__}, +#define _HASHLIB_NEW_METHODDEF \ + {"new", _PyCFunction_CAST(_hashlib_new), METH_FASTCALL|METH_KEYWORDS, _hashlib_new__doc__}, static PyObject * -EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj, - int usedforsecurity); +_hashlib_new_impl(PyObject *module, const char *name, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * -EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_hashlib_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -265,7 +265,7 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(name), &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(name), &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -274,30 +274,43 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"name", "string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"name", "data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "new", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; - PyObject *name_obj; - PyObject *data_obj = NULL; + const char *name; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - name_obj = args[0]; + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("new", "argument 'name'", "str", args[0]); + goto exit; + } + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); + if (name == NULL) { + goto exit; + } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!noptargs) { goto skip_optional_pos; } if (args[1]) { - data_obj = args[1]; + data = args[1]; if (!--noptargs) { goto skip_optional_pos; } @@ -306,19 +319,25 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[2]); - if (usedforsecurity < 0) { - goto exit; + if (args[2]) { + usedforsecurity = PyObject_IsTrue(args[2]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[3]; skip_optional_kwonly: - return_value = EVP_new_impl(module, name_obj, data_obj, usedforsecurity); + return_value = _hashlib_new_impl(module, name, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_md5__doc__, -"openssl_md5($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_md5($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Returns a md5 hash object; optionally initialized with a string"); @@ -327,8 +346,8 @@ PyDoc_STRVAR(_hashlib_openssl_md5__doc__, {"openssl_md5", _PyCFunction_CAST(_hashlib_openssl_md5), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_md5__doc__}, static PyObject * -_hashlib_openssl_md5_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_md5_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -336,7 +355,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -345,7 +364,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -354,17 +373,18 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_md5", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -375,7 +395,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -384,19 +404,25 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_md5_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_md5_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha1__doc__, -"openssl_sha1($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha1($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Returns a sha1 hash object; optionally initialized with a string"); @@ -405,8 +431,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha1__doc__, {"openssl_sha1", _PyCFunction_CAST(_hashlib_openssl_sha1), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha1__doc__}, static PyObject * -_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -414,7 +440,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -423,7 +449,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -432,17 +458,18 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha1", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -453,7 +480,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -462,19 +489,26 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha1_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha1_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha224__doc__, -"openssl_sha224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha224($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha224 hash object; optionally initialized with a string"); @@ -483,8 +517,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha224__doc__, {"openssl_sha224", _PyCFunction_CAST(_hashlib_openssl_sha224), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha224__doc__}, static PyObject * -_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -492,7 +526,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -501,7 +535,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -510,17 +544,18 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -531,7 +566,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -540,19 +575,26 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha224_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha224_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha256__doc__, -"openssl_sha256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha256 hash object; optionally initialized with a string"); @@ -561,8 +603,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha256__doc__, {"openssl_sha256", _PyCFunction_CAST(_hashlib_openssl_sha256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha256__doc__}, static PyObject * -_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -570,7 +612,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -579,7 +621,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -588,17 +630,18 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -609,7 +652,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -618,19 +661,26 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha256_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha384__doc__, -"openssl_sha384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha384($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha384 hash object; optionally initialized with a string"); @@ -639,8 +689,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha384__doc__, {"openssl_sha384", _PyCFunction_CAST(_hashlib_openssl_sha384), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha384__doc__}, static PyObject * -_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -648,7 +698,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -657,7 +707,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -666,17 +716,18 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -687,7 +738,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -696,19 +747,26 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha384_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha384_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha512__doc__, -"openssl_sha512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha512($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha512 hash object; optionally initialized with a string"); @@ -717,8 +775,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha512__doc__, {"openssl_sha512", _PyCFunction_CAST(_hashlib_openssl_sha512), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha512__doc__}, static PyObject * -_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -726,7 +784,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -735,7 +793,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -744,17 +802,18 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -765,7 +824,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -774,12 +833,18 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha512_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha512_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -788,7 +853,8 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_224__doc__, -"openssl_sha3_224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_224($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-224 hash object; optionally initialized with a string"); @@ -797,8 +863,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_224__doc__, {"openssl_sha3_224", _PyCFunction_CAST(_hashlib_openssl_sha3_224), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_224__doc__}, static PyObject * -_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -806,7 +872,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -815,7 +881,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -824,17 +890,18 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -845,7 +912,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -854,12 +921,18 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_224_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_224_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -870,7 +943,8 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_256__doc__, -"openssl_sha3_256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-256 hash object; optionally initialized with a string"); @@ -879,8 +953,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_256__doc__, {"openssl_sha3_256", _PyCFunction_CAST(_hashlib_openssl_sha3_256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_256__doc__}, static PyObject * -_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -888,7 +962,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -897,7 +971,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -906,17 +980,18 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -927,7 +1002,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -936,12 +1011,18 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_256_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -952,7 +1033,8 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_384__doc__, -"openssl_sha3_384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_384($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-384 hash object; optionally initialized with a string"); @@ -961,8 +1043,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_384__doc__, {"openssl_sha3_384", _PyCFunction_CAST(_hashlib_openssl_sha3_384), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_384__doc__}, static PyObject * -_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -970,7 +1052,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -979,7 +1061,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -988,17 +1070,18 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1009,7 +1092,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1018,12 +1101,18 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_384_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_384_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1034,7 +1123,8 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_512__doc__, -"openssl_sha3_512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_512($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-512 hash object; optionally initialized with a string"); @@ -1043,8 +1133,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_512__doc__, {"openssl_sha3_512", _PyCFunction_CAST(_hashlib_openssl_sha3_512), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_512__doc__}, static PyObject * -_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1052,7 +1142,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1061,7 +1151,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1070,17 +1160,18 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1091,7 +1182,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1100,12 +1191,18 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_512_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_512_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1116,7 +1213,8 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHAKE) PyDoc_STRVAR(_hashlib_openssl_shake_128__doc__, -"openssl_shake_128($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_shake_128($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a shake-128 variable hash object; optionally initialized with a string"); @@ -1125,8 +1223,8 @@ PyDoc_STRVAR(_hashlib_openssl_shake_128__doc__, {"openssl_shake_128", _PyCFunction_CAST(_hashlib_openssl_shake_128), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_shake_128__doc__}, static PyObject * -_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1134,7 +1232,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1143,7 +1241,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1152,17 +1250,18 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_shake_128", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1173,7 +1272,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1182,12 +1281,18 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_shake_128_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_shake_128_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1198,7 +1303,8 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n #if defined(PY_OPENSSL_HAS_SHAKE) PyDoc_STRVAR(_hashlib_openssl_shake_256__doc__, -"openssl_shake_256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_shake_256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a shake-256 variable hash object; optionally initialized with a string"); @@ -1207,8 +1313,8 @@ PyDoc_STRVAR(_hashlib_openssl_shake_256__doc__, {"openssl_shake_256", _PyCFunction_CAST(_hashlib_openssl_shake_256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_shake_256__doc__}, static PyObject * -_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1216,7 +1322,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1225,7 +1331,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1234,17 +1340,18 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_shake_256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1255,7 +1362,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1264,12 +1371,18 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_shake_256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_shake_256_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1871,4 +1984,4 @@ _hashlib_compare_digest(PyObject *module, PyObject *const *args, Py_ssize_t narg #ifndef _HASHLIB_SCRYPT_METHODDEF #define _HASHLIB_SCRYPT_METHODDEF #endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */ -/*[clinic end generated code: output=2c78822e38be64a8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a863ec4166ed2fbb input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 81d108627265ab..b43155b6c24e3c 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -2,6 +2,7 @@ preserve [clinic start generated code]*/ +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_heapq_heappush__doc__, @@ -32,7 +33,9 @@ _heapq_heappush(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappush_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -61,7 +64,9 @@ _heapq_heappop(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappop_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -103,7 +108,9 @@ _heapq_heapreplace(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapreplace_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -140,7 +147,9 @@ _heapq_heappushpop(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappushpop_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -169,7 +178,9 @@ _heapq_heapify(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapify_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -203,7 +214,9 @@ _heapq_heappush_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappush_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -232,7 +245,9 @@ _heapq_heappop_max(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappop_max_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -266,7 +281,9 @@ _heapq_heapreplace_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapreplace_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -295,7 +312,9 @@ _heapq_heapify_max(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapify_max_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -332,9 +351,11 @@ _heapq_heappushpop_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappushpop_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; } -/*[clinic end generated code: output=f55d8595ce150c76 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e83d50002c29a96d input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_lsprof.c.h b/Modules/clinic/_lsprof.c.h index 2918a6bc7abe74..acb4aaf27e377f 100644 --- a/Modules/clinic/_lsprof.c.h +++ b/Modules/clinic/_lsprof.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_lsprof_Profiler_getstats__doc__, @@ -45,11 +46,18 @@ _lsprof_Profiler_getstats_impl(ProfilerObject *self, PyTypeObject *cls); static PyObject * _lsprof_Profiler_getstats(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { PyErr_SetString(PyExc_TypeError, "getstats() takes no arguments"); - return NULL; + goto exit; } - return _lsprof_Profiler_getstats_impl((ProfilerObject *)self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _lsprof_Profiler_getstats_impl((ProfilerObject *)self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_lsprof_Profiler__pystart_callback__doc__, @@ -76,7 +84,44 @@ _lsprof_Profiler__pystart_callback(PyObject *self, PyObject *const *args, Py_ssi } code = args[0]; instruction_offset = args[1]; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _lsprof_Profiler__pystart_callback_impl((ProfilerObject *)self, code, instruction_offset); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(_lsprof_Profiler__pythrow_callback__doc__, +"_pythrow_callback($self, code, instruction_offset, exception, /)\n" +"--\n" +"\n"); + +#define _LSPROF_PROFILER__PYTHROW_CALLBACK_METHODDEF \ + {"_pythrow_callback", _PyCFunction_CAST(_lsprof_Profiler__pythrow_callback), METH_FASTCALL, _lsprof_Profiler__pythrow_callback__doc__}, + +static PyObject * +_lsprof_Profiler__pythrow_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *exception); + +static PyObject * +_lsprof_Profiler__pythrow_callback(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *code; + PyObject *instruction_offset; + PyObject *exception; + + if (!_PyArg_CheckPositional("_pythrow_callback", nargs, 3, 3)) { + goto exit; + } + code = args[0]; + instruction_offset = args[1]; + exception = args[2]; + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _lsprof_Profiler__pythrow_callback_impl((ProfilerObject *)self, code, instruction_offset, exception); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -110,7 +155,9 @@ _lsprof_Profiler__pyreturn_callback(PyObject *self, PyObject *const *args, Py_ss code = args[0]; instruction_offset = args[1]; retval = args[2]; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _lsprof_Profiler__pyreturn_callback_impl((ProfilerObject *)self, code, instruction_offset, retval); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -145,7 +192,9 @@ _lsprof_Profiler__ccall_callback(PyObject *self, PyObject *const *args, Py_ssize instruction_offset = args[1]; callable = args[2]; self_arg = args[3]; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _lsprof_Profiler__ccall_callback_impl((ProfilerObject *)self, code, instruction_offset, callable, self_arg); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -182,7 +231,9 @@ _lsprof_Profiler__creturn_callback(PyObject *self, PyObject *const *args, Py_ssi instruction_offset = args[1]; callable = args[2]; self_arg = args[3]; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _lsprof_Profiler__creturn_callback_impl((ProfilerObject *)self, code, instruction_offset, callable, self_arg); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -266,7 +317,9 @@ _lsprof_Profiler_enable(PyObject *self, PyObject *const *args, Py_ssize_t nargs, goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _lsprof_Profiler_enable_impl((ProfilerObject *)self, subcalls, builtins); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -287,7 +340,13 @@ _lsprof_Profiler_disable_impl(ProfilerObject *self); static PyObject * _lsprof_Profiler_disable(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _lsprof_Profiler_disable_impl((ProfilerObject *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _lsprof_Profiler_disable_impl((ProfilerObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(_lsprof_Profiler_clear__doc__, @@ -305,7 +364,13 @@ _lsprof_Profiler_clear_impl(ProfilerObject *self); static PyObject * _lsprof_Profiler_clear(PyObject *self, PyObject *Py_UNUSED(ignored)) { - return _lsprof_Profiler_clear_impl((ProfilerObject *)self); + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _lsprof_Profiler_clear_impl((ProfilerObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; } PyDoc_STRVAR(profiler_init__doc__, @@ -411,4 +476,4 @@ profiler_init(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=fe231309776df7a7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=af26a0b0ddcc3351 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_randommodule.c.h b/Modules/clinic/_randommodule.c.h index 1e989e970c9de5..2563a16aea0b6f 100644 --- a/Modules/clinic/_randommodule.c.h +++ b/Modules/clinic/_randommodule.c.h @@ -3,6 +3,7 @@ preserve [clinic start generated code]*/ #include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_long.h" // _PyLong_UInt64_Converter() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_random_Random_random__doc__, @@ -124,16 +125,15 @@ PyDoc_STRVAR(_random_Random_getrandbits__doc__, {"getrandbits", (PyCFunction)_random_Random_getrandbits, METH_O, _random_Random_getrandbits__doc__}, static PyObject * -_random_Random_getrandbits_impl(RandomObject *self, int k); +_random_Random_getrandbits_impl(RandomObject *self, uint64_t k); static PyObject * _random_Random_getrandbits(PyObject *self, PyObject *arg) { PyObject *return_value = NULL; - int k; + uint64_t k; - k = PyLong_AsInt(arg); - if (k == -1 && PyErr_Occurred()) { + if (!_PyLong_UInt64_Converter(arg, &k)) { goto exit; } Py_BEGIN_CRITICAL_SECTION(self); @@ -143,4 +143,4 @@ _random_Random_getrandbits(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=4458b5a69201ebea input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7ce97b2194eecaf7 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h new file mode 100644 index 00000000000000..f6a51cdba6b401 --- /dev/null +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -0,0 +1,291 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, +"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" +" debug=False)\n" +"--\n" +"\n" +"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" +"\n" +"Args:\n" +" pid: Process ID of the target Python process to debug\n" +" all_threads: If True, initialize state for all threads in the process.\n" +" If False, only initialize for the main thread.\n" +" only_active_thread: If True, only sample the thread holding the GIL.\n" +" Cannot be used together with all_threads=True.\n" +" debug: If True, chain exceptions to explain the sequence of events that\n" +" lead to the exception.\n" +"\n" +"The RemoteUnwinder provides functionality to inspect and debug a running Python\n" +"process, including examining thread states, stack frames and other runtime data.\n" +"\n" +"Raises:\n" +" PermissionError: If access to the target process is denied\n" +" OSError: If unable to attach to the target process or access its memory\n" +" RuntimeError: If unable to read debug information from the target process\n" +" ValueError: If both all_threads and only_active_thread are True"); + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads, + int only_active_thread, + int debug); + +static int +_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(debug), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "debug", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "RemoteUnwinder", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; + int pid; + int all_threads = 0; + int only_active_thread = 0; + int debug = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + pid = PyLong_AsInt(fastargs[0]); + if (pid == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (fastargs[1]) { + all_threads = PyObject_IsTrue(fastargs[1]); + if (all_threads < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[2]) { + only_active_thread = PyObject_IsTrue(fastargs[2]); + if (only_active_thread < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + debug = PyObject_IsTrue(fastargs[3]); + if (debug < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, debug); + +exit: + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_stack_trace__doc__, +"get_stack_trace($self, /)\n" +"--\n" +"\n" +"Returns a list of stack traces for threads in the target process.\n" +"\n" +"Each element in the returned list is a tuple of (thread_id, frame_list), where:\n" +"- thread_id is the OS thread identifier\n" +"- frame_list is a list of tuples (function_name, filename, line_number) representing\n" +" the Python stack frames for that thread, ordered from most recent to oldest\n" +"\n" +"The threads returned depend on the initialization parameters:\n" +"- If only_active_thread was True: returns only the thread holding the GIL\n" +"- If all_threads was True: returns all threads\n" +"- Otherwise: returns only the main thread\n" +"\n" +"Example:\n" +" [\n" +" (1234, [\n" +" (\'process_data\', \'worker.py\', 127),\n" +" (\'run_worker\', \'worker.py\', 45),\n" +" (\'main\', \'app.py\', 23)\n" +" ]),\n" +" (1235, [\n" +" (\'handle_request\', \'server.py\', 89),\n" +" (\'serve_forever\', \'server.py\', 52)\n" +" ])\n" +" ]\n" +"\n" +"Raises:\n" +" RuntimeError: If there is an error copying memory from the target process\n" +" OSError: If there is an error accessing the target process\n" +" PermissionError: If access to the target process is denied\n" +" UnicodeDecodeError: If there is an error decoding strings from the target process"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF \ + {"get_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__, +"get_all_awaited_by($self, /)\n" +"--\n" +"\n" +"Get all tasks and their awaited_by relationships from the remote process.\n" +"\n" +"This provides a tree structure showing which tasks are waiting for other tasks.\n" +"\n" +"For each task, returns:\n" +"1. The call stack frames leading to where the task is currently executing\n" +"2. The name of the task\n" +"3. A list of tasks that this task is waiting for, with their own frames/names/etc\n" +"\n" +"Returns a list of [frames, task_name, subtasks] where:\n" +"- frames: List of (func_name, filename, lineno) showing the call stack\n" +"- task_name: String identifier for the task\n" +"- subtasks: List of tasks being awaited by this task, in same format\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the remote process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output:\n" +"[\n" +" [\n" +" [(\"c5\", \"script.py\", 10), (\"c4\", \"script.py\", 14)],\n" +" \"c2_root\",\n" +" [\n" +" [\n" +" [(\"c1\", \"script.py\", 23)],\n" +" \"sub_main_2\",\n" +" [...]\n" +" ],\n" +" [...]\n" +" ]\n" +" ]\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF \ + {"get_all_awaited_by", (PyCFunction)_remote_debugging_RemoteUnwinder_get_all_awaited_by, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__, +"get_async_stack_trace($self, /)\n" +"--\n" +"\n" +"Get the currently running async tasks and their dependency graphs from the remote process.\n" +"\n" +"This returns information about running tasks and all tasks that are waiting for them,\n" +"forming a complete dependency graph for each thread\'s active task.\n" +"\n" +"For each thread with a running task, returns the running task plus all tasks that\n" +"transitively depend on it (tasks waiting for the running task, tasks waiting for\n" +"those tasks, etc.).\n" +"\n" +"Returns a list of per-thread results, where each thread result contains:\n" +"- Thread ID\n" +"- List of task information for the running task and all its waiters\n" +"\n" +"Each task info contains:\n" +"- Task ID (memory address)\n" +"- Task name\n" +"- Call stack frames: List of (func_name, filename, lineno)\n" +"- List of tasks waiting for this task (recursive structure)\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the target process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output (similar structure to get_all_awaited_by but only for running tasks):\n" +"[\n" +" (140234, [\n" +" (4345585712, \'main_task\',\n" +" [(\"run_server\", \"server.py\", 127), (\"main\", \"app.py\", 23)],\n" +" [\n" +" (4345585800, \'worker_1\', [...], [...]),\n" +" (4345585900, \'worker_2\', [...], [...])\n" +" ])\n" +" ])\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF \ + {"get_async_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_async_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} +/*[clinic end generated code: output=0dd1e6e8bab2a8b1 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_ssl.c.h b/Modules/clinic/_ssl.c.h index c6e2abd4d93474..6c6ef4cebd7998 100644 --- a/Modules/clinic/_ssl.c.h +++ b/Modules/clinic/_ssl.c.h @@ -47,7 +47,7 @@ static PyObject * _ssl__test_decode_cert(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; - PyObject *path; + PyObject *path = NULL; if (!PyUnicode_FSConverter(arg, &path)) { goto exit; @@ -55,6 +55,9 @@ _ssl__test_decode_cert(PyObject *module, PyObject *arg) return_value = _ssl__test_decode_cert_impl(module, path); exit: + /* Cleanup for path */ + Py_XDECREF(path); + return return_value; } @@ -2900,4 +2903,4 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje #ifndef _SSL_ENUM_CRLS_METHODDEF #define _SSL_ENUM_CRLS_METHODDEF #endif /* !defined(_SSL_ENUM_CRLS_METHODDEF) */ -/*[clinic end generated code: output=748650909fec8906 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=cbd8526cd625c433 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h index bb2e308574a50a..9e9cd56e569b24 100644 --- a/Modules/clinic/blake2module.c.h +++ b/Modules/clinic/blake2module.c.h @@ -10,20 +10,21 @@ preserve #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_blake2b_new__doc__, -"blake2b(data=b\'\', /, *, digest_size=_blake2.blake2b.MAX_DIGEST_SIZE,\n" +"blake2b(data=b\'\', *, digest_size=_blake2.blake2b.MAX_DIGEST_SIZE,\n" " key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" " node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" -" usedforsecurity=True)\n" +" usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new BLAKE2b hash object."); static PyObject * -py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity); + int inner_size, int last_node, int usedforsecurity, + PyObject *string); static PyObject * py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -31,7 +32,7 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 12 + #define NUM_KEYWORDS 14 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -40,7 +41,7 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -49,18 +50,18 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "blake2b", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[13]; + PyObject *argsbuf[14]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2B_OUT_BYTES; Py_buffer key = {NULL, NULL}; Py_buffer salt = {NULL, NULL}; @@ -73,18 +74,23 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) int inner_size = 0; int last_node = 0; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } @@ -182,12 +188,18 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_kwonly; } } - usedforsecurity = PyObject_IsTrue(fastargs[12]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[12]) { + usedforsecurity = PyObject_IsTrue(fastargs[12]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[13]; skip_optional_kwonly: - return_value = py_blake2b_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return_value = py_blake2b_new_impl(type, data_obj, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity, string); exit: /* Cleanup for key */ @@ -207,20 +219,21 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } PyDoc_STRVAR(py_blake2s_new__doc__, -"blake2s(data=b\'\', /, *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" +"blake2s(data=b\'\', *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" " key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" " node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" -" usedforsecurity=True)\n" +" usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new BLAKE2s hash object."); static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity); + int inner_size, int last_node, int usedforsecurity, + PyObject *string); static PyObject * py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -228,7 +241,7 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 12 + #define NUM_KEYWORDS 14 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -237,7 +250,7 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -246,18 +259,18 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "blake2s", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[13]; + PyObject *argsbuf[14]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2S_OUT_BYTES; Py_buffer key = {NULL, NULL}; Py_buffer salt = {NULL, NULL}; @@ -270,18 +283,23 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) int inner_size = 0; int last_node = 0; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } @@ -379,12 +397,18 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_kwonly; } } - usedforsecurity = PyObject_IsTrue(fastargs[12]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[12]) { + usedforsecurity = PyObject_IsTrue(fastargs[12]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[13]; skip_optional_kwonly: - return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return_value = py_blake2s_new_impl(type, data_obj, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity, string); exit: /* Cleanup for key */ @@ -478,4 +502,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_hexdigest_impl((Blake2Object *)self); } -/*[clinic end generated code: output=d30e8293bd8e2950 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=eed18dcfaf6f7731 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/fcntlmodule.c.h b/Modules/clinic/fcntlmodule.c.h index 00a929064ba700..85bfdefdded027 100644 --- a/Modules/clinic/fcntlmodule.c.h +++ b/Modules/clinic/fcntlmodule.c.h @@ -6,17 +6,23 @@ PyDoc_STRVAR(fcntl_fcntl__doc__, "fcntl($module, fd, cmd, arg=0, /)\n" "--\n" "\n" -"Perform the operation `cmd` on file descriptor fd.\n" +"Perform the operation cmd on file descriptor fd.\n" "\n" -"The values used for `cmd` are operating system dependent, and are available\n" -"as constants in the fcntl module, using the same names as used in\n" -"the relevant C header files. The argument arg is optional, and\n" -"defaults to 0; it may be an int or a string. If arg is given as a string,\n" -"the return value of fcntl is a string of that length, containing the\n" -"resulting value put in the arg buffer by the operating system. The length\n" -"of the arg string is not allowed to exceed 1024 bytes. If the arg given\n" -"is an integer or if none is specified, the result value is an integer\n" -"corresponding to the return value of the fcntl call in the C code."); +"The values used for cmd are operating system dependent, and are\n" +"available as constants in the fcntl module, using the same names as used\n" +"in the relevant C header files. The argument arg is optional, and\n" +"defaults to 0; it may be an integer, a bytes-like object or a string.\n" +"If arg is given as a string, it will be encoded to binary using the\n" +"UTF-8 encoding.\n" +"\n" +"If the arg given is an integer or if none is specified, the result value\n" +"is an integer corresponding to the return value of the fcntl() call in\n" +"the C code.\n" +"\n" +"If arg is given as a bytes-like object, the return value of fcntl() is a\n" +"bytes object of that length, containing the resulting value put in the\n" +"arg buffer by the operating system. The length of the arg buffer is not\n" +"allowed to exceed 1024 bytes."); #define FCNTL_FCNTL_METHODDEF \ {"fcntl", (PyCFunction)(void(*)(void))fcntl_fcntl, METH_FASTCALL, fcntl_fcntl__doc__}, @@ -63,34 +69,34 @@ PyDoc_STRVAR(fcntl_ioctl__doc__, "ioctl($module, fd, request, arg=0, mutate_flag=True, /)\n" "--\n" "\n" -"Perform the operation `request` on file descriptor `fd`.\n" +"Perform the operation request on file descriptor fd.\n" "\n" -"The values used for `request` are operating system dependent, and are available\n" -"as constants in the fcntl or termios library modules, using the same names as\n" -"used in the relevant C header files.\n" +"The values used for request are operating system dependent, and are\n" +"available as constants in the fcntl or termios library modules, using\n" +"the same names as used in the relevant C header files.\n" "\n" -"The argument `arg` is optional, and defaults to 0; it may be an int or a\n" -"buffer containing character data (most likely a string or an array).\n" +"The argument arg is optional, and defaults to 0; it may be an integer, a\n" +"bytes-like object or a string. If arg is given as a string, it will be\n" +"encoded to binary using the UTF-8 encoding.\n" "\n" -"If the argument is a mutable buffer (such as an array) and if the\n" -"mutate_flag argument (which is only allowed in this case) is true then the\n" -"buffer is (in effect) passed to the operating system and changes made by\n" -"the OS will be reflected in the contents of the buffer after the call has\n" -"returned. The return value is the integer returned by the ioctl system\n" -"call.\n" +"If the arg given is an integer or if none is specified, the result value\n" +"is an integer corresponding to the return value of the ioctl() call in\n" +"the C code.\n" "\n" -"If the argument is a mutable buffer and the mutable_flag argument is false,\n" -"the behavior is as if a string had been passed.\n" +"If the argument is a mutable buffer (such as a bytearray) and the\n" +"mutate_flag argument is true (default) then the buffer is (in effect)\n" +"passed to the operating system and changes made by the OS will be\n" +"reflected in the contents of the buffer after the call has returned.\n" +"The return value is the integer returned by the ioctl() system call.\n" "\n" -"If the argument is an immutable buffer (most likely a string) then a copy\n" -"of the buffer is passed to the operating system and the return value is a\n" -"string of the same length containing whatever the operating system put in\n" -"the buffer. The length of the arg buffer in this case is not allowed to\n" -"exceed 1024 bytes.\n" +"If the argument is a mutable buffer and the mutable_flag argument is\n" +"false, the behavior is as if an immutable buffer had been passed.\n" "\n" -"If the arg given is an integer or if none is specified, the result value is\n" -"an integer corresponding to the return value of the ioctl call in the C\n" -"code."); +"If the argument is an immutable buffer then a copy of the buffer is\n" +"passed to the operating system and the return value is a bytes object of\n" +"the same length containing whatever the operating system put in the\n" +"buffer. The length of the arg buffer in this case is not allowed to\n" +"exceed 1024 bytes."); #define FCNTL_IOCTL_METHODDEF \ {"ioctl", (PyCFunction)(void(*)(void))fcntl_ioctl, METH_FASTCALL, fcntl_ioctl__doc__}, @@ -147,7 +153,7 @@ PyDoc_STRVAR(fcntl_flock__doc__, "flock($module, fd, operation, /)\n" "--\n" "\n" -"Perform the lock operation `operation` on file descriptor `fd`.\n" +"Perform the lock operation on file descriptor fd.\n" "\n" "See the Unix manual page for flock(2) for details (On some systems, this\n" "function is emulated using fcntl())."); @@ -189,22 +195,22 @@ PyDoc_STRVAR(fcntl_lockf__doc__, "\n" "A wrapper around the fcntl() locking calls.\n" "\n" -"`fd` is the file descriptor of the file to lock or unlock, and operation is one\n" -"of the following values:\n" +"fd is the file descriptor of the file to lock or unlock, and operation\n" +"is one of the following values:\n" "\n" " LOCK_UN - unlock\n" " LOCK_SH - acquire a shared lock\n" " LOCK_EX - acquire an exclusive lock\n" "\n" "When operation is LOCK_SH or LOCK_EX, it can also be bitwise ORed with\n" -"LOCK_NB to avoid blocking on lock acquisition. If LOCK_NB is used and the\n" -"lock cannot be acquired, an OSError will be raised and the exception will\n" -"have an errno attribute set to EACCES or EAGAIN (depending on the operating\n" -"system -- for portability, check for either value).\n" +"LOCK_NB to avoid blocking on lock acquisition. If LOCK_NB is used and\n" +"the lock cannot be acquired, an OSError will be raised and the exception\n" +"will have an errno attribute set to EACCES or EAGAIN (depending on the\n" +"operating system -- for portability, check for either value).\n" "\n" -"`len` is the number of bytes to lock, with the default meaning to lock to\n" -"EOF. `start` is the byte offset, relative to `whence`, to that the lock\n" -"starts. `whence` is as with fileobj.seek(), specifically:\n" +"len is the number of bytes to lock, with the default meaning to lock to\n" +"EOF. start is the byte offset, relative to whence, to that the lock\n" +"starts. whence is as with fileobj.seek(), specifically:\n" "\n" " 0 - relative to the start of the file (SEEK_SET)\n" " 1 - relative to the current buffer position (SEEK_CUR)\n" @@ -264,4 +270,4 @@ fcntl_lockf(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=65a16bc64c7b4de4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a9345d258926adb7 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index 4c2c8acd8f69d8..9df73b187bb827 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -108,9 +108,7 @@ PyDoc_STRVAR(math_factorial__doc__, "factorial($module, n, /)\n" "--\n" "\n" -"Find n!.\n" -"\n" -"Raise a ValueError if x is negative or non-integral."); +"Find n!."); #define MATH_FACTORIAL_METHODDEF \ {"factorial", (PyCFunction)math_factorial, METH_O, math_factorial__doc__}, @@ -1112,4 +1110,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=634773bd18cd3f93 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=77e7b8c161c39843 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/md5module.c.h b/Modules/clinic/md5module.c.h index 9ca4f6528ce8f5..f76902586dddb2 100644 --- a/Modules/clinic/md5module.c.h +++ b/Modules/clinic/md5module.c.h @@ -89,7 +89,7 @@ MD5Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_md5_md5__doc__, -"md5($module, /, string=b\'\', *, usedforsecurity=True)\n" +"md5($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new MD5 hash object; optionally initialized with a string."); @@ -98,7 +98,8 @@ PyDoc_STRVAR(_md5_md5__doc__, {"md5", _PyCFunction_CAST(_md5_md5), METH_FASTCALL|METH_KEYWORDS, _md5_md5__doc__}, static PyObject * -_md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity); +_md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -106,7 +107,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -115,7 +116,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -124,17 +125,18 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "md5", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -145,7 +147,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -154,14 +156,20 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _md5_md5_impl(module, string, usedforsecurity); + return_value = _md5_md5_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=73f4d2034d9fcc63 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=920fe54b9ed06f92 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/mmapmodule.c.h b/Modules/clinic/mmapmodule.c.h new file mode 100644 index 00000000000000..e00f82fb88bdd1 --- /dev/null +++ b/Modules/clinic/mmapmodule.c.h @@ -0,0 +1,769 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#include "pycore_abstract.h" // _PyNumber_Index() +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() + +PyDoc_STRVAR(mmap_mmap_close__doc__, +"close($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_CLOSE_METHODDEF \ + {"close", (PyCFunction)mmap_mmap_close, METH_NOARGS, mmap_mmap_close__doc__}, + +static PyObject * +mmap_mmap_close_impl(mmap_object *self); + +static PyObject * +mmap_mmap_close(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_close_impl((mmap_object *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_read_byte__doc__, +"read_byte($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_READ_BYTE_METHODDEF \ + {"read_byte", (PyCFunction)mmap_mmap_read_byte, METH_NOARGS, mmap_mmap_read_byte__doc__}, + +static PyObject * +mmap_mmap_read_byte_impl(mmap_object *self); + +static PyObject * +mmap_mmap_read_byte(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_read_byte_impl((mmap_object *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_readline__doc__, +"readline($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_READLINE_METHODDEF \ + {"readline", (PyCFunction)mmap_mmap_readline, METH_NOARGS, mmap_mmap_readline__doc__}, + +static PyObject * +mmap_mmap_readline_impl(mmap_object *self); + +static PyObject * +mmap_mmap_readline(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_readline_impl((mmap_object *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_read__doc__, +"read($self, n=None, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_READ_METHODDEF \ + {"read", _PyCFunction_CAST(mmap_mmap_read), METH_FASTCALL, mmap_mmap_read__doc__}, + +static PyObject * +mmap_mmap_read_impl(mmap_object *self, Py_ssize_t num_bytes); + +static PyObject * +mmap_mmap_read(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t num_bytes = PY_SSIZE_T_MAX; + + if (!_PyArg_CheckPositional("read", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + if (!_Py_convert_optional_to_ssize_t(args[0], &num_bytes)) { + goto exit; + } +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_read_impl((mmap_object *)self, num_bytes); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_find__doc__, +"find($self, view, start=None, end=None, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_FIND_METHODDEF \ + {"find", _PyCFunction_CAST(mmap_mmap_find), METH_FASTCALL, mmap_mmap_find__doc__}, + +static PyObject * +mmap_mmap_find_impl(mmap_object *self, Py_buffer *view, PyObject *start, + PyObject *end); + +static PyObject * +mmap_mmap_find(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_buffer view = {NULL, NULL}; + PyObject *start = Py_None; + PyObject *end = Py_None; + + if (!_PyArg_CheckPositional("find", nargs, 1, 3)) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &view, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (nargs < 2) { + goto skip_optional; + } + start = args[1]; + if (nargs < 3) { + goto skip_optional; + } + end = args[2]; +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_find_impl((mmap_object *)self, &view, start, end); + Py_END_CRITICAL_SECTION(); + +exit: + /* Cleanup for view */ + if (view.obj) { + PyBuffer_Release(&view); + } + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_rfind__doc__, +"rfind($self, view, start=None, end=None, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_RFIND_METHODDEF \ + {"rfind", _PyCFunction_CAST(mmap_mmap_rfind), METH_FASTCALL, mmap_mmap_rfind__doc__}, + +static PyObject * +mmap_mmap_rfind_impl(mmap_object *self, Py_buffer *view, PyObject *start, + PyObject *end); + +static PyObject * +mmap_mmap_rfind(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_buffer view = {NULL, NULL}; + PyObject *start = Py_None; + PyObject *end = Py_None; + + if (!_PyArg_CheckPositional("rfind", nargs, 1, 3)) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &view, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (nargs < 2) { + goto skip_optional; + } + start = args[1]; + if (nargs < 3) { + goto skip_optional; + } + end = args[2]; +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_rfind_impl((mmap_object *)self, &view, start, end); + Py_END_CRITICAL_SECTION(); + +exit: + /* Cleanup for view */ + if (view.obj) { + PyBuffer_Release(&view); + } + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_write__doc__, +"write($self, bytes, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_WRITE_METHODDEF \ + {"write", (PyCFunction)mmap_mmap_write, METH_O, mmap_mmap_write__doc__}, + +static PyObject * +mmap_mmap_write_impl(mmap_object *self, Py_buffer *data); + +static PyObject * +mmap_mmap_write(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + Py_buffer data = {NULL, NULL}; + + if (PyObject_GetBuffer(arg, &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_write_impl((mmap_object *)self, &data); + Py_END_CRITICAL_SECTION(); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_write_byte__doc__, +"write_byte($self, byte, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_WRITE_BYTE_METHODDEF \ + {"write_byte", (PyCFunction)mmap_mmap_write_byte, METH_O, mmap_mmap_write_byte__doc__}, + +static PyObject * +mmap_mmap_write_byte_impl(mmap_object *self, unsigned char value); + +static PyObject * +mmap_mmap_write_byte(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + unsigned char value; + + { + long ival = PyLong_AsLong(arg); + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + else if (ival < 0) { + PyErr_SetString(PyExc_OverflowError, + "unsigned byte integer is less than minimum"); + goto exit; + } + else if (ival > UCHAR_MAX) { + PyErr_SetString(PyExc_OverflowError, + "unsigned byte integer is greater than maximum"); + goto exit; + } + else { + value = (unsigned char) ival; + } + } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_write_byte_impl((mmap_object *)self, value); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_size__doc__, +"size($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_SIZE_METHODDEF \ + {"size", (PyCFunction)mmap_mmap_size, METH_NOARGS, mmap_mmap_size__doc__}, + +static PyObject * +mmap_mmap_size_impl(mmap_object *self); + +static PyObject * +mmap_mmap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_size_impl((mmap_object *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_resize__doc__, +"resize($self, newsize, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_RESIZE_METHODDEF \ + {"resize", (PyCFunction)mmap_mmap_resize, METH_O, mmap_mmap_resize__doc__}, + +static PyObject * +mmap_mmap_resize_impl(mmap_object *self, Py_ssize_t new_size); + +static PyObject * +mmap_mmap_resize(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + Py_ssize_t new_size; + + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(arg); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + new_size = ival; + } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_resize_impl((mmap_object *)self, new_size); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_tell__doc__, +"tell($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_TELL_METHODDEF \ + {"tell", (PyCFunction)mmap_mmap_tell, METH_NOARGS, mmap_mmap_tell__doc__}, + +static PyObject * +mmap_mmap_tell_impl(mmap_object *self); + +static PyObject * +mmap_mmap_tell(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_tell_impl((mmap_object *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_flush__doc__, +"flush($self, offset=0, size=None, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_FLUSH_METHODDEF \ + {"flush", _PyCFunction_CAST(mmap_mmap_flush), METH_FASTCALL, mmap_mmap_flush__doc__}, + +static PyObject * +mmap_mmap_flush_impl(mmap_object *self, Py_ssize_t offset, + PyObject *size_obj); + +static PyObject * +mmap_mmap_flush(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t offset = 0; + PyObject *size_obj = Py_None; + + if (!_PyArg_CheckPositional("flush", nargs, 0, 2)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + offset = ival; + } + if (nargs < 2) { + goto skip_optional; + } + size_obj = args[1]; +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_flush_impl((mmap_object *)self, offset, size_obj); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_seek__doc__, +"seek($self, pos, whence=0, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_SEEK_METHODDEF \ + {"seek", _PyCFunction_CAST(mmap_mmap_seek), METH_FASTCALL, mmap_mmap_seek__doc__}, + +static PyObject * +mmap_mmap_seek_impl(mmap_object *self, Py_ssize_t dist, int how); + +static PyObject * +mmap_mmap_seek(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t dist; + int how = 0; + + if (!_PyArg_CheckPositional("seek", nargs, 1, 2)) { + goto exit; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + dist = ival; + } + if (nargs < 2) { + goto skip_optional; + } + how = PyLong_AsInt(args[1]); + if (how == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_seek_impl((mmap_object *)self, dist, how); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(mmap_mmap_seekable__doc__, +"seekable($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_SEEKABLE_METHODDEF \ + {"seekable", (PyCFunction)mmap_mmap_seekable, METH_NOARGS, mmap_mmap_seekable__doc__}, + +static PyObject * +mmap_mmap_seekable_impl(mmap_object *self); + +static PyObject * +mmap_mmap_seekable(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return mmap_mmap_seekable_impl((mmap_object *)self); +} + +PyDoc_STRVAR(mmap_mmap_move__doc__, +"move($self, dest, src, count, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_MOVE_METHODDEF \ + {"move", _PyCFunction_CAST(mmap_mmap_move), METH_FASTCALL, mmap_mmap_move__doc__}, + +static PyObject * +mmap_mmap_move_impl(mmap_object *self, Py_ssize_t dest, Py_ssize_t src, + Py_ssize_t cnt); + +static PyObject * +mmap_mmap_move(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t dest; + Py_ssize_t src; + Py_ssize_t cnt; + + if (!_PyArg_CheckPositional("move", nargs, 3, 3)) { + goto exit; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + dest = ival; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + src = ival; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + cnt = ival; + } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_move_impl((mmap_object *)self, dest, src, cnt); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +PyDoc_STRVAR(mmap_mmap___enter____doc__, +"__enter__($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP___ENTER___METHODDEF \ + {"__enter__", (PyCFunction)mmap_mmap___enter__, METH_NOARGS, mmap_mmap___enter____doc__}, + +static PyObject * +mmap_mmap___enter___impl(mmap_object *self); + +static PyObject * +mmap_mmap___enter__(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap___enter___impl((mmap_object *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(mmap_mmap___exit____doc__, +"__exit__($self, exc_type, exc_value, traceback, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP___EXIT___METHODDEF \ + {"__exit__", _PyCFunction_CAST(mmap_mmap___exit__), METH_FASTCALL, mmap_mmap___exit____doc__}, + +static PyObject * +mmap_mmap___exit___impl(mmap_object *self, PyObject *exc_type, + PyObject *exc_value, PyObject *traceback); + +static PyObject * +mmap_mmap___exit__(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *exc_type; + PyObject *exc_value; + PyObject *traceback; + + if (!_PyArg_CheckPositional("__exit__", nargs, 3, 3)) { + goto exit; + } + exc_type = args[0]; + exc_value = args[1]; + traceback = args[2]; + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap___exit___impl((mmap_object *)self, exc_type, exc_value, traceback); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +#if defined(MS_WINDOWS) + +PyDoc_STRVAR(mmap_mmap___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)mmap_mmap___sizeof__, METH_NOARGS, mmap_mmap___sizeof____doc__}, + +static PyObject * +mmap_mmap___sizeof___impl(mmap_object *self); + +static PyObject * +mmap_mmap___sizeof__(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap___sizeof___impl((mmap_object *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +#endif /* defined(MS_WINDOWS) */ + +#if (defined(MS_WINDOWS) && defined(Py_DEBUG)) + +PyDoc_STRVAR(mmap_mmap__protect__doc__, +"_protect($self, flNewProtect, start, length, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP__PROTECT_METHODDEF \ + {"_protect", _PyCFunction_CAST(mmap_mmap__protect), METH_FASTCALL, mmap_mmap__protect__doc__}, + +static PyObject * +mmap_mmap__protect_impl(mmap_object *self, unsigned int flNewProtect, + Py_ssize_t start, Py_ssize_t length); + +static PyObject * +mmap_mmap__protect(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + unsigned int flNewProtect; + Py_ssize_t start; + Py_ssize_t length; + + if (!_PyArg_CheckPositional("_protect", nargs, 3, 3)) { + goto exit; + } + flNewProtect = (unsigned int)PyLong_AsUnsignedLongMask(args[0]); + if (flNewProtect == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + start = ival; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + length = ival; + } + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap__protect_impl((mmap_object *)self, flNewProtect, start, length); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +#endif /* (defined(MS_WINDOWS) && defined(Py_DEBUG)) */ + +#if defined(HAVE_MADVISE) + +PyDoc_STRVAR(mmap_mmap_madvise__doc__, +"madvise($self, option, start=0, length=None, /)\n" +"--\n" +"\n"); + +#define MMAP_MMAP_MADVISE_METHODDEF \ + {"madvise", _PyCFunction_CAST(mmap_mmap_madvise), METH_FASTCALL, mmap_mmap_madvise__doc__}, + +static PyObject * +mmap_mmap_madvise_impl(mmap_object *self, int option, Py_ssize_t start, + PyObject *length_obj); + +static PyObject * +mmap_mmap_madvise(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + int option; + Py_ssize_t start = 0; + PyObject *length_obj = Py_None; + + if (!_PyArg_CheckPositional("madvise", nargs, 1, 3)) { + goto exit; + } + option = PyLong_AsInt(args[0]); + if (option == -1 && PyErr_Occurred()) { + goto exit; + } + if (nargs < 2) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + start = ival; + } + if (nargs < 3) { + goto skip_optional; + } + length_obj = args[2]; +skip_optional: + Py_BEGIN_CRITICAL_SECTION(self); + return_value = mmap_mmap_madvise_impl((mmap_object *)self, option, start, length_obj); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + +#endif /* defined(HAVE_MADVISE) */ + +#ifndef MMAP_MMAP___SIZEOF___METHODDEF + #define MMAP_MMAP___SIZEOF___METHODDEF +#endif /* !defined(MMAP_MMAP___SIZEOF___METHODDEF) */ + +#ifndef MMAP_MMAP__PROTECT_METHODDEF + #define MMAP_MMAP__PROTECT_METHODDEF +#endif /* !defined(MMAP_MMAP__PROTECT_METHODDEF) */ + +#ifndef MMAP_MMAP_MADVISE_METHODDEF + #define MMAP_MMAP_MADVISE_METHODDEF +#endif /* !defined(MMAP_MMAP_MADVISE_METHODDEF) */ +/*[clinic end generated code: output=fe4e2131abc1e971 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 6b8cc3d07ab01c..87a17935507b9c 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -380,7 +380,7 @@ PyDoc_STRVAR(os_chdir__doc__, "\n" "path may always be specified as a string.\n" "On some platforms, path may also be specified as an open file descriptor.\n" -" If this functionality is unavailable, using it raises an exception."); +"If this functionality is unavailable, using it raises an exception."); #define OS_CHDIR_METHODDEF \ {"chdir", _PyCFunction_CAST(os_chdir), METH_FASTCALL|METH_KEYWORDS, os_chdir__doc__}, @@ -12727,6 +12727,80 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #endif /* defined(__EMSCRIPTEN__) */ +#if defined(__EMSCRIPTEN__) + +PyDoc_STRVAR(os__emscripten_log__doc__, +"_emscripten_log($module, /, arg)\n" +"--\n" +"\n" +"Log something to the JS console. Emscripten only."); + +#define OS__EMSCRIPTEN_LOG_METHODDEF \ + {"_emscripten_log", _PyCFunction_CAST(os__emscripten_log), METH_FASTCALL|METH_KEYWORDS, os__emscripten_log__doc__}, + +static PyObject * +os__emscripten_log_impl(PyObject *module, const char *arg); + +static PyObject * +os__emscripten_log(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(arg), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"arg", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_emscripten_log", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + const char *arg; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("_emscripten_log", "argument 'arg'", "str", args[0]); + goto exit; + } + Py_ssize_t arg_length; + arg = PyUnicode_AsUTF8AndSize(args[0], &arg_length); + if (arg == NULL) { + goto exit; + } + if (strlen(arg) != (size_t)arg_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + return_value = os__emscripten_log_impl(module, arg); + +exit: + return return_value; +} + +#endif /* defined(__EMSCRIPTEN__) */ + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -13398,4 +13472,8 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ -/*[clinic end generated code: output=f7b5635e0b948be4 input=a9049054013a1b77]*/ + +#ifndef OS__EMSCRIPTEN_LOG_METHODDEF + #define OS__EMSCRIPTEN_LOG_METHODDEF +#endif /* !defined(OS__EMSCRIPTEN_LOG_METHODDEF) */ +/*[clinic end generated code: output=9e5f9b9ce732a534 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index 13210e3be0f747..e178547060446e 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_SINGLETON() #endif +#include "pycore_long.h" // _PyLong_UnsignedLongLong_Converter() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__, @@ -408,6 +409,131 @@ pyexpat_xmlparser_UseForeignDTD(PyObject *self, PyTypeObject *cls, PyObject *con #endif /* (XML_COMBINED_VERSION >= 19505) */ +#if (XML_COMBINED_VERSION >= 20702) + +PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__, +"SetAllocTrackerActivationThreshold($self, threshold, /)\n" +"--\n" +"\n" +"Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM.\n" +"\n" +"By default, parser objects have an allocation activation threshold of 64 MiB."); + +#define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF \ + {"SetAllocTrackerActivationThreshold", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerActivationThreshold), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerActivationThreshold__doc__}, + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold); + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerActivationThreshold(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "SetAllocTrackerActivationThreshold", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + unsigned long long threshold; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLongLong_Converter(args[0], &threshold)) { + goto exit; + } + return_value = pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl((xmlparseobject *)self, cls, threshold); + +exit: + return return_value; +} + +#endif /* (XML_COMBINED_VERSION >= 20702) */ + +#if (XML_COMBINED_VERSION >= 20702) + +PyDoc_STRVAR(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__, +"SetAllocTrackerMaximumAmplification($self, max_factor, /)\n" +"--\n" +"\n" +"Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated.\n" +"\n" +"The amplification factor is calculated as \"allocated / direct\" while parsing,\n" +"where \"direct\" is the number of bytes read from the primary document in parsing\n" +"and \"allocated\" is the number of bytes of dynamic memory allocated in the parser\n" +"hierarchy.\n" +"\n" +"The \'max_factor\' value must be a non-NaN floating point value greater than\n" +"or equal to 1.0. Amplification factors greater than 100.0 can be observed\n" +"near the start of parsing even with benign files in practice. In particular,\n" +"the activation threshold should be carefully chosen to avoid false positives.\n" +"\n" +"By default, parser objects have a maximum amplification factor of 100.0."); + +#define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF \ + {"SetAllocTrackerMaximumAmplification", _PyCFunction_CAST(pyexpat_xmlparser_SetAllocTrackerMaximumAmplification), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, pyexpat_xmlparser_SetAllocTrackerMaximumAmplification__doc__}, + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, + PyTypeObject *cls, + float max_factor); + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + # define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty) + #else + # define KWTUPLE NULL + #endif + + static const char * const _keywords[] = {"", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "SetAllocTrackerMaximumAmplification", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + float max_factor; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyFloat_CheckExact(args[0])) { + max_factor = (float) (PyFloat_AS_DOUBLE(args[0])); + } + else + { + max_factor = (float) PyFloat_AsDouble(args[0]); + if (max_factor == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + return_value = pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl((xmlparseobject *)self, cls, max_factor); + +exit: + return return_value; +} + +#endif /* (XML_COMBINED_VERSION >= 20702) */ + PyDoc_STRVAR(pyexpat_ParserCreate__doc__, "ParserCreate($module, /, encoding=None, namespace_separator=None,\n" " intern=<unrepresentable>)\n" @@ -552,4 +678,12 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=4dbdc959c67dc2d5 input=a9049054013a1b77]*/ + +#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF + #define PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF) */ + +#ifndef PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF + #define PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF +#endif /* !defined(PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF) */ +/*[clinic end generated code: output=e73935658c04c83e input=a9049054013a1b77]*/ diff --git a/Modules/clinic/resource.c.h b/Modules/clinic/resource.c.h index 9eda7de27532a1..e4ef93900d1797 100644 --- a/Modules/clinic/resource.c.h +++ b/Modules/clinic/resource.c.h @@ -2,6 +2,8 @@ preserve [clinic start generated code]*/ +#include "pycore_modsupport.h" // _PyArg_CheckPositional() + #if defined(HAVE_GETRUSAGE) PyDoc_STRVAR(resource_getrusage__doc__, @@ -66,7 +68,7 @@ PyDoc_STRVAR(resource_setrlimit__doc__, "\n"); #define RESOURCE_SETRLIMIT_METHODDEF \ - {"setrlimit", (PyCFunction)(void(*)(void))resource_setrlimit, METH_FASTCALL, resource_setrlimit__doc__}, + {"setrlimit", _PyCFunction_CAST(resource_setrlimit), METH_FASTCALL, resource_setrlimit__doc__}, static PyObject * resource_setrlimit_impl(PyObject *module, int resource, PyObject *limits); @@ -78,8 +80,7 @@ resource_setrlimit(PyObject *module, PyObject *const *args, Py_ssize_t nargs) int resource; PyObject *limits; - if (nargs != 2) { - PyErr_Format(PyExc_TypeError, "setrlimit expected 2 arguments, got %zd", nargs); + if (!_PyArg_CheckPositional("setrlimit", nargs, 2, 2)) { goto exit; } resource = PyLong_AsInt(args[0]); @@ -101,7 +102,7 @@ PyDoc_STRVAR(resource_prlimit__doc__, "\n"); #define RESOURCE_PRLIMIT_METHODDEF \ - {"prlimit", (PyCFunction)(void(*)(void))resource_prlimit, METH_FASTCALL, resource_prlimit__doc__}, + {"prlimit", _PyCFunction_CAST(resource_prlimit), METH_FASTCALL, resource_prlimit__doc__}, static PyObject * resource_prlimit_impl(PyObject *module, pid_t pid, int resource, @@ -115,12 +116,7 @@ resource_prlimit(PyObject *module, PyObject *const *args, Py_ssize_t nargs) int resource; PyObject *limits = Py_None; - if (nargs < 2) { - PyErr_Format(PyExc_TypeError, "prlimit expected at least 2 arguments, got %zd", nargs); - goto exit; - } - if (nargs > 3) { - PyErr_Format(PyExc_TypeError, "prlimit expected at most 3 arguments, got %zd", nargs); + if (!_PyArg_CheckPositional("prlimit", nargs, 2, 3)) { goto exit; } pid = PyLong_AsPid(args[0]); @@ -178,4 +174,4 @@ resource_getpagesize(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef RESOURCE_PRLIMIT_METHODDEF #define RESOURCE_PRLIMIT_METHODDEF #endif /* !defined(RESOURCE_PRLIMIT_METHODDEF) */ -/*[clinic end generated code: output=e45883ace510414a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8e905b2f5c35170e input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha1module.c.h b/Modules/clinic/sha1module.c.h index 3e5fd1a41ce21f..4a58d0cd9b82a4 100644 --- a/Modules/clinic/sha1module.c.h +++ b/Modules/clinic/sha1module.c.h @@ -89,7 +89,7 @@ SHA1Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_sha1_sha1__doc__, -"sha1($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha1($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA1 hash object; optionally initialized with a string."); @@ -98,7 +98,8 @@ PyDoc_STRVAR(_sha1_sha1__doc__, {"sha1", _PyCFunction_CAST(_sha1_sha1), METH_FASTCALL|METH_KEYWORDS, _sha1_sha1__doc__}, static PyObject * -_sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -106,7 +107,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -115,7 +116,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -124,17 +125,18 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha1", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -145,7 +147,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -154,14 +156,20 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha1_sha1_impl(module, string, usedforsecurity); + return_value = _sha1_sha1_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=06161e87e2d645d4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fd5a917404b68c4f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha2module.c.h b/Modules/clinic/sha2module.c.h index 26612125e75df9..07be91e4f6c68f 100644 --- a/Modules/clinic/sha2module.c.h +++ b/Modules/clinic/sha2module.c.h @@ -169,7 +169,7 @@ SHA512Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_sha2_sha256__doc__, -"sha256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha256($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-256 hash object; optionally initialized with a string."); @@ -178,7 +178,8 @@ PyDoc_STRVAR(_sha2_sha256__doc__, {"sha256", _PyCFunction_CAST(_sha2_sha256), METH_FASTCALL|METH_KEYWORDS, _sha2_sha256__doc__}, static PyObject * -_sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -186,7 +187,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -195,7 +196,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -204,17 +205,18 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -225,7 +227,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -234,19 +236,25 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha256_impl(module, string, usedforsecurity); + return_value = _sha2_sha256_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha224__doc__, -"sha224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha224($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-224 hash object; optionally initialized with a string."); @@ -255,7 +263,8 @@ PyDoc_STRVAR(_sha2_sha224__doc__, {"sha224", _PyCFunction_CAST(_sha2_sha224), METH_FASTCALL|METH_KEYWORDS, _sha2_sha224__doc__}, static PyObject * -_sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -263,7 +272,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -272,7 +281,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -281,17 +290,18 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -302,7 +312,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -311,19 +321,25 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha224_impl(module, string, usedforsecurity); + return_value = _sha2_sha224_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha512__doc__, -"sha512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha512($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-512 hash object; optionally initialized with a string."); @@ -332,7 +348,8 @@ PyDoc_STRVAR(_sha2_sha512__doc__, {"sha512", _PyCFunction_CAST(_sha2_sha512), METH_FASTCALL|METH_KEYWORDS, _sha2_sha512__doc__}, static PyObject * -_sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -340,7 +357,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -349,7 +366,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -358,17 +375,18 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -379,7 +397,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -388,19 +406,25 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha512_impl(module, string, usedforsecurity); + return_value = _sha2_sha512_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha384__doc__, -"sha384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha384($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-384 hash object; optionally initialized with a string."); @@ -409,7 +433,8 @@ PyDoc_STRVAR(_sha2_sha384__doc__, {"sha384", _PyCFunction_CAST(_sha2_sha384), METH_FASTCALL|METH_KEYWORDS, _sha2_sha384__doc__}, static PyObject * -_sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -417,7 +442,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -426,7 +451,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -435,17 +460,18 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -456,7 +482,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -465,14 +491,20 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha384_impl(module, string, usedforsecurity); + return_value = _sha2_sha384_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=af11090855b7c85a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=90625b237c774a9f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h index 25f72b74f801db..121be2c0758695 100644 --- a/Modules/clinic/sha3module.c.h +++ b/Modules/clinic/sha3module.c.h @@ -10,13 +10,14 @@ preserve #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_sha3_new__doc__, -"sha3_224(data=b\'\', /, *, usedforsecurity=True)\n" +"sha3_224(data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA3 hash object."); static PyObject * -py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity); +py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, + PyObject *string); static PyObject * py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -24,7 +25,7 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -33,7 +34,7 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -42,40 +43,51 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha3_224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(fastargs[1]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[1]) { + usedforsecurity = PyObject_IsTrue(fastargs[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[2]; skip_optional_kwonly: - return_value = py_sha3_new_impl(type, data, usedforsecurity); + return_value = py_sha3_new_impl(type, data_obj, usedforsecurity, string); exit: return return_value; @@ -158,24 +170,57 @@ _sha3_sha3_224_update(PyObject *self, PyObject *data) } PyDoc_STRVAR(_sha3_shake_128_digest__doc__, -"digest($self, length, /)\n" +"digest($self, /, length)\n" "--\n" "\n" "Return the digest value as a bytes object."); #define _SHA3_SHAKE_128_DIGEST_METHODDEF \ - {"digest", (PyCFunction)_sha3_shake_128_digest, METH_O, _sha3_shake_128_digest__doc__}, + {"digest", _PyCFunction_CAST(_sha3_shake_128_digest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_digest__doc__}, static PyObject * _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length); static PyObject * -_sha3_shake_128_digest(PyObject *self, PyObject *arg) +_sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "digest", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; unsigned long length; - if (!_PyLong_UnsignedLong_Converter(arg, &length)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { goto exit; } return_value = _sha3_shake_128_digest_impl((SHA3object *)self, length); @@ -185,24 +230,57 @@ _sha3_shake_128_digest(PyObject *self, PyObject *arg) } PyDoc_STRVAR(_sha3_shake_128_hexdigest__doc__, -"hexdigest($self, length, /)\n" +"hexdigest($self, /, length)\n" "--\n" "\n" "Return the digest value as a string of hexadecimal digits."); #define _SHA3_SHAKE_128_HEXDIGEST_METHODDEF \ - {"hexdigest", (PyCFunction)_sha3_shake_128_hexdigest, METH_O, _sha3_shake_128_hexdigest__doc__}, + {"hexdigest", _PyCFunction_CAST(_sha3_shake_128_hexdigest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_hexdigest__doc__}, static PyObject * _sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length); static PyObject * -_sha3_shake_128_hexdigest(PyObject *self, PyObject *arg) +_sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "hexdigest", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; unsigned long length; - if (!_PyLong_UnsignedLong_Converter(arg, &length)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { goto exit; } return_value = _sha3_shake_128_hexdigest_impl((SHA3object *)self, length); @@ -210,4 +288,4 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=5b3ac1c06c6899ea input=a9049054013a1b77]*/ +/*[clinic end generated code: output=65e437799472b89f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/socketmodule.c.h b/Modules/clinic/socketmodule.c.h index 70ebbaa876b32b..2b27640cd0aaf3 100644 --- a/Modules/clinic/socketmodule.c.h +++ b/Modules/clinic/socketmodule.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_long.h" // _PyLong_UInt16_Converter() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_socket_socket_close__doc__, @@ -314,7 +315,7 @@ static PyObject * _socket_if_nametoindex(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; - PyObject *oname; + PyObject *oname = NULL; if (!PyUnicode_FSConverter(arg, &oname)) { goto exit; @@ -322,6 +323,9 @@ _socket_if_nametoindex(PyObject *module, PyObject *arg) return_value = _socket_if_nametoindex_impl(module, oname); exit: + /* Cleanup for oname */ + Py_XDECREF(oname); + return return_value; } @@ -369,4 +373,4 @@ _socket_if_indextoname(PyObject *module, PyObject *arg) #ifndef _SOCKET_IF_INDEXTONAME_METHODDEF #define _SOCKET_IF_INDEXTONAME_METHODDEF #endif /* !defined(_SOCKET_IF_INDEXTONAME_METHODDEF) */ -/*[clinic end generated code: output=c971b79d2193b426 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1bbfda36b02c29c2 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/symtablemodule.c.h b/Modules/clinic/symtablemodule.c.h index 2ecd3afc00d2be..bd55d77c5409e9 100644 --- a/Modules/clinic/symtablemodule.c.h +++ b/Modules/clinic/symtablemodule.c.h @@ -22,7 +22,7 @@ _symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyObject *source; - PyObject *filename; + PyObject *filename = NULL; const char *startstr; if (!_PyArg_CheckPositional("symtable", nargs, 3, 3)) { @@ -48,6 +48,9 @@ _symtable_symtable(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return_value = _symtable_symtable_impl(module, source, filename, startstr); exit: + /* Cleanup for filename */ + Py_XDECREF(filename); + return return_value; } -/*[clinic end generated code: output=931964a76a72f850 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7a8545d9a1efe837 input=a9049054013a1b77]*/ diff --git a/Modules/cmathmodule.c b/Modules/cmathmodule.c index 81cbf0d554de3c..f12455b286b84d 100644 --- a/Modules/cmathmodule.c +++ b/Modules/cmathmodule.c @@ -163,8 +163,15 @@ special_type(double d) raised. */ -static Py_complex acos_special_values[7][7]; - +static Py_complex acos_special_values[7][7] = { + { {P34,INF}, {P,INF}, {P,INF}, {P,-INF}, {P,-INF}, {P34,-INF}, {N,INF} }, + { {P12,INF}, {U,U}, {U,U}, {U,U}, {U,U}, {P12,-INF}, {N,N} }, + { {P12,INF}, {U,U}, {P12,0.}, {P12,-0.}, {U,U}, {P12,-INF}, {P12,N} }, + { {P12,INF}, {U,U}, {P12,0.}, {P12,-0.}, {U,U}, {P12,-INF}, {P12,N} }, + { {P12,INF}, {U,U}, {U,U}, {U,U}, {U,U}, {P12,-INF}, {N,N} }, + { {P14,INF}, {0.,INF}, {0.,INF}, {0.,-INF}, {0.,-INF}, {P14,-INF}, {N,INF} }, + { {N,INF}, {N,N}, {N,N}, {N,N}, {N,N}, {N,-INF}, {N,N} } +}; /*[clinic input] cmath.acos -> Py_complex_protected @@ -202,7 +209,15 @@ cmath_acos_impl(PyObject *module, Py_complex z) } -static Py_complex acosh_special_values[7][7]; +static Py_complex acosh_special_values[7][7] = { + { {INF,-P34}, {INF,-P}, {INF,-P}, {INF,P}, {INF,P}, {INF,P34}, {INF,N} }, + { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P12}, {U,U}, {0.,-P12}, {0.,P12}, {U,U}, {INF,P12}, {N,P12} }, + { {INF,-P12}, {U,U}, {0.,-P12}, {0.,P12}, {U,U}, {INF,P12}, {N,P12} }, + { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P14}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,P14}, {INF,N} }, + { {INF,N}, {N,N}, {N,N}, {N,N}, {N,N}, {INF,N}, {N,N} } +}; /*[clinic input] cmath.acosh = cmath.acos @@ -257,7 +272,15 @@ cmath_asin_impl(PyObject *module, Py_complex z) } -static Py_complex asinh_special_values[7][7]; +static Py_complex asinh_special_values[7][7] = { + { {-INF,-P14}, {-INF,-0.}, {-INF,-0.}, {-INF,0.}, {-INF,0.}, {-INF,P14}, {-INF,N} }, + { {-INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {-INF,P12}, {N,N} }, + { {-INF,-P12}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {-INF,P12}, {N,N} }, + { {INF,-P12}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P14}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,P14}, {INF,N} }, + { {INF,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {INF,N}, {N,N} } +}; /*[clinic input] cmath.asinh = cmath.acos @@ -318,7 +341,15 @@ cmath_atan_impl(PyObject *module, Py_complex z) } -static Py_complex atanh_special_values[7][7]; +static Py_complex atanh_special_values[7][7] = { + { {-0.,-P12}, {-0.,-P12}, {-0.,-P12}, {-0.,P12}, {-0.,P12}, {-0.,P12}, {-0.,N} }, + { {-0.,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {-0.,P12}, {N,N} }, + { {-0.,-P12}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {-0.,P12}, {-0.,N} }, + { {0.,-P12}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,P12}, {0.,N} }, + { {0.,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {0.,P12}, {N,N} }, + { {0.,-P12}, {0.,-P12}, {0.,-P12}, {0.,P12}, {0.,P12}, {0.,P12}, {0.,N} }, + { {0.,-P12}, {N,N}, {N,N}, {N,N}, {N,N}, {0.,P12}, {N,N} } +}; /*[clinic input] cmath.atanh = cmath.acos @@ -391,7 +422,15 @@ cmath_cos_impl(PyObject *module, Py_complex z) /* cosh(infinity + i*y) needs to be dealt with specially */ -static Py_complex cosh_special_values[7][7]; +static Py_complex cosh_special_values[7][7] = { + { {INF,N}, {U,U}, {INF,0.}, {INF,-0.}, {U,U}, {INF,N}, {INF,N} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {N,0.}, {U,U}, {1.,0.}, {1.,-0.}, {U,U}, {N,0.}, {N,0.} }, + { {N,0.}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {N,0.}, {N,0.} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} }, + { {N,N}, {N,N}, {N,0.}, {N,0.}, {N,N}, {N,N}, {N,N} } +}; /*[clinic input] cmath.cosh = cmath.acos @@ -453,7 +492,15 @@ cmath_cosh_impl(PyObject *module, Py_complex z) /* exp(infinity + i*y) and exp(-infinity + i*y) need special treatment for finite y */ -static Py_complex exp_special_values[7][7]; +static Py_complex exp_special_values[7][7] = { + { {0.,0.}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,0.}, {0.,0.} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {N,N}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {N,N}, {N,N} }, + { {N,N}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {N,N}, {N,N} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} }, + { {N,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {N,N}, {N,N} } +}; /*[clinic input] cmath.exp = cmath.acos @@ -512,7 +559,15 @@ cmath_exp_impl(PyObject *module, Py_complex z) return r; } -static Py_complex log_special_values[7][7]; +static Py_complex log_special_values[7][7] = { + { {INF,-P34}, {INF,-P}, {INF,-P}, {INF,P}, {INF,P}, {INF,P34}, {INF,N} }, + { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P12}, {U,U}, {-INF,-P}, {-INF,P}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P12}, {U,U}, {-INF,-0.}, {-INF,0.}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} }, + { {INF,-P14}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,P14}, {INF,N} }, + { {INF,N}, {N,N}, {N,N}, {N,N}, {N,N}, {INF,N}, {N,N} } +}; static Py_complex c_log(Py_complex z) @@ -628,7 +683,15 @@ cmath_sin_impl(PyObject *module, Py_complex z) /* sinh(infinity + i*y) needs to be dealt with specially */ -static Py_complex sinh_special_values[7][7]; +static Py_complex sinh_special_values[7][7] = { + { {INF,N}, {U,U}, {-INF,-0.}, {-INF,0.}, {U,U}, {INF,N}, {INF,N} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {0.,N}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {0.,N}, {0.,N} }, + { {0.,N}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,N}, {0.,N} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} }, + { {N,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {N,N}, {N,N} } +}; /*[clinic input] cmath.sinh = cmath.acos @@ -687,7 +750,15 @@ cmath_sinh_impl(PyObject *module, Py_complex z) } -static Py_complex sqrt_special_values[7][7]; +static Py_complex sqrt_special_values[7][7] = { + { {INF,-INF}, {0.,-INF}, {0.,-INF}, {0.,INF}, {0.,INF}, {INF,INF}, {N,INF} }, + { {INF,-INF}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,INF}, {N,N} }, + { {INF,-INF}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {INF,INF}, {N,N} }, + { {INF,-INF}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {INF,INF}, {N,N} }, + { {INF,-INF}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,INF}, {N,N} }, + { {INF,-INF}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,INF}, {INF,N} }, + { {INF,-INF}, {N,N}, {N,N}, {N,N}, {N,N}, {INF,INF}, {N,N} } +}; /*[clinic input] cmath.sqrt = cmath.acos @@ -786,7 +857,15 @@ cmath_tan_impl(PyObject *module, Py_complex z) /* tanh(infinity + i*y) needs to be dealt with specially */ -static Py_complex tanh_special_values[7][7]; +static Py_complex tanh_special_values[7][7] = { + { {-1.,0.}, {U,U}, {-1.,-0.}, {-1.,0.}, {U,U}, {-1.,0.}, {-1.,0.} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {-0.0,N}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {-0.0,N}, {-0.,N} }, + { {0.0,N}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.0,N}, {0.,N} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {1.,0.}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {1.,0.}, {1.,0.} }, + { {N,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {N,N}, {N,N} } +}; /*[clinic input] cmath.tanh = cmath.acos @@ -969,7 +1048,15 @@ cmath_polar_impl(PyObject *module, Py_complex z) */ -static Py_complex rect_special_values[7][7]; +static Py_complex rect_special_values[7][7] = { + { {INF,N}, {U,U}, {-INF,0.}, {-INF,-0.}, {U,U}, {INF,N}, {INF,N} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {0.,0.}, {U,U}, {-0.,0.}, {-0.,-0.}, {U,U}, {0.,0.}, {0.,0.} }, + { {0.,0.}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,0.}, {0.,0.} }, + { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} }, + { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} }, + { {N,N}, {N,N}, {N,0.}, {N,0.}, {N,N}, {N,N}, {N,N} } +}; /*[clinic input] cmath.rect @@ -1200,120 +1287,6 @@ cmath_exec(PyObject *mod) return -1; } - /* initialize special value tables */ - -#define INIT_SPECIAL_VALUES(NAME, BODY) { Py_complex* p = (Py_complex*)NAME; BODY } -#define C(REAL, IMAG) p->real = REAL; p->imag = IMAG; ++p; - - INIT_SPECIAL_VALUES(acos_special_values, { - C(P34,INF) C(P,INF) C(P,INF) C(P,-INF) C(P,-INF) C(P34,-INF) C(N,INF) - C(P12,INF) C(U,U) C(U,U) C(U,U) C(U,U) C(P12,-INF) C(N,N) - C(P12,INF) C(U,U) C(P12,0.) C(P12,-0.) C(U,U) C(P12,-INF) C(P12,N) - C(P12,INF) C(U,U) C(P12,0.) C(P12,-0.) C(U,U) C(P12,-INF) C(P12,N) - C(P12,INF) C(U,U) C(U,U) C(U,U) C(U,U) C(P12,-INF) C(N,N) - C(P14,INF) C(0.,INF) C(0.,INF) C(0.,-INF) C(0.,-INF) C(P14,-INF) C(N,INF) - C(N,INF) C(N,N) C(N,N) C(N,N) C(N,N) C(N,-INF) C(N,N) - }) - - INIT_SPECIAL_VALUES(acosh_special_values, { - C(INF,-P34) C(INF,-P) C(INF,-P) C(INF,P) C(INF,P) C(INF,P34) C(INF,N) - C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N) - C(INF,-P12) C(U,U) C(0.,-P12) C(0.,P12) C(U,U) C(INF,P12) C(N,P12) - C(INF,-P12) C(U,U) C(0.,-P12) C(0.,P12) C(U,U) C(INF,P12) C(N,P12) - C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N) - C(INF,-P14) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,P14) C(INF,N) - C(INF,N) C(N,N) C(N,N) C(N,N) C(N,N) C(INF,N) C(N,N) - }) - - INIT_SPECIAL_VALUES(asinh_special_values, { - C(-INF,-P14) C(-INF,-0.) C(-INF,-0.) C(-INF,0.) C(-INF,0.) C(-INF,P14) C(-INF,N) - C(-INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(-INF,P12) C(N,N) - C(-INF,-P12) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(-INF,P12) C(N,N) - C(INF,-P12) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(INF,P12) C(N,N) - C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N) - C(INF,-P14) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,P14) C(INF,N) - C(INF,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(INF,N) C(N,N) - }) - - INIT_SPECIAL_VALUES(atanh_special_values, { - C(-0.,-P12) C(-0.,-P12) C(-0.,-P12) C(-0.,P12) C(-0.,P12) C(-0.,P12) C(-0.,N) - C(-0.,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(-0.,P12) C(N,N) - C(-0.,-P12) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(-0.,P12) C(-0.,N) - C(0.,-P12) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,P12) C(0.,N) - C(0.,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(0.,P12) C(N,N) - C(0.,-P12) C(0.,-P12) C(0.,-P12) C(0.,P12) C(0.,P12) C(0.,P12) C(0.,N) - C(0.,-P12) C(N,N) C(N,N) C(N,N) C(N,N) C(0.,P12) C(N,N) - }) - - INIT_SPECIAL_VALUES(cosh_special_values, { - C(INF,N) C(U,U) C(INF,0.) C(INF,-0.) C(U,U) C(INF,N) C(INF,N) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(N,0.) C(U,U) C(1.,0.) C(1.,-0.) C(U,U) C(N,0.) C(N,0.) - C(N,0.) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(N,0.) C(N,0.) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N) - C(N,N) C(N,N) C(N,0.) C(N,0.) C(N,N) C(N,N) C(N,N) - }) - - INIT_SPECIAL_VALUES(exp_special_values, { - C(0.,0.) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,0.) C(0.,0.) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(N,N) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(N,N) C(N,N) - C(N,N) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(N,N) C(N,N) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N) - C(N,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(N,N) C(N,N) - }) - - INIT_SPECIAL_VALUES(log_special_values, { - C(INF,-P34) C(INF,-P) C(INF,-P) C(INF,P) C(INF,P) C(INF,P34) C(INF,N) - C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N) - C(INF,-P12) C(U,U) C(-INF,-P) C(-INF,P) C(U,U) C(INF,P12) C(N,N) - C(INF,-P12) C(U,U) C(-INF,-0.) C(-INF,0.) C(U,U) C(INF,P12) C(N,N) - C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N) - C(INF,-P14) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,P14) C(INF,N) - C(INF,N) C(N,N) C(N,N) C(N,N) C(N,N) C(INF,N) C(N,N) - }) - - INIT_SPECIAL_VALUES(sinh_special_values, { - C(INF,N) C(U,U) C(-INF,-0.) C(-INF,0.) C(U,U) C(INF,N) C(INF,N) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(0.,N) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(0.,N) C(0.,N) - C(0.,N) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,N) C(0.,N) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N) - C(N,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(N,N) C(N,N) - }) - - INIT_SPECIAL_VALUES(sqrt_special_values, { - C(INF,-INF) C(0.,-INF) C(0.,-INF) C(0.,INF) C(0.,INF) C(INF,INF) C(N,INF) - C(INF,-INF) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,INF) C(N,N) - C(INF,-INF) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(INF,INF) C(N,N) - C(INF,-INF) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(INF,INF) C(N,N) - C(INF,-INF) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,INF) C(N,N) - C(INF,-INF) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,INF) C(INF,N) - C(INF,-INF) C(N,N) C(N,N) C(N,N) C(N,N) C(INF,INF) C(N,N) - }) - - INIT_SPECIAL_VALUES(tanh_special_values, { - C(-1.,0.) C(U,U) C(-1.,-0.) C(-1.,0.) C(U,U) C(-1.,0.) C(-1.,0.) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(-0.0,N) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(-0.0,N) C(-0.,N) - C(0.0,N) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.0,N) C(0.,N) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(1.,0.) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(1.,0.) C(1.,0.) - C(N,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(N,N) C(N,N) - }) - - INIT_SPECIAL_VALUES(rect_special_values, { - C(INF,N) C(U,U) C(-INF,0.) C(-INF,-0.) C(U,U) C(INF,N) C(INF,N) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(0.,0.) C(U,U) C(-0.,0.) C(-0.,-0.) C(U,U) C(0.,0.) C(0.,0.) - C(0.,0.) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,0.) C(0.,0.) - C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N) - C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N) - C(N,N) C(N,N) C(N,0.) C(N,0.) C(N,N) C(N,N) C(N,N) - }) return 0; } diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 610e1ddc0e94ba..290dfeb0f6dd6a 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -19,6 +19,7 @@ Copyright (c) 2023 Hanno Böck <hanno@gentoo.org> Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> + Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -42,21 +43,21 @@ */ #ifndef Expat_INCLUDED -#define Expat_INCLUDED 1 +# define Expat_INCLUDED 1 -#include <stdlib.h> -#include "expat_external.h" +# include <stdlib.h> +# include "expat_external.h" -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif struct XML_ParserStruct; typedef struct XML_ParserStruct *XML_Parser; typedef unsigned char XML_Bool; -#define XML_TRUE ((XML_Bool)1) -#define XML_FALSE ((XML_Bool)0) +# define XML_TRUE ((XML_Bool)1) +# define XML_FALSE ((XML_Bool)0) /* The XML_Status enum gives the possible return values for several API functions. The preprocessor #defines are included so this @@ -73,11 +74,11 @@ typedef unsigned char XML_Bool; */ enum XML_Status { XML_STATUS_ERROR = 0, -#define XML_STATUS_ERROR XML_STATUS_ERROR +# define XML_STATUS_ERROR XML_STATUS_ERROR XML_STATUS_OK = 1, -#define XML_STATUS_OK XML_STATUS_OK +# define XML_STATUS_OK XML_STATUS_OK XML_STATUS_SUSPENDED = 2 -#define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED +# define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED }; enum XML_Error { @@ -276,7 +277,7 @@ XML_ParserCreate_MM(const XML_Char *encoding, /* Prepare a parser object to be reused. This is particularly valuable when memory allocation overhead is disproportionately high, - such as when a large number of small documnents need to be parsed. + such as when a large number of small documents need to be parsed. All handlers are cleared from the parser, except for the unknownEncodingHandler. The parser's external state is re-initialized except for the values of ns and ns_triplets. @@ -680,7 +681,7 @@ XMLPARSEAPI(void) XML_SetUserData(XML_Parser parser, void *userData); /* Returns the last value set by XML_SetUserData or NULL. */ -#define XML_GetUserData(parser) (*(void **)(parser)) +# define XML_GetUserData(parser) (*(void **)(parser)) /* This is equivalent to supplying an encoding argument to XML_ParserCreate. On success XML_SetEncoding returns non-zero, @@ -752,7 +753,7 @@ XML_GetSpecifiedAttributeCount(XML_Parser parser); XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); -#ifdef XML_ATTR_INFO +# ifdef XML_ATTR_INFO /* Source file byte offsets for the start and end of attribute names and values. The value indices are exclusive of surrounding quotes; thus in a UTF-8 source file an attribute value of "blah" will yield: @@ -773,7 +774,7 @@ typedef struct { */ XMLPARSEAPI(const XML_AttrInfo *) XML_GetAttributeInfo(XML_Parser parser); -#endif +# endif /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is detected. The last call to XML_Parse must have isFinal true; len @@ -970,9 +971,9 @@ XMLPARSEAPI(const char *) XML_GetInputContext(XML_Parser parser, int *offset, int *size); /* For backwards compatibility with previous versions. */ -#define XML_GetErrorLineNumber XML_GetCurrentLineNumber -#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber -#define XML_GetErrorByteIndex XML_GetCurrentByteIndex +# define XML_GetErrorLineNumber XML_GetCurrentLineNumber +# define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber +# define XML_GetErrorByteIndex XML_GetCurrentByteIndex /* Frees the content model passed to the element declaration handler */ XMLPARSEAPI(void) @@ -1032,7 +1033,10 @@ enum XML_FeatureEnum { XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, /* Added in Expat 2.6.0. */ - XML_FEATURE_GE + XML_FEATURE_GE, + /* Added in Expat 2.7.2. */ + XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, /* Additional features must be added to the end of this enum. */ }; @@ -1045,7 +1049,7 @@ typedef struct { XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) +# if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) /* Added in Expat 2.4.0 for XML_DTD defined and * added in Expat 2.6.0 for XML_GE == 1. */ XMLPARSEAPI(XML_Bool) @@ -1057,7 +1061,17 @@ XML_SetBillionLaughsAttackProtectionMaximumAmplification( XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionActivationThreshold( XML_Parser parser, unsigned long long activationThresholdBytes); -#endif + +/* Added in Expat 2.7.2. */ +XMLPARSEAPI(XML_Bool) +XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, + float maximumAmplificationFactor); + +/* Added in Expat 2.7.2. */ +XMLPARSEAPI(XML_Bool) +XML_SetAllocTrackerActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); +# endif /* Added in Expat 2.6.0. */ XMLPARSEAPI(XML_Bool) @@ -1066,12 +1080,12 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); /* Expat follows the semantic versioning convention. See https://semver.org */ -#define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 7 -#define XML_MICRO_VERSION 1 +# define XML_MAJOR_VERSION 2 +# define XML_MINOR_VERSION 7 +# define XML_MICRO_VERSION 3 -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif /* not Expat_INCLUDED */ diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h index 567872b09836e1..0f01a05d0e9560 100644 --- a/Modules/expat/expat_external.h +++ b/Modules/expat/expat_external.h @@ -38,8 +38,7 @@ */ #ifndef Expat_External_INCLUDED -#define Expat_External_INCLUDED 1 - +# define Expat_External_INCLUDED 1 /* Namespace external symbols to allow multiple libexpat version to co-exist. */ #include "pyexpatns.h" @@ -68,12 +67,12 @@ compiled with the cdecl calling convention as the default since system headers may assume the cdecl convention. */ -#ifndef XMLCALL -# if defined(_MSC_VER) -# define XMLCALL __cdecl -# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER) -# define XMLCALL __attribute__((cdecl)) -# else +# ifndef XMLCALL +# if defined(_MSC_VER) +# define XMLCALL __cdecl +# elif defined(__GNUC__) && defined(__i386) && ! defined(__INTEL_COMPILER) +# define XMLCALL __attribute__((cdecl)) +# else /* For any platform which uses this definition and supports more than one calling convention, we need to extend this definition to declare the convention used on that platform, if it's possible to @@ -84,86 +83,87 @@ pre-processor and how to specify the same calling convention as the platform's malloc() implementation. */ -# define XMLCALL -# endif -#endif /* not defined XMLCALL */ +# define XMLCALL +# endif +# endif /* not defined XMLCALL */ -#if ! defined(XML_STATIC) && ! defined(XMLIMPORT) -# ifndef XML_BUILDING_EXPAT +# if ! defined(XML_STATIC) && ! defined(XMLIMPORT) +# ifndef XML_BUILDING_EXPAT /* using Expat from an application */ -# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) && ! defined(__CYGWIN__) -# define XMLIMPORT __declspec(dllimport) +# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) \ + && ! defined(__CYGWIN__) +# define XMLIMPORT __declspec(dllimport) +# endif + # endif +# endif /* not defined XML_STATIC */ +# ifndef XML_ENABLE_VISIBILITY +# define XML_ENABLE_VISIBILITY 0 # endif -#endif /* not defined XML_STATIC */ - -#ifndef XML_ENABLE_VISIBILITY -# define XML_ENABLE_VISIBILITY 0 -#endif -#if ! defined(XMLIMPORT) && XML_ENABLE_VISIBILITY -# define XMLIMPORT __attribute__((visibility("default"))) -#endif +# if ! defined(XMLIMPORT) && XML_ENABLE_VISIBILITY +# define XMLIMPORT __attribute__((visibility("default"))) +# endif /* If we didn't define it above, define it away: */ -#ifndef XMLIMPORT -# define XMLIMPORT -#endif - -#if defined(__GNUC__) \ - && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) -# define XML_ATTR_MALLOC __attribute__((__malloc__)) -#else -# define XML_ATTR_MALLOC -#endif - -#if defined(__GNUC__) \ - && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) -# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) -#else -# define XML_ATTR_ALLOC_SIZE(x) -#endif - -#define XMLPARSEAPI(type) XMLIMPORT type XMLCALL - -#ifdef __cplusplus -extern "C" { -#endif +# ifndef XMLIMPORT +# define XMLIMPORT +# endif -#ifdef XML_UNICODE_WCHAR_T -# ifndef XML_UNICODE -# define XML_UNICODE +# if defined(__GNUC__) \ + && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +# define XML_ATTR_MALLOC __attribute__((__malloc__)) +# else +# define XML_ATTR_MALLOC # endif -# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) -# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" + +# if defined(__GNUC__) \ + && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) +# else +# define XML_ATTR_ALLOC_SIZE(x) +# endif + +# define XMLPARSEAPI(type) XMLIMPORT type XMLCALL + +# ifdef __cplusplus +extern "C" { # endif -#endif -#ifdef XML_UNICODE /* Information is UTF-16 encoded. */ # ifdef XML_UNICODE_WCHAR_T +# ifndef XML_UNICODE +# define XML_UNICODE +# endif +# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) +# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" +# endif +# endif + +# ifdef XML_UNICODE /* Information is UTF-16 encoded. */ +# ifdef XML_UNICODE_WCHAR_T typedef wchar_t XML_Char; typedef wchar_t XML_LChar; -# else +# else typedef unsigned short XML_Char; typedef char XML_LChar; -# endif /* XML_UNICODE_WCHAR_T */ -#else /* Information is UTF-8 encoded. */ +# endif /* XML_UNICODE_WCHAR_T */ +# else /* Information is UTF-8 encoded. */ typedef char XML_Char; typedef char XML_LChar; -#endif /* XML_UNICODE */ +# endif /* XML_UNICODE */ -#ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ +# ifdef XML_LARGE_SIZE /* Use large integers for file/stream positions. */ typedef long long XML_Index; typedef unsigned long long XML_Size; -#else +# else typedef long XML_Index; typedef unsigned long XML_Size; -#endif /* XML_LARGE_SIZE */ +# endif /* XML_LARGE_SIZE */ -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif /* not Expat_External_INCLUDED */ diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h index 6bde6ae6b31ddd..8f5edf48ef7c00 100644 --- a/Modules/expat/internal.h +++ b/Modules/expat/internal.h @@ -108,6 +108,7 @@ #endif #include <limits.h> // ULONG_MAX +#include <stddef.h> // size_t #if defined(_WIN32) \ && (! defined(__USE_MINGW_ANSI_STDIO) \ @@ -148,6 +149,16 @@ 100.0f #define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ 8388608 // 8 MiB, 2^23 + +#define EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT 100.0f +#define EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT \ + 67108864 // 64 MiB, 2^26 + +// NOTE: If function expat_alloc was user facing, EXPAT_MALLOC_ALIGNMENT would +// have to take sizeof(long double) into account +#define EXPAT_MALLOC_ALIGNMENT sizeof(long long) // largest parser (sub)member +#define EXPAT_MALLOC_PADDING ((EXPAT_MALLOC_ALIGNMENT) - sizeof(size_t)) + /* NOTE END */ #include "expat.h" // so we can use type XML_Parser below @@ -171,6 +182,9 @@ extern #endif XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c #if defined(XML_TESTING) +void *expat_malloc(XML_Parser parser, size_t size, int sourceLine); +void expat_free(XML_Parser parser, void *ptr, int sourceLine); +void *expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine); extern unsigned int g_bytesScanned; // used for testing only #endif diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h index 8ee03ef0792815..fc6b482d587e0d 100644 --- a/Modules/expat/pyexpatns.h +++ b/Modules/expat/pyexpatns.h @@ -82,6 +82,8 @@ #define XmlPrologStateInit PyExpat_XmlPrologStateInit #define XmlPrologStateInitExternalEntity PyExpat_XmlPrologStateInitExternalEntity #define XML_ResumeParser PyExpat_XML_ResumeParser +#define XML_SetAllocTrackerActivationThreshold PyExpat_XML_SetAllocTrackerActivationThreshold +#define XML_SetAllocTrackerMaximumAmplification PyExpat_XML_SetAllocTrackerMaximumAmplification #define XML_SetAttlistDeclHandler PyExpat_XML_SetAttlistDeclHandler #define XML_SetBase PyExpat_XML_SetBase #define XML_SetBillionLaughsAttackProtectionActivationThreshold PyExpat_XML_SetBillionLaughsAttackProtectionActivationThreshold diff --git a/Modules/expat/refresh.sh b/Modules/expat/refresh.sh index 3904fc8afd63d2..bb1a805277c6f1 100755 --- a/Modules/expat/refresh.sh +++ b/Modules/expat/refresh.sh @@ -12,9 +12,9 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. These values are used for verifying the SBOM, too. -expected_libexpat_tag="R_2_7_1" -expected_libexpat_version="2.7.1" -expected_libexpat_sha256="0cce2e6e69b327fc607b8ff264f4b66bdf71ead55a87ffd5f3143f535f15cfa2" +expected_libexpat_tag="R_2_7_3" +expected_libexpat_version="2.7.3" +expected_libexpat_sha256="821ac9710d2c073eaf13e1b1895a9c9aa66c1157a99635c639fbff65cdbdd732" expat_dir="$(realpath "$(dirname -- "${BASH_SOURCE[0]}")")" cd ${expat_dir} @@ -52,7 +52,7 @@ done rm libexpat.tar.gz # Step 3: Add the namespacing include to expat_external.h -sed -i 's/#define Expat_External_INCLUDED 1/&\n\n\/* Namespace external symbols to allow multiple libexpat version to\n co-exist. \*\/\n#include "pyexpatns.h"/' expat_external.h +sed -i 's/# define Expat_External_INCLUDED 1/&\n\/* Namespace external symbols to allow multiple libexpat version to\n co-exist. \*\/\n#include "pyexpatns.h"/' expat_external.h echo " Updated! next steps: diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index 38a2d9657b6aeb..a187a3a18f1994 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* d19ae032c224863c1527ba44d228cc34b99192c3a4c5a27af1f4e054d45ee031 (2.7.1+) +/* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -41,6 +41,7 @@ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com> Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com> Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> + Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -97,7 +98,7 @@ #include <stddef.h> #include <string.h> /* memset(), memcpy() */ #include <assert.h> -#include <limits.h> /* UINT_MAX */ +#include <limits.h> /* INT_MAX, UINT_MAX */ #include <stdio.h> /* fprintf */ #include <stdlib.h> /* getenv, rand_s */ #include <stdint.h> /* uintptr_t */ @@ -234,7 +235,7 @@ typedef struct { unsigned char power; size_t size; size_t used; - const XML_Memory_Handling_Suite *mem; + XML_Parser parser; } HASH_TABLE; static size_t keylen(KEY s); @@ -357,7 +358,7 @@ typedef struct { const XML_Char *end; XML_Char *ptr; XML_Char *start; - const XML_Memory_Handling_Suite *mem; + XML_Parser parser; } STRING_POOL; /* The XML_Char before the name is used to determine whether @@ -452,6 +453,14 @@ typedef struct accounting { unsigned long long activationThresholdBytes; } ACCOUNTING; +typedef struct MALLOC_TRACKER { + XmlBigCount bytesAllocated; + XmlBigCount peakBytesAllocated; // updated live only for debug level >=2 + unsigned long debugLevel; + float maximumAmplificationFactor; // >=1.0 + XmlBigCount activationThresholdBytes; +} MALLOC_TRACKER; + typedef struct entity_stats { unsigned int countEverOpened; unsigned int currentDepth; @@ -555,27 +564,24 @@ static XML_Bool setContext(XML_Parser parser, const XML_Char *context); static void FASTCALL normalizePublicId(XML_Char *s); -static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms); +static DTD *dtdCreate(XML_Parser parser); /* do not call if m_parentParser != NULL */ -static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); -static void dtdDestroy(DTD *p, XML_Bool isDocEntity, - const XML_Memory_Handling_Suite *ms); +static void dtdReset(DTD *p, XML_Parser parser); +static void dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser); static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, - const XML_Memory_Handling_Suite *ms); + XML_Parser parser); static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable, STRING_POOL *newPool, const HASH_TABLE *oldTable); static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); -static void FASTCALL hashTableInit(HASH_TABLE *table, - const XML_Memory_Handling_Suite *ms); +static void FASTCALL hashTableInit(HASH_TABLE *table, XML_Parser parser); static void FASTCALL hashTableClear(HASH_TABLE *table); static void FASTCALL hashTableDestroy(HASH_TABLE *table); static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table); static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter); -static void FASTCALL poolInit(STRING_POOL *pool, - const XML_Memory_Handling_Suite *ms); +static void FASTCALL poolInit(STRING_POOL *pool, XML_Parser parser); static void FASTCALL poolClear(STRING_POOL *pool); static void FASTCALL poolDestroy(STRING_POOL *pool); static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, @@ -595,15 +601,15 @@ static XML_Content *build_model(XML_Parser parser); static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end); -static XML_Char *copyString(const XML_Char *s, - const XML_Memory_Handling_Suite *memsuite); +static XML_Char *copyString(const XML_Char *s, XML_Parser parser); static unsigned long generate_hash_secret_salt(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser); static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep, DTD *dtd); + const XML_Char *nameSep, DTD *dtd, + XML_Parser parentParser); static void parserInit(XML_Parser parser, const XML_Char *encodingName); @@ -627,10 +633,10 @@ static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, int sourceLine); static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, int sourceLine); +#endif /* XML_GE == 1 */ static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff); -#endif /* XML_GE == 1 */ static unsigned long getDebugLevel(const char *variableName, unsigned long defaultDebugLevel); @@ -773,14 +779,238 @@ struct XML_ParserStruct { unsigned long m_hash_secret_salt; #if XML_GE == 1 ACCOUNTING m_accounting; + MALLOC_TRACKER m_alloc_tracker; ENTITY_STATS m_entity_stats; #endif XML_Bool m_reenter; }; -#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) -#define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) -#define FREE(parser, p) (parser->m_mem.free_fcn((p))) +#if XML_GE == 1 +# define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__)) +# define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__)) +# define FREE(parser, p) (expat_free((parser), (p), __LINE__)) +#else +# define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +# define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) +# define FREE(parser, p) (parser->m_mem.free_fcn((p))) +#endif + +#if XML_GE == 1 +static void +expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff, + XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) { + // NOTE: This can be +infinity or -nan + const float amplification + = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; + fprintf( + stderr, + "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL( + "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator, + absDiff, newTotal, peakTotal, (double)amplification, sourceLine); +} + +static bool +expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase, + int sourceLine) { + assert(rootParser != NULL); + assert(increase > 0); + + XmlBigCount newTotal = 0; + bool tolerable = true; + + // Detect integer overflow + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) { + tolerable = false; + } else { + newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase; + + if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) { + assert(newTotal > 0); + // NOTE: This can be +infinity when dividing by zero but not -nan + const float amplification + = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; + if (amplification + > rootParser->m_alloc_tracker.maximumAmplificationFactor) { + tolerable = false; + } + } + } + + if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) { + expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine); + } + + return tolerable; +} + +# if defined(XML_TESTING) +void * +# else +static void * +# endif +expat_malloc(XML_Parser parser, size_t size, int sourceLine) { + // Detect integer overflow + if (SIZE_MAX - size < sizeof(size_t) + EXPAT_MALLOC_PADDING) { + return NULL; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + const size_t bytesToAllocate = sizeof(size_t) + EXPAT_MALLOC_PADDING + size; + + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated + < bytesToAllocate) { + return NULL; // i.e. signal integer overflow as out-of-memory + } + + if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate, + sourceLine)) { + return NULL; // i.e. signal violation as out-of-memory + } + + // Actually allocate + void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate); + + if (mallocedPtr == NULL) { + return NULL; + } + + // Update in-block recorded size + *(size_t *)mallocedPtr = size; + + // Update accounting + rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate; + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + expat_heap_stat(rootParser, '+', bytesToAllocate, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; +} + +# if defined(XML_TESTING) +void +# else +static void +# endif +expat_free(XML_Parser parser, void *ptr, int sourceLine) { + assert(parser != NULL); + + if (ptr == NULL) { + return; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + // Extract size (to the eyes of malloc_fcn/realloc_fcn) and + // the original pointer returned by malloc/realloc + void *const mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); + const size_t bytesAllocated + = sizeof(size_t) + EXPAT_MALLOC_PADDING + *(size_t *)mallocedPtr; + + // Update accounting + assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated); + rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated; + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + expat_heap_stat(rootParser, '-', bytesAllocated, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + // NOTE: This may be freeing rootParser, so freeing has to come last + parser->m_mem.free_fcn(mallocedPtr); +} + +# if defined(XML_TESTING) +void * +# else +static void * +# endif +expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) { + assert(parser != NULL); + + if (ptr == NULL) { + return expat_malloc(parser, size, sourceLine); + } + + if (size == 0) { + expat_free(parser, ptr, sourceLine); + return NULL; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + // Extract original size (to the eyes of the caller) and the original + // pointer returned by malloc/realloc + void *mallocedPtr = (char *)ptr - EXPAT_MALLOC_PADDING - sizeof(size_t); + const size_t prevSize = *(size_t *)mallocedPtr; + + // Classify upcoming change + const bool isIncrease = (size > prevSize); + const size_t absDiff + = (size > prevSize) ? (size - prevSize) : (prevSize - size); + + // Ask for permission from accounting + if (isIncrease) { + if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) { + return NULL; // i.e. signal violation as out-of-memory + } + } + + // NOTE: Integer overflow detection has already been done for us + // by expat_heap_increase_tolerable(..) above + assert(SIZE_MAX - sizeof(size_t) - EXPAT_MALLOC_PADDING >= size); + + // Actually allocate + mallocedPtr = parser->m_mem.realloc_fcn( + mallocedPtr, sizeof(size_t) + EXPAT_MALLOC_PADDING + size); + + if (mallocedPtr == NULL) { + return NULL; + } + + // Update accounting + if (isIncrease) { + assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated + >= absDiff); + rootParser->m_alloc_tracker.bytesAllocated += absDiff; + } else { // i.e. decrease + assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff); + rootParser->m_alloc_tracker.bytesAllocated -= absDiff; + } + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + // Update in-block recorded size + *(size_t *)mallocedPtr = size; + + return (char *)mallocedPtr + sizeof(size_t) + EXPAT_MALLOC_PADDING; +} +#endif // XML_GE == 1 XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) { @@ -821,11 +1051,14 @@ writeRandomBytes_getrandom_nonblock(void *target, size_t count) { void *const currentTarget = (void *)((char *)target + bytesWrittenTotal); const size_t bytesToWrite = count - bytesWrittenTotal; + assert(bytesToWrite <= INT_MAX); + const int bytesWrittenMore = # if defined(HAVE_GETRANDOM) - getrandom(currentTarget, bytesToWrite, getrandomFlags); + (int)getrandom(currentTarget, bytesToWrite, getrandomFlags); # else - syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); + (int)syscall(SYS_getrandom, currentTarget, bytesToWrite, + getrandomFlags); # endif if (bytesWrittenMore > 0) { @@ -1012,9 +1245,10 @@ generate_hash_secret_salt(XML_Parser parser) { static unsigned long get_hash_secret_salt(XML_Parser parser) { - if (parser->m_parentParser != NULL) - return get_hash_secret_salt(parser->m_parentParser); - return parser->m_hash_secret_salt; + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(! rootParser->m_parentParser); + + return rootParser->m_hash_secret_salt; } static enum XML_Error @@ -1100,19 +1334,43 @@ XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { - return parserCreate(encodingName, memsuite, nameSep, NULL); + return parserCreate(encodingName, memsuite, nameSep, NULL, NULL); } static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, - DTD *dtd) { - XML_Parser parser; + DTD *dtd, XML_Parser parentParser) { + XML_Parser parser = NULL; + +#if XML_GE == 1 + const size_t increase + = sizeof(size_t) + EXPAT_MALLOC_PADDING + sizeof(struct XML_ParserStruct); + + if (parentParser != NULL) { + const XML_Parser rootParser = getRootParserOf(parentParser, NULL); + if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) { + return NULL; + } + } +#else + UNUSED_P(parentParser); +#endif if (memsuite) { XML_Memory_Handling_Suite *mtemp; +#if XML_GE == 1 + void *const sizeAndParser + = memsuite->malloc_fcn(sizeof(size_t) + EXPAT_MALLOC_PADDING + + sizeof(struct XML_ParserStruct)); + if (sizeAndParser != NULL) { + *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); + parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t) + + EXPAT_MALLOC_PADDING); +#else parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); if (parser != NULL) { +#endif mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = memsuite->malloc_fcn; mtemp->realloc_fcn = memsuite->realloc_fcn; @@ -1120,39 +1378,86 @@ parserCreate(const XML_Char *encodingName, } } else { XML_Memory_Handling_Suite *mtemp; - parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); +#if XML_GE == 1 + void *const sizeAndParser = malloc(sizeof(size_t) + EXPAT_MALLOC_PADDING + + sizeof(struct XML_ParserStruct)); + if (sizeAndParser != NULL) { + *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); + parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t) + + EXPAT_MALLOC_PADDING); +#else + parser = malloc(sizeof(struct XML_ParserStruct)); if (parser != NULL) { +#endif mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = malloc; mtemp->realloc_fcn = realloc; mtemp->free_fcn = free; } - } + } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0 if (! parser) return parser; +#if XML_GE == 1 + // Initialize .m_alloc_tracker + memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER)); + if (parentParser == NULL) { + parser->m_alloc_tracker.debugLevel + = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u); + parser->m_alloc_tracker.maximumAmplificationFactor + = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_alloc_tracker.activationThresholdBytes + = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT; + + // NOTE: This initialization needs to come this early because these fields + // are read by allocation tracking code + parser->m_parentParser = NULL; + parser->m_accounting.countBytesDirect = 0; + } else { + parser->m_parentParser = parentParser; + } + + // Record XML_ParserStruct allocation we did a few lines up before + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase); + rootParser->m_alloc_tracker.bytesAllocated += increase; + + // Report on allocation + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + + expat_heap_stat(rootParser, '+', increase, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__); + } +#else + parser->m_parentParser = NULL; +#endif // XML_GE == 1 + parser->m_buffer = NULL; parser->m_bufferLim = NULL; parser->m_attsSize = INIT_ATTS_SIZE; - parser->m_atts - = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); + parser->m_atts = MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE)); if (parser->m_atts == NULL) { FREE(parser, parser); return NULL; } #ifdef XML_ATTR_INFO - parser->m_attInfo = (XML_AttrInfo *)MALLOC( - parser, parser->m_attsSize * sizeof(XML_AttrInfo)); + parser->m_attInfo = MALLOC(parser, parser->m_attsSize * sizeof(XML_AttrInfo)); if (parser->m_attInfo == NULL) { FREE(parser, parser->m_atts); FREE(parser, parser); return NULL; } #endif - parser->m_dataBuf - = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + parser->m_dataBuf = MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char)); if (parser->m_dataBuf == NULL) { FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO @@ -1166,7 +1471,7 @@ parserCreate(const XML_Char *encodingName, if (dtd) parser->m_dtd = dtd; else { - parser->m_dtd = dtdCreate(&parser->m_mem); + parser->m_dtd = dtdCreate(parser); if (parser->m_dtd == NULL) { FREE(parser, parser->m_dataBuf); FREE(parser, parser->m_atts); @@ -1200,8 +1505,8 @@ parserCreate(const XML_Char *encodingName, parser->m_protocolEncodingName = NULL; - poolInit(&parser->m_tempPool, &(parser->m_mem)); - poolInit(&parser->m_temp2Pool, &(parser->m_mem)); + poolInit(&parser->m_tempPool, parser); + poolInit(&parser->m_temp2Pool, parser); parserInit(parser, encodingName); if (encodingName && ! parser->m_protocolEncodingName) { @@ -1233,7 +1538,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_processor = prologInitProcessor; XmlPrologStateInit(&parser->m_prologState); if (encodingName != NULL) { - parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + parser->m_protocolEncodingName = copyString(encodingName, parser); } parser->m_curBase = NULL; XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0); @@ -1295,7 +1600,6 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_unknownEncodingMem = NULL; parser->m_unknownEncodingRelease = NULL; parser->m_unknownEncodingData = NULL; - parser->m_parentParser = NULL; parser->m_parsingStatus.parsing = XML_INITIALIZED; // Reentry can only be triggered inside m_processor calls parser->m_reenter = XML_FALSE; @@ -1385,7 +1689,7 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { FREE(parser, (void *)parser->m_protocolEncodingName); parser->m_protocolEncodingName = NULL; parserInit(parser, encodingName); - dtdReset(parser->m_dtd, &parser->m_mem); + dtdReset(parser->m_dtd, parser); return XML_TRUE; } @@ -1421,7 +1725,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { parser->m_protocolEncodingName = NULL; else { /* Copy the new encoding name into allocated memory */ - parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem)); + parser->m_protocolEncodingName = copyString(encodingName, parser); if (! parser->m_protocolEncodingName) return XML_STATUS_ERROR; } @@ -1530,9 +1834,10 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, */ if (parser->m_ns) { XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; - parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser); } else { - parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); + parser + = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser); } if (! parser) @@ -1576,7 +1881,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem) + if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, parser) || ! setContext(parser, context)) { XML_ParserFree(parser); return NULL; @@ -1688,14 +1993,16 @@ XML_ParserFree(XML_Parser parser) { #else if (parser->m_dtd) #endif /* XML_DTD */ - dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, - &parser->m_mem); - FREE(parser, (void *)parser->m_atts); + dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser, parser); + FREE(parser, parser->m_atts); #ifdef XML_ATTR_INFO - FREE(parser, (void *)parser->m_attInfo); + FREE(parser, parser->m_attInfo); #endif FREE(parser, parser->m_groupConnector); - FREE(parser, parser->m_buffer); + // NOTE: We are avoiding FREE(..) here because parser->m_buffer + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(parser->m_buffer); FREE(parser, parser->m_dataBuf); FREE(parser, parser->m_nsAtts); FREE(parser, parser->m_unknownEncodingMem); @@ -2014,12 +2321,14 @@ int XMLCALL XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { if (parser == NULL) return 0; - if (parser->m_parentParser) - return XML_SetHashSalt(parser->m_parentParser, hash_salt); + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(! rootParser->m_parentParser); + /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parserBusy(parser)) + if (parserBusy(rootParser)) return 0; - parser->m_hash_secret_salt = hash_salt; + rootParser->m_hash_secret_salt = hash_salt; return 1; } @@ -2287,7 +2596,9 @@ XML_GetBuffer(XML_Parser parser, int len) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } - newBuf = (char *)MALLOC(parser, bufferSize); + // NOTE: We are avoiding MALLOC(..) here to leave limiting + // the input size to the application using Expat. + newBuf = parser->m_mem.malloc_fcn(bufferSize); if (newBuf == 0) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; @@ -2298,7 +2609,10 @@ XML_GetBuffer(XML_Parser parser, int len) { memcpy(newBuf, &parser->m_bufferPtr[-keep], EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr) + keep); - FREE(parser, parser->m_buffer); + // NOTE: We are avoiding FREE(..) here because parser->m_buffer + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(parser->m_buffer); parser->m_buffer = newBuf; parser->m_bufferEnd = parser->m_buffer @@ -2314,7 +2628,10 @@ XML_GetBuffer(XML_Parser parser, int len) { if (parser->m_bufferPtr) { memcpy(newBuf, parser->m_bufferPtr, EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)); - FREE(parser, parser->m_buffer); + // NOTE: We are avoiding FREE(..) here because parser->m_buffer + // is not being allocated with MALLOC(..) but with plain + // .malloc_fcn(..). + parser->m_mem.free_fcn(parser->m_buffer); parser->m_bufferEnd = newBuf + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr); @@ -2492,28 +2809,43 @@ XML_GetCurrentColumnNumber(XML_Parser parser) { void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content *model) { - if (parser != NULL) - FREE(parser, model); + if (parser == NULL) + return; + + // NOTE: We are avoiding FREE(..) here because the content model + // has been created using plain .malloc_fcn(..) rather than MALLOC(..). + parser->m_mem.free_fcn(model); } void *XMLCALL XML_MemMalloc(XML_Parser parser, size_t size) { if (parser == NULL) return NULL; - return MALLOC(parser, size); + + // NOTE: We are avoiding MALLOC(..) here to not include + // user allocations with allocation tracking and limiting. + return parser->m_mem.malloc_fcn(size); } void *XMLCALL XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { if (parser == NULL) return NULL; - return REALLOC(parser, ptr, size); + + // NOTE: We are avoiding REALLOC(..) here to not include + // user allocations with allocation tracking and limiting. + return parser->m_mem.realloc_fcn(ptr, size); } void XMLCALL XML_MemFree(XML_Parser parser, void *ptr) { - if (parser != NULL) - FREE(parser, ptr); + if (parser == NULL) + return; + + // NOTE: We are avoiding FREE(..) here because XML_MemMalloc and + // XML_MemRealloc are not using MALLOC(..) and REALLOC(..) + // but plain .malloc_fcn(..) and .realloc_fcn(..), internally. + parser->m_mem.free_fcn(ptr); } void XMLCALL @@ -2713,6 +3045,13 @@ XML_GetFeatureList(void) { EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, /* Added in Expat 2.6.0. */ {XML_FEATURE_GE, XML_L("XML_GE"), 0}, + /* Added in Expat 2.7.2. */ + {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_AT_MAX_AMP"), + (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_AT_ACT_THRES"), + (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT}, #endif {XML_FEATURE_END, NULL, 0}}; @@ -2741,6 +3080,29 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( parser->m_accounting.activationThresholdBytes = activationThresholdBytes; return XML_TRUE; } + +XML_Bool XMLCALL +XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, + float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_alloc_tracker.maximumAmplificationFactor + = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetAllocTrackerActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; +} #endif /* XML_GE == 1 */ XML_Bool XMLCALL @@ -2761,8 +3123,8 @@ static XML_Bool storeRawNames(XML_Parser parser) { TAG *tag = parser->m_tagStack; while (tag) { - int bufSize; - int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); + size_t bufSize; + size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); size_t rawNameLen; char *rawNameBuf = tag->buf + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop @@ -2779,9 +3141,9 @@ storeRawNames(XML_Parser parser) { /* Detect and prevent integer overflow. */ if (rawNameLen > (size_t)INT_MAX - nameLen) return XML_FALSE; - bufSize = nameLen + (int)rawNameLen; - if (bufSize > tag->bufEnd - tag->buf) { - char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + bufSize = nameLen + rawNameLen; + if (bufSize > (size_t)(tag->bufEnd - tag->buf)) { + char *temp = REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_FALSE; /* if tag->name.str points to tag->buf (only when namespace @@ -3107,10 +3469,10 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, tag = parser->m_freeTagList; parser->m_freeTagList = parser->m_freeTagList->parent; } else { - tag = (TAG *)MALLOC(parser, sizeof(TAG)); + tag = MALLOC(parser, sizeof(TAG)); if (! tag) return XML_ERROR_NO_MEMORY; - tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE); + tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE); if (! tag->buf) { FREE(parser, tag); return XML_ERROR_NO_MEMORY; @@ -3143,7 +3505,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, } bufSize = (int)(tag->bufEnd - tag->buf) << 1; { - char *temp = (char *)REALLOC(parser, tag->buf, bufSize); + char *temp = REALLOC(parser, tag->buf, bufSize); if (temp == NULL) return XML_ERROR_NO_MEMORY; tag->buf = temp; @@ -3522,8 +3884,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } #endif - temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts, - parser->m_attsSize * sizeof(ATTRIBUTE)); + temp = REALLOC(parser, parser->m_atts, + parser->m_attsSize * sizeof(ATTRIBUTE)); if (temp == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; @@ -3541,8 +3903,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } # endif - temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo, - parser->m_attsSize * sizeof(XML_AttrInfo)); + temp2 = REALLOC(parser, parser->m_attInfo, + parser->m_attsSize * sizeof(XML_AttrInfo)); if (temp2 == NULL) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; @@ -3677,7 +4039,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { - int j; /* hash table index */ + unsigned int j; /* hash table index */ unsigned long version = parser->m_nsAttsVersion; /* Detect and prevent invalid shift */ @@ -3718,8 +4080,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } #endif - temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts, - nsAttsSize * sizeof(NS_ATT)); + temp = REALLOC(parser, parser->m_nsAtts, nsAttsSize * sizeof(NS_ATT)); if (! temp) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; @@ -3772,7 +4133,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, if (! b) return XML_ERROR_UNBOUND_PREFIX; - for (j = 0; j < b->uriLen; j++) { + for (j = 0; j < (unsigned int)b->uriLen; j++) { const XML_Char c = b->uri[j]; if (! poolAppendChar(&parser->m_tempPool, c)) return XML_ERROR_NO_MEMORY; @@ -3866,7 +4227,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, return XML_ERROR_NONE; prefixLen = 0; if (parser->m_ns_triplets && binding->prefix->name) { - for (; binding->prefix->name[prefixLen++];) + while (binding->prefix->name[prefixLen++]) ; /* prefixLen includes null terminator */ } tagNamePtr->localPart = localPart; @@ -3900,7 +4261,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, } #endif - uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); + uri = MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char)); if (! uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; @@ -4146,8 +4507,8 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, } #endif - XML_Char *temp = (XML_Char *)REALLOC( - parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); + XML_Char *temp + = REALLOC(parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (temp == NULL) return XML_ERROR_NO_MEMORY; b->uri = temp; @@ -4155,7 +4516,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, } parser->m_freeBindingList = b->nextTagBinding; } else { - b = (BINDING *)MALLOC(parser, sizeof(BINDING)); + b = MALLOC(parser, sizeof(BINDING)); if (! b) return XML_ERROR_NO_MEMORY; @@ -4173,8 +4534,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, } #endif - b->uri - = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); + b->uri = MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (! b->uri) { FREE(parser, b); return XML_ERROR_NO_MEMORY; @@ -5545,7 +5905,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, return XML_ERROR_NO_MEMORY; } - char *const new_connector = (char *)REALLOC( + char *const new_connector = REALLOC( parser, parser->m_groupConnector, parser->m_groupSize *= 2); if (new_connector == NULL) { parser->m_groupSize /= 2; @@ -5565,15 +5925,14 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, } #endif - int *const new_scaff_index = (int *)REALLOC( + int *const new_scaff_index = REALLOC( parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); if (new_scaff_index == NULL) return XML_ERROR_NO_MEMORY; dtd->scaffIndex = new_scaff_index; } } else { - parser->m_groupConnector - = (char *)MALLOC(parser, parser->m_groupSize = 32); + parser->m_groupConnector = MALLOC(parser, parser->m_groupSize = 32); if (! parser->m_groupConnector) { parser->m_groupSize = 0; return XML_ERROR_NO_MEMORY; @@ -5730,8 +6089,11 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, case XML_ROLE_CONTENT_EMPTY: if (dtd->in_eldecl) { if (parser->m_elementDeclHandler) { - XML_Content *content - = (XML_Content *)MALLOC(parser, sizeof(XML_Content)); + // NOTE: We are avoiding MALLOC(..) here to so that + // applications that are not using XML_FreeContentModel but + // plain free(..) or .free_fcn() to free the content model's + // memory are safe. + XML_Content *content = parser->m_mem.malloc_fcn(sizeof(XML_Content)); if (! content) return XML_ERROR_NO_MEMORY; content->quant = XML_CQUANT_NONE; @@ -5787,7 +6149,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, name = el->name; dtd->scaffold[myindex].name = name; nameLen = 0; - for (; name[nameLen++];) + while (name[nameLen++]) ; /* Detect and prevent integer overflow */ @@ -6008,8 +6370,7 @@ processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl, openEntity = *freeEntityList; *freeEntityList = openEntity->next; } else { - openEntity - = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); + openEntity = MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY)); if (! openEntity) return XML_ERROR_NO_MEMORY; } @@ -6087,6 +6448,10 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, // process its possible inner entities (which are added to the // m_openInternalEntities during doProlog or doContent calls above) entity->hasMore = XML_FALSE; + if (! entity->is_param + && (openEntity->startTagLevel != parser->m_tagLevel)) { + return XML_ERROR_ASYNC_ENTITY; + } triggerReenter(parser); return result; } // End of entity processing, "if" block will return here @@ -6277,7 +6642,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, case XML_TOK_ENTITY_REF: { const XML_Char *name; ENTITY *entity; - char checkEntityDecl; + bool checkEntityDecl; XML_Char ch = (XML_Char)XmlPredefinedEntityName( enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { @@ -6804,8 +7169,8 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC( - parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + type->defaultAtts + = MALLOC(parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! type->defaultAtts) { type->allocDefaultAtts = 0; return 0; @@ -6830,8 +7195,8 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, } #endif - temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts, - (count * sizeof(DEFAULT_ATTRIBUTE))); + temp = REALLOC(parser, type->defaultAtts, + (count * sizeof(DEFAULT_ATTRIBUTE))); if (temp == NULL) return 0; type->allocDefaultAtts = count; @@ -7122,19 +7487,19 @@ normalizePublicId(XML_Char *publicId) { } static DTD * -dtdCreate(const XML_Memory_Handling_Suite *ms) { - DTD *p = ms->malloc_fcn(sizeof(DTD)); +dtdCreate(XML_Parser parser) { + DTD *p = MALLOC(parser, sizeof(DTD)); if (p == NULL) return p; - poolInit(&(p->pool), ms); - poolInit(&(p->entityValuePool), ms); - hashTableInit(&(p->generalEntities), ms); - hashTableInit(&(p->elementTypes), ms); - hashTableInit(&(p->attributeIds), ms); - hashTableInit(&(p->prefixes), ms); + poolInit(&(p->pool), parser); + poolInit(&(p->entityValuePool), parser); + hashTableInit(&(p->generalEntities), parser); + hashTableInit(&(p->elementTypes), parser); + hashTableInit(&(p->attributeIds), parser); + hashTableInit(&(p->prefixes), parser); #ifdef XML_DTD p->paramEntityRead = XML_FALSE; - hashTableInit(&(p->paramEntities), ms); + hashTableInit(&(p->paramEntities), parser); #endif /* XML_DTD */ p->defaultPrefix.name = NULL; p->defaultPrefix.binding = NULL; @@ -7154,7 +7519,7 @@ dtdCreate(const XML_Memory_Handling_Suite *ms) { } static void -dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { +dtdReset(DTD *p, XML_Parser parser) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { @@ -7162,7 +7527,7 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { if (! e) break; if (e->allocDefaultAtts != 0) - ms->free_fcn(e->defaultAtts); + FREE(parser, e->defaultAtts); } hashTableClear(&(p->generalEntities)); #ifdef XML_DTD @@ -7179,9 +7544,9 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { p->in_eldecl = XML_FALSE; - ms->free_fcn(p->scaffIndex); + FREE(parser, p->scaffIndex); p->scaffIndex = NULL; - ms->free_fcn(p->scaffold); + FREE(parser, p->scaffold); p->scaffold = NULL; p->scaffLevel = 0; @@ -7195,7 +7560,7 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) { } static void -dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { +dtdDestroy(DTD *p, XML_Bool isDocEntity, XML_Parser parser) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { @@ -7203,7 +7568,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { if (! e) break; if (e->allocDefaultAtts != 0) - ms->free_fcn(e->defaultAtts); + FREE(parser, e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); #ifdef XML_DTD @@ -7215,10 +7580,10 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { poolDestroy(&(p->pool)); poolDestroy(&(p->entityValuePool)); if (isDocEntity) { - ms->free_fcn(p->scaffIndex); - ms->free_fcn(p->scaffold); + FREE(parser, p->scaffIndex); + FREE(parser, p->scaffold); } - ms->free_fcn(p); + FREE(parser, p); } /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise. @@ -7226,7 +7591,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) { */ static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, - const XML_Memory_Handling_Suite *ms) { + XML_Parser parser) { HASH_TABLE_ITER iter; /* Copy the prefix table. */ @@ -7307,7 +7672,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, } #endif newE->defaultAtts - = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + = MALLOC(parser, oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (! newE->defaultAtts) { return 0; } @@ -7469,7 +7834,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { /* table->size is a power of 2 */ table->size = (size_t)1 << INIT_POWER; tsize = table->size * sizeof(NAMED *); - table->v = table->mem->malloc_fcn(tsize); + table->v = MALLOC(table->parser, tsize); if (! table->v) { table->size = 0; return NULL; @@ -7509,7 +7874,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } size_t tsize = newSize * sizeof(NAMED *); - NAMED **newV = table->mem->malloc_fcn(tsize); + NAMED **newV = MALLOC(table->parser, tsize); if (! newV) return NULL; memset(newV, 0, tsize); @@ -7525,7 +7890,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } newV[j] = table->v[i]; } - table->mem->free_fcn(table->v); + FREE(table->parser, table->v); table->v = newV; table->power = newPower; table->size = newSize; @@ -7538,7 +7903,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { } } } - table->v[i] = table->mem->malloc_fcn(createSize); + table->v[i] = MALLOC(table->parser, createSize); if (! table->v[i]) return NULL; memset(table->v[i], 0, createSize); @@ -7551,7 +7916,7 @@ static void FASTCALL hashTableClear(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { - table->mem->free_fcn(table->v[i]); + FREE(table->parser, table->v[i]); table->v[i] = NULL; } table->used = 0; @@ -7561,17 +7926,17 @@ static void FASTCALL hashTableDestroy(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) - table->mem->free_fcn(table->v[i]); - table->mem->free_fcn(table->v); + FREE(table->parser, table->v[i]); + FREE(table->parser, table->v); } static void FASTCALL -hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { +hashTableInit(HASH_TABLE *p, XML_Parser parser) { p->power = 0; p->size = 0; p->used = 0; p->v = NULL; - p->mem = ms; + p->parser = parser; } static void FASTCALL @@ -7591,13 +7956,13 @@ hashTableIterNext(HASH_TABLE_ITER *iter) { } static void FASTCALL -poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) { +poolInit(STRING_POOL *pool, XML_Parser parser) { pool->blocks = NULL; pool->freeBlocks = NULL; pool->start = NULL; pool->ptr = NULL; pool->end = NULL; - pool->mem = ms; + pool->parser = parser; } static void FASTCALL @@ -7624,13 +7989,13 @@ poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; - pool->mem->free_fcn(p); + FREE(pool->parser, p); p = tem; } p = pool->freeBlocks; while (p) { BLOCK *tem = p->next; - pool->mem->free_fcn(p); + FREE(pool->parser, p); p = tem; } } @@ -7785,8 +8150,7 @@ poolGrow(STRING_POOL *pool) { if (bytesToAllocate == 0) return XML_FALSE; - temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks, - (unsigned)bytesToAllocate); + temp = REALLOC(pool->parser, pool->blocks, bytesToAllocate); if (temp == NULL) return XML_FALSE; pool->blocks = temp; @@ -7826,7 +8190,7 @@ poolGrow(STRING_POOL *pool) { if (bytesToAllocate == 0) return XML_FALSE; - tem = pool->mem->malloc_fcn(bytesToAllocate); + tem = MALLOC(pool->parser, bytesToAllocate); if (! tem) return XML_FALSE; tem->size = blockSize; @@ -7857,12 +8221,17 @@ nextScaffoldPart(XML_Parser parser) { return -1; } #endif - dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); + dtd->scaffIndex = MALLOC(parser, parser->m_groupSize * sizeof(int)); if (! dtd->scaffIndex) return -1; dtd->scaffIndex[0] = 0; } + // Will casting to int be safe further down? + if (dtd->scaffCount > INT_MAX) { + return -1; + } + if (dtd->scaffCount >= dtd->scaffSize) { CONTENT_SCAFFOLD *temp; if (dtd->scaffold) { @@ -7880,21 +8249,20 @@ nextScaffoldPart(XML_Parser parser) { } #endif - temp = (CONTENT_SCAFFOLD *)REALLOC( - parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); + temp = REALLOC(parser, dtd->scaffold, + dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize *= 2; } else { - temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS - * sizeof(CONTENT_SCAFFOLD)); + temp = MALLOC(parser, INIT_SCAFFOLD_ELEMENTS * sizeof(CONTENT_SCAFFOLD)); if (temp == NULL) return -1; dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS; } dtd->scaffold = temp; } - next = dtd->scaffCount++; + next = (int)dtd->scaffCount++; me = &dtd->scaffold[next]; if (dtd->scaffLevel) { CONTENT_SCAFFOLD *parent @@ -7941,7 +8309,10 @@ build_model(XML_Parser parser) { const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content) + (dtd->contentStringLen * sizeof(XML_Char))); - ret = (XML_Content *)MALLOC(parser, allocsize); + // NOTE: We are avoiding MALLOC(..) here to so that + // applications that are not using XML_FreeContentModel but plain + // free(..) or .free_fcn() to free the content model's memory are safe. + ret = parser->m_mem.malloc_fcn(allocsize); if (! ret) return NULL; @@ -8062,7 +8433,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, } static XML_Char * -copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { +copyString(const XML_Char *s, XML_Parser parser) { size_t charsRequired = 0; XML_Char *result; @@ -8074,7 +8445,7 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) { charsRequired++; /* Now allocate space for the copy */ - result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char)); + result = MALLOC(parser, charsRequired * sizeof(XML_Char)); if (result == NULL) return NULL; /* Copy the original into place */ @@ -8093,10 +8464,10 @@ accountingGetCurrentAmplification(XML_Parser rootParser) { + rootParser->m_accounting.countBytesIndirect; const float amplificationFactor = rootParser->m_accounting.countBytesDirect - ? (countBytesOutput + ? ((float)countBytesOutput / (float)(rootParser->m_accounting.countBytesDirect)) - : ((lenOfShortestInclude - + rootParser->m_accounting.countBytesIndirect) + : ((float)(lenOfShortestInclude + + rootParser->m_accounting.countBytesIndirect) / (float)lenOfShortestInclude); assert(! rootParser->m_parentParser); return amplificationFactor; @@ -8280,6 +8651,8 @@ entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { rootParser->m_entity_stats.currentDepth--; } +#endif /* XML_GE == 1 */ + static XML_Parser getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { XML_Parser rootParser = parser; @@ -8295,6 +8668,8 @@ getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { return rootParser; } +#if XML_GE == 1 + const char * unsignedCharToPrintable(unsigned char c) { switch (c) { diff --git a/Modules/expat/xmlrole.h b/Modules/expat/xmlrole.h index a7904274c91d4e..9d0d4ff11b7f98 100644 --- a/Modules/expat/xmlrole.h +++ b/Modules/expat/xmlrole.h @@ -10,7 +10,7 @@ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> Copyright (c) 2002 Karl Waclawek <karl@waclawek.net> Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> - Copyright (c) 2017-2024 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2017-2025 Sebastian Pipping <sebastian@pipping.org> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -34,19 +34,13 @@ */ #ifndef XmlRole_INCLUDED -#define XmlRole_INCLUDED 1 +# define XmlRole_INCLUDED 1 -#ifdef __VMS -/* 0 1 2 3 0 1 2 3 - 1234567890123456789012345678901 1234567890123456789012345678901 */ -# define XmlPrologStateInitExternalEntity XmlPrologStateInitExternalEnt -#endif +# include "xmltok.h" -#include "xmltok.h" - -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif enum { XML_ROLE_ERROR = -1, @@ -107,11 +101,11 @@ enum { XML_ROLE_CONTENT_ELEMENT_PLUS, XML_ROLE_PI, XML_ROLE_COMMENT, -#ifdef XML_DTD +# ifdef XML_DTD XML_ROLE_TEXT_DECL, XML_ROLE_IGNORE_SECT, XML_ROLE_INNER_PARAM_ENTITY_REF, -#endif /* XML_DTD */ +# endif /* XML_DTD */ XML_ROLE_PARAM_ENTITY_REF }; @@ -120,23 +114,23 @@ typedef struct prolog_state { const char *end, const ENCODING *enc); unsigned level; int role_none; -#ifdef XML_DTD +# ifdef XML_DTD unsigned includeLevel; int documentEntity; int inEntityValue; -#endif /* XML_DTD */ +# endif /* XML_DTD */ } PROLOG_STATE; void XmlPrologStateInit(PROLOG_STATE *state); -#ifdef XML_DTD +# ifdef XML_DTD void XmlPrologStateInitExternalEntity(PROLOG_STATE *state); -#endif /* XML_DTD */ +# endif /* XML_DTD */ -#define XmlTokenRole(state, tok, ptr, end, enc) \ - (((state)->handler)(state, tok, ptr, end, enc)) +# define XmlTokenRole(state, tok, ptr, end, enc) \ + (((state)->handler)(state, tok, ptr, end, enc)) -#ifdef __cplusplus +# ifdef __cplusplus } -#endif +# endif #endif /* not XmlRole_INCLUDED */ diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c index 29a66d72ceea5e..95d5e84b67f11c 100644 --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -1398,7 +1398,7 @@ unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, } ENCODING * -XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, +XmlInitUnknownEncoding(void *mem, const int *table, CONVERTER convert, void *userData) { int i; struct unknown_encoding *e = (struct unknown_encoding *)mem; @@ -1661,7 +1661,7 @@ initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, # undef ns ENCODING * -XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, +XmlInitUnknownEncodingNS(void *mem, const int *table, CONVERTER convert, void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) diff --git a/Modules/expat/xmltok.h b/Modules/expat/xmltok.h index c51fce1ec1518b..79a9fb76871f10 100644 --- a/Modules/expat/xmltok.h +++ b/Modules/expat/xmltok.h @@ -35,113 +35,113 @@ */ #ifndef XmlTok_INCLUDED -#define XmlTok_INCLUDED 1 +# define XmlTok_INCLUDED 1 -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif /* The following token may be returned by XmlContentTok */ -#define XML_TOK_TRAILING_RSQB \ - -5 /* ] or ]] at the end of the scan; might be \ - start of illegal ]]> sequence */ +# define XML_TOK_TRAILING_RSQB \ + -5 /* ] or ]] at the end of the scan; might be \ + start of illegal ]]> sequence */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. */ -#define XML_TOK_NONE -4 /* The string to be scanned is empty */ -#define XML_TOK_TRAILING_CR \ - -3 /* A CR at the end of the scan; \ - might be part of CRLF sequence */ -#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ -#define XML_TOK_PARTIAL -1 /* only part of a token */ -#define XML_TOK_INVALID 0 +# define XML_TOK_NONE -4 /* The string to be scanned is empty */ +# define XML_TOK_TRAILING_CR \ + -3 /* A CR at the end of the scan; \ + might be part of CRLF sequence */ +# define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ +# define XML_TOK_PARTIAL -1 /* only part of a token */ +# define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. */ -#define XML_TOK_START_TAG_WITH_ATTS 1 -#define XML_TOK_START_TAG_NO_ATTS 2 -#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ -#define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 -#define XML_TOK_END_TAG 5 -#define XML_TOK_DATA_CHARS 6 -#define XML_TOK_DATA_NEWLINE 7 -#define XML_TOK_CDATA_SECT_OPEN 8 -#define XML_TOK_ENTITY_REF 9 -#define XML_TOK_CHAR_REF 10 /* numeric character reference */ +# define XML_TOK_START_TAG_WITH_ATTS 1 +# define XML_TOK_START_TAG_NO_ATTS 2 +# define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ +# define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 +# define XML_TOK_END_TAG 5 +# define XML_TOK_DATA_CHARS 6 +# define XML_TOK_DATA_NEWLINE 7 +# define XML_TOK_CDATA_SECT_OPEN 8 +# define XML_TOK_ENTITY_REF 9 +# define XML_TOK_CHAR_REF 10 /* numeric character reference */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok. */ -#define XML_TOK_PI 11 /* processing instruction */ -#define XML_TOK_XML_DECL 12 /* XML decl or text decl */ -#define XML_TOK_COMMENT 13 -#define XML_TOK_BOM 14 /* Byte order mark */ +# define XML_TOK_PI 11 /* processing instruction */ +# define XML_TOK_XML_DECL 12 /* XML decl or text decl */ +# define XML_TOK_COMMENT 13 +# define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ -#define XML_TOK_PROLOG_S 15 -#define XML_TOK_DECL_OPEN 16 /* <!foo */ -#define XML_TOK_DECL_CLOSE 17 /* > */ -#define XML_TOK_NAME 18 -#define XML_TOK_NMTOKEN 19 -#define XML_TOK_POUND_NAME 20 /* #name */ -#define XML_TOK_OR 21 /* | */ -#define XML_TOK_PERCENT 22 -#define XML_TOK_OPEN_PAREN 23 -#define XML_TOK_CLOSE_PAREN 24 -#define XML_TOK_OPEN_BRACKET 25 -#define XML_TOK_CLOSE_BRACKET 26 -#define XML_TOK_LITERAL 27 -#define XML_TOK_PARAM_ENTITY_REF 28 -#define XML_TOK_INSTANCE_START 29 +# define XML_TOK_PROLOG_S 15 +# define XML_TOK_DECL_OPEN 16 /* <!foo */ +# define XML_TOK_DECL_CLOSE 17 /* > */ +# define XML_TOK_NAME 18 +# define XML_TOK_NMTOKEN 19 +# define XML_TOK_POUND_NAME 20 /* #name */ +# define XML_TOK_OR 21 /* | */ +# define XML_TOK_PERCENT 22 +# define XML_TOK_OPEN_PAREN 23 +# define XML_TOK_CLOSE_PAREN 24 +# define XML_TOK_OPEN_BRACKET 25 +# define XML_TOK_CLOSE_BRACKET 26 +# define XML_TOK_LITERAL 27 +# define XML_TOK_PARAM_ENTITY_REF 28 +# define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ -#define XML_TOK_NAME_QUESTION 30 /* name? */ -#define XML_TOK_NAME_ASTERISK 31 /* name* */ -#define XML_TOK_NAME_PLUS 32 /* name+ */ -#define XML_TOK_COND_SECT_OPEN 33 /* <![ */ -#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */ -#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ -#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ -#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ -#define XML_TOK_COMMA 38 +# define XML_TOK_NAME_QUESTION 30 /* name? */ +# define XML_TOK_NAME_ASTERISK 31 /* name* */ +# define XML_TOK_NAME_PLUS 32 /* name+ */ +# define XML_TOK_COND_SECT_OPEN 33 /* <![ */ +# define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */ +# define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ +# define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ +# define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ +# define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ -#define XML_TOK_ATTRIBUTE_VALUE_S 39 +# define XML_TOK_ATTRIBUTE_VALUE_S 39 /* The following token is returned only by XmlCdataSectionTok */ -#define XML_TOK_CDATA_SECT_CLOSE 40 +# define XML_TOK_CDATA_SECT_CLOSE 40 /* With namespace processing this is returned by XmlPrologTok for a name with a colon. */ -#define XML_TOK_PREFIXED_NAME 41 +# define XML_TOK_PREFIXED_NAME 41 -#ifdef XML_DTD -# define XML_TOK_IGNORE_SECT 42 -#endif /* XML_DTD */ +# ifdef XML_DTD +# define XML_TOK_IGNORE_SECT 42 +# endif /* XML_DTD */ -#ifdef XML_DTD -# define XML_N_STATES 4 -#else /* not XML_DTD */ -# define XML_N_STATES 3 -#endif /* not XML_DTD */ +# ifdef XML_DTD +# define XML_N_STATES 4 +# else /* not XML_DTD */ +# define XML_N_STATES 3 +# endif /* not XML_DTD */ -#define XML_PROLOG_STATE 0 -#define XML_CONTENT_STATE 1 -#define XML_CDATA_SECTION_STATE 2 -#ifdef XML_DTD -# define XML_IGNORE_SECTION_STATE 3 -#endif /* XML_DTD */ +# define XML_PROLOG_STATE 0 +# define XML_CONTENT_STATE 1 +# define XML_CDATA_SECTION_STATE 2 +# ifdef XML_DTD +# define XML_IGNORE_SECTION_STATE 3 +# endif /* XML_DTD */ -#define XML_N_LITERAL_TYPES 2 -#define XML_ATTRIBUTE_VALUE_LITERAL 0 -#define XML_ENTITY_VALUE_LITERAL 1 +# define XML_N_LITERAL_TYPES 2 +# define XML_ATTRIBUTE_VALUE_LITERAL 0 +# define XML_ENTITY_VALUE_LITERAL 1 /* The size of the buffer passed to XmlUtf8Encode must be at least this. */ -#define XML_UTF8_ENCODE_MAX 4 +# define XML_UTF8_ENCODE_MAX 4 /* The size of the buffer passed to XmlUtf16Encode must be at least this. */ -#define XML_UTF16_ENCODE_MAX 2 +# define XML_UTF16_ENCODE_MAX 2 typedef struct position { /* first line and first column are 0 not 1 */ @@ -220,63 +220,63 @@ struct encoding { the prolog outside literals, comments and processing instructions. */ -#define XmlTok(enc, state, ptr, end, nextTokPtr) \ - (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) +# define XmlTok(enc, state, ptr, end, nextTokPtr) \ + (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) -#define XmlPrologTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) +# define XmlPrologTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) -#define XmlContentTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) +# define XmlContentTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) -#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) +# define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) -#ifdef XML_DTD +# ifdef XML_DTD -# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ - XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) +# define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) -#endif /* XML_DTD */ +# endif /* XML_DTD */ /* This is used for performing a 2nd-level tokenization on the content of a literal that has already been returned by XmlTok. */ -#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ - (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) +# define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ + (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) -#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) +# define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ - XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) +# define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ + XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) -#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ - (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) +# define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ + (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) -#define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) +# define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) -#define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) +# define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) -#define XmlGetAttributes(enc, ptr, attsMax, atts) \ - (((enc)->getAtts)(enc, ptr, attsMax, atts)) +# define XmlGetAttributes(enc, ptr, attsMax, atts) \ + (((enc)->getAtts)(enc, ptr, attsMax, atts)) -#define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) +# define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) -#define XmlPredefinedEntityName(enc, ptr, end) \ - (((enc)->predefinedEntityName)(enc, ptr, end)) +# define XmlPredefinedEntityName(enc, ptr, end) \ + (((enc)->predefinedEntityName)(enc, ptr, end)) -#define XmlUpdatePosition(enc, ptr, end, pos) \ - (((enc)->updatePosition)(enc, ptr, end, pos)) +# define XmlUpdatePosition(enc, ptr, end, pos) \ + (((enc)->updatePosition)(enc, ptr, end, pos)) -#define XmlIsPublicId(enc, ptr, end, badPtr) \ - (((enc)->isPublicId)(enc, ptr, end, badPtr)) +# define XmlIsPublicId(enc, ptr, end, badPtr) \ + (((enc)->isPublicId)(enc, ptr, end, badPtr)) -#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ - (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) +# define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ + (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) -#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ - (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) +# define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ + (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) typedef struct { ENCODING initEnc; @@ -299,7 +299,7 @@ int XmlSizeOfUnknownEncoding(void); typedef int(XMLCALL *CONVERTER)(void *userData, const char *p); -ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, +ENCODING *XmlInitUnknownEncoding(void *mem, const int *table, CONVERTER convert, void *userData); int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, @@ -312,10 +312,10 @@ int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, const char *name); const ENCODING *XmlGetUtf8InternalEncodingNS(void); const ENCODING *XmlGetUtf16InternalEncodingNS(void); -ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, - void *userData); -#ifdef __cplusplus +ENCODING *XmlInitUnknownEncodingNS(void *mem, const int *table, + CONVERTER convert, void *userData); +# ifdef __cplusplus } -#endif +# endif #endif /* not XmlTok_INCLUDED */ diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c index d49ce794d88674..2495faa38170b7 100644 --- a/Modules/faulthandler.c +++ b/Modules/faulthandler.c @@ -525,6 +525,11 @@ faulthandler_enable(void) } #endif + // gh-137185: Initialize C stack trace dumping outside of the signal + // handler. Specifically, we call backtrace() to ensure that libgcc is + // dynamically loaded outside of the signal handler. + _Py_InitDumpStack(); + for (size_t i=0; i < faulthandler_nsignals; i++) { fault_handler_t *handler; int err; diff --git a/Modules/fcntlmodule.c b/Modules/fcntlmodule.c index 220ee9ecdffc8a..690882e34a8e2c 100644 --- a/Modules/fcntlmodule.c +++ b/Modules/fcntlmodule.c @@ -41,22 +41,28 @@ fcntl.fcntl arg: object(c_default='NULL') = 0 / -Perform the operation `cmd` on file descriptor fd. - -The values used for `cmd` are operating system dependent, and are available -as constants in the fcntl module, using the same names as used in -the relevant C header files. The argument arg is optional, and -defaults to 0; it may be an int or a string. If arg is given as a string, -the return value of fcntl is a string of that length, containing the -resulting value put in the arg buffer by the operating system. The length -of the arg string is not allowed to exceed 1024 bytes. If the arg given -is an integer or if none is specified, the result value is an integer -corresponding to the return value of the fcntl call in the C code. +Perform the operation cmd on file descriptor fd. + +The values used for cmd are operating system dependent, and are +available as constants in the fcntl module, using the same names as used +in the relevant C header files. The argument arg is optional, and +defaults to 0; it may be an integer, a bytes-like object or a string. +If arg is given as a string, it will be encoded to binary using the +UTF-8 encoding. + +If the arg given is an integer or if none is specified, the result value +is an integer corresponding to the return value of the fcntl() call in +the C code. + +If arg is given as a bytes-like object, the return value of fcntl() is a +bytes object of that length, containing the resulting value put in the +arg buffer by the operating system. The length of the arg buffer is not +allowed to exceed 1024 bytes. [clinic start generated code]*/ static PyObject * fcntl_fcntl_impl(PyObject *module, int fd, int code, PyObject *arg) -/*[clinic end generated code: output=888fc93b51c295bd input=7955340198e5f334]*/ +/*[clinic end generated code: output=888fc93b51c295bd input=56c6d6196a4854df]*/ { int ret; int async_err = 0; @@ -135,40 +141,40 @@ fcntl.ioctl mutate_flag as mutate_arg: bool = True / -Perform the operation `request` on file descriptor `fd`. +Perform the operation request on file descriptor fd. -The values used for `request` are operating system dependent, and are available -as constants in the fcntl or termios library modules, using the same names as -used in the relevant C header files. +The values used for request are operating system dependent, and are +available as constants in the fcntl or termios library modules, using +the same names as used in the relevant C header files. -The argument `arg` is optional, and defaults to 0; it may be an int or a -buffer containing character data (most likely a string or an array). +The argument arg is optional, and defaults to 0; it may be an integer, a +bytes-like object or a string. If arg is given as a string, it will be +encoded to binary using the UTF-8 encoding. -If the argument is a mutable buffer (such as an array) and if the -mutate_flag argument (which is only allowed in this case) is true then the -buffer is (in effect) passed to the operating system and changes made by -the OS will be reflected in the contents of the buffer after the call has -returned. The return value is the integer returned by the ioctl system -call. +If the arg given is an integer or if none is specified, the result value +is an integer corresponding to the return value of the ioctl() call in +the C code. -If the argument is a mutable buffer and the mutable_flag argument is false, -the behavior is as if a string had been passed. +If the argument is a mutable buffer (such as a bytearray) and the +mutate_flag argument is true (default) then the buffer is (in effect) +passed to the operating system and changes made by the OS will be +reflected in the contents of the buffer after the call has returned. +The return value is the integer returned by the ioctl() system call. -If the argument is an immutable buffer (most likely a string) then a copy -of the buffer is passed to the operating system and the return value is a -string of the same length containing whatever the operating system put in -the buffer. The length of the arg buffer in this case is not allowed to -exceed 1024 bytes. +If the argument is a mutable buffer and the mutable_flag argument is +false, the behavior is as if an immutable buffer had been passed. -If the arg given is an integer or if none is specified, the result value is -an integer corresponding to the return value of the ioctl call in the C -code. +If the argument is an immutable buffer then a copy of the buffer is +passed to the operating system and the return value is a bytes object of +the same length containing whatever the operating system put in the +buffer. The length of the arg buffer in this case is not allowed to +exceed 1024 bytes. [clinic start generated code]*/ static PyObject * fcntl_ioctl_impl(PyObject *module, int fd, unsigned long code, PyObject *arg, int mutate_arg) -/*[clinic end generated code: output=f72baba2454d7a62 input=9c6cca5e2c339622]*/ +/*[clinic end generated code: output=f72baba2454d7a62 input=b69717a5588e086e]*/ { /* We use the unsigned non-checked 'I' format for the 'code' parameter because the system expects it to be a 32bit bit field value @@ -290,7 +296,7 @@ fcntl.flock operation as code: int / -Perform the lock operation `operation` on file descriptor `fd`. +Perform the lock operation on file descriptor fd. See the Unix manual page for flock(2) for details (On some systems, this function is emulated using fcntl()). @@ -298,7 +304,7 @@ function is emulated using fcntl()). static PyObject * fcntl_flock_impl(PyObject *module, int fd, int code) -/*[clinic end generated code: output=84059e2b37d2fc64 input=0bfc00f795953452]*/ +/*[clinic end generated code: output=84059e2b37d2fc64 input=ade68943e8599f0a]*/ { int ret; int async_err = 0; @@ -361,22 +367,22 @@ fcntl.lockf A wrapper around the fcntl() locking calls. -`fd` is the file descriptor of the file to lock or unlock, and operation is one -of the following values: +fd is the file descriptor of the file to lock or unlock, and operation +is one of the following values: LOCK_UN - unlock LOCK_SH - acquire a shared lock LOCK_EX - acquire an exclusive lock When operation is LOCK_SH or LOCK_EX, it can also be bitwise ORed with -LOCK_NB to avoid blocking on lock acquisition. If LOCK_NB is used and the -lock cannot be acquired, an OSError will be raised and the exception will -have an errno attribute set to EACCES or EAGAIN (depending on the operating -system -- for portability, check for either value). +LOCK_NB to avoid blocking on lock acquisition. If LOCK_NB is used and +the lock cannot be acquired, an OSError will be raised and the exception +will have an errno attribute set to EACCES or EAGAIN (depending on the +operating system -- for portability, check for either value). -`len` is the number of bytes to lock, with the default meaning to lock to -EOF. `start` is the byte offset, relative to `whence`, to that the lock -starts. `whence` is as with fileobj.seek(), specifically: +len is the number of bytes to lock, with the default meaning to lock to +EOF. start is the byte offset, relative to whence, to that the lock +starts. whence is as with fileobj.seek(), specifically: 0 - relative to the start of the file (SEEK_SET) 1 - relative to the current buffer position (SEEK_CUR) @@ -386,7 +392,7 @@ starts. `whence` is as with fileobj.seek(), specifically: static PyObject * fcntl_lockf_impl(PyObject *module, int fd, int code, PyObject *lenobj, PyObject *startobj, int whence) -/*[clinic end generated code: output=4985e7a172e7461a input=5480479fc63a04b8]*/ +/*[clinic end generated code: output=4985e7a172e7461a input=369bef4d7a1c5ff4]*/ { int ret; int async_err = 0; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index ad13496b06deaf..a09c4856f1166d 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -476,7 +476,7 @@ PyDoc_STRVAR(gc__doc__, "set_debug() -- Set debugging flags.\n" "get_debug() -- Get debugging flags.\n" "set_threshold() -- Set the collection thresholds.\n" -"get_threshold() -- Return the current the collection thresholds.\n" +"get_threshold() -- Return the current collection thresholds.\n" "get_objects() -- Return a list of all objects tracked by the collector.\n" "is_tracked() -- Returns true if a given object is tracked.\n" "is_finalized() -- Returns true if a given object has been already finalized.\n" diff --git a/Modules/getpath.py b/Modules/getpath.py index be2210345afbda..b89d7427e3febd 100644 --- a/Modules/getpath.py +++ b/Modules/getpath.py @@ -364,10 +364,9 @@ def search_up(prefix, *landmarks, test=isfile): venv_prefix = None pyvenvcfg = [] - # Search for the 'home' key in pyvenv.cfg. Currently, we don't consider the - # presence of a pyvenv.cfg file without a 'home' key to signify the - # existence of a virtual environment — we quietly ignore them. - # XXX: If we don't find a 'home' key, we don't look for another pyvenv.cfg! + # Search for the 'home' key in pyvenv.cfg. If a home key isn't found, + # then it means a venv is active and home is based on the venv's + # executable (if its a symlink, home is where the symlink points). for line in pyvenvcfg: key, had_equ, value = line.partition('=') if had_equ and key.strip().lower() == 'home': @@ -412,10 +411,8 @@ def search_up(prefix, *landmarks, test=isfile): if isfile(candidate): base_executable = candidate break + # home key found; stop iterating over lines break - else: - # We didn't find a 'home' key in pyvenv.cfg (no break), reset venv_prefix. - venv_prefix = None # ****************************************************************************** diff --git a/Modules/grpmodule.c b/Modules/grpmodule.c index 29da9936b65504..652958618a2c4c 100644 --- a/Modules/grpmodule.c +++ b/Modules/grpmodule.c @@ -55,6 +55,11 @@ get_grp_state(PyObject *module) static struct PyModuleDef grpmodule; +/* Mutex to protect calls to getgrgid(), getgrnam(), and getgrent(). + * These functions return pointer to static data structure, which + * may be overwritten by any subsequent calls. */ +static PyMutex group_db_mutex = {0}; + #define DEFAULT_BUFFER_SIZE 1024 static PyObject * @@ -168,9 +173,15 @@ grp_getgrgid_impl(PyObject *module, PyObject *id) Py_END_ALLOW_THREADS #else + PyMutex_Lock(&group_db_mutex); + // The getgrgid() function need not be thread-safe. + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/getgrgid.html p = getgrgid(gid); #endif if (p == NULL) { +#ifndef HAVE_GETGRGID_R + PyMutex_Unlock(&group_db_mutex); +#endif PyMem_RawFree(buf); if (nomem == 1) { return PyErr_NoMemory(); @@ -185,6 +196,8 @@ grp_getgrgid_impl(PyObject *module, PyObject *id) retval = mkgrent(module, p); #ifdef HAVE_GETGRGID_R PyMem_RawFree(buf); +#else + PyMutex_Unlock(&group_db_mutex); #endif return retval; } @@ -249,9 +262,15 @@ grp_getgrnam_impl(PyObject *module, PyObject *name) Py_END_ALLOW_THREADS #else + PyMutex_Lock(&group_db_mutex); + // The getgrnam() function need not be thread-safe. + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/getgrnam.html p = getgrnam(name_chars); #endif if (p == NULL) { +#ifndef HAVE_GETGRNAM_R + PyMutex_Unlock(&group_db_mutex); +#endif if (nomem == 1) { PyErr_NoMemory(); } @@ -261,6 +280,9 @@ grp_getgrnam_impl(PyObject *module, PyObject *name) goto out; } retval = mkgrent(module, p); +#ifndef HAVE_GETGRNAM_R + PyMutex_Unlock(&group_db_mutex); +#endif out: PyMem_RawFree(buf); Py_DECREF(bytes); @@ -285,8 +307,7 @@ grp_getgrall_impl(PyObject *module) return NULL; } - static PyMutex getgrall_mutex = {0}; - PyMutex_Lock(&getgrall_mutex); + PyMutex_Lock(&group_db_mutex); setgrent(); struct group *p; @@ -306,7 +327,7 @@ grp_getgrall_impl(PyObject *module) done: endgrent(); - PyMutex_Unlock(&getgrall_mutex); + PyMutex_Unlock(&group_db_mutex); return d; } diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 7105e68af7b806..a80b195a765792 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -76,3 +76,32 @@ * to allow the user to optimize based on the platform they're using. */ #define HASHLIB_GIL_MINSIZE 2048 +static inline int +_Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) +{ + if (data != NULL && string == NULL) { + // called as H(data) or H(data=...) + *res = data; + return 1; + } + else if (data == NULL && string != NULL) { + // called as H(string=...) + *res = string; + return 1; + } + else if (data == NULL && string == NULL) { + // fast path when no data is given + assert(!PyErr_Occurred()); + *res = NULL; + return 0; + } + else { + // called as H(data=..., string) + *res = NULL; + PyErr_SetString(PyExc_TypeError, + "'data' and 'string' are mutually exclusive " + "and support for 'string' keyword parameter " + "is slated for removal in a future version."); + return -1; + } +} diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index c7b49d4dee3d0a..8cd470f4f80b3a 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -31,14 +31,15 @@ #endif #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_HMAC.h" #include "_hacl/Hacl_Streaming_HMAC.h" // Hacl_Agile_Hash_* identifiers @@ -234,24 +235,24 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, uint32_t, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, uint32_t, (C statements)) */ #ifndef NDEBUG #define Py_HMAC_HACL_UPDATE_ONCE( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ Py_CHECK_HACL_UINT32_T_LENGTH(LEN); \ hacl_errno_t code = Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN); \ - if (_hacl_convert_errno(code, (ALGORITHM)) < 0) { \ + if (_hacl_convert_errno(code) < 0) { \ ERRACTION; \ } \ } while (0) #else #define Py_HMAC_HACL_UPDATE_ONCE( \ HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ + _ERRACTION \ ) \ do { \ (void)Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, (LEN)); \ @@ -274,17 +275,17 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) */ #ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 #define Py_HMAC_HACL_UPDATE_LOOP( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ while ((Py_ssize_t)LEN > UINT32_MAX_AS_SSIZE_T) { \ Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, UINT32_MAX, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ BUF += UINT32_MAX; \ LEN -= UINT32_MAX; \ } \ @@ -292,7 +293,7 @@ typedef struct py_hmac_hacl_api { #else #define Py_HMAC_HACL_UPDATE_LOOP( \ HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ + _ERRACTION \ ) #endif @@ -301,17 +302,17 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) */ #define Py_HMAC_HACL_UPDATE( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ Py_HMAC_HACL_UPDATE_LOOP(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ } while (0) /* @@ -464,7 +465,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) { switch (kind) { case Py_hmac_kind_hmac_blake2s_32: { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (state->can_run_simd128) { return Py_hmac_kind_hmac_vectorized_blake2s_32; } @@ -472,7 +473,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) return kind; } case Py_hmac_kind_hmac_blake2b_32: { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (state->can_run_simd256) { return Py_hmac_kind_hmac_vectorized_blake2b_32; } @@ -491,38 +492,40 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) * Otherwise, this sets an appropriate exception and returns -1. */ static int -_hacl_convert_errno(hacl_errno_t code, PyObject *algorithm) +_hacl_convert_errno(hacl_errno_t code) { + assert(PyGILState_GetThisThreadState() != NULL); + if (code == Hacl_Streaming_Types_Success) { + return 0; + } + + PyGILState_STATE gstate = PyGILState_Ensure(); switch (code) { - case Hacl_Streaming_Types_Success: { - return 0; - } case Hacl_Streaming_Types_InvalidAlgorithm: { - // only makes sense if an algorithm is known at call time - assert(algorithm != NULL); - assert(PyUnicode_CheckExact(algorithm)); - PyErr_Format(PyExc_ValueError, "invalid algorithm: %U", algorithm); - return -1; + PyErr_SetString(PyExc_ValueError, "invalid HACL* algorithm"); + break; } case Hacl_Streaming_Types_InvalidLength: { PyErr_SetString(PyExc_ValueError, "invalid length"); - return -1; + break; } case Hacl_Streaming_Types_MaximumLengthExceeded: { PyErr_SetString(PyExc_OverflowError, "maximum length exceeded"); - return -1; + break; } case Hacl_Streaming_Types_OutOfMemory: { PyErr_NoMemory(); - return -1; + break; } default: { PyErr_Format(PyExc_RuntimeError, - "HACL* internal routine failed with error code: %d", + "HACL* internal routine failed with error code: %u", code); - return -1; + break; } } + PyGILState_Release(gstate); + return -1; } /* @@ -536,7 +539,7 @@ _hacl_hmac_state_new(HMAC_Hash_Kind kind, uint8_t *key, uint32_t len) assert(kind != Py_hmac_kind_hash_unknown); HACL_HMAC_state *state = NULL; hacl_errno_t retcode = Hacl_Streaming_HMAC_malloc_(kind, key, len, &state); - if (_hacl_convert_errno(retcode, NULL) < 0) { + if (_hacl_convert_errno(retcode) < 0) { assert(state == NULL); return NULL; } @@ -804,13 +807,13 @@ hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) } if (len < HASHLIB_GIL_MINSIZE) { - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, return -1); + Py_HMAC_HACL_UPDATE(self->state, msg, len, return -1); return 0; } int res = 0; Py_BEGIN_ALLOW_THREADS - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, msg, len, goto error); goto done; #ifndef NDEBUG error: @@ -978,7 +981,7 @@ hmac_update_state_with_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) int res = 0; Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); goto done; #ifndef NDEBUG error: @@ -1005,7 +1008,7 @@ static int hmac_update_state_cond_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) { ENTER_HASHLIB(self); // conditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); LEAVE_HASHLIB(self); return 0; @@ -1076,7 +1079,7 @@ hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest) rc == Hacl_Streaming_Types_Success || rc == Hacl_Streaming_Types_OutOfMemory ); - return _hacl_convert_errno(rc, NULL); + return _hacl_convert_errno(rc); } /*[clinic input] @@ -1759,7 +1762,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -1769,7 +1772,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 943c1e8607b38f..a48d2fe76d0bf2 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -376,7 +376,7 @@ pairwise_next(PyObject *op) } result = po->result; - if (Py_REFCNT(result) == 1) { + if (_PyObject_IsUniquelyReferenced(result)) { Py_INCREF(result); PyObject *last_old = PyTuple_GET_ITEM(result, 0); PyObject *last_new = PyTuple_GET_ITEM(result, 1); @@ -802,7 +802,7 @@ teedataobject_traverse(PyObject *op, visitproc visit, void * arg) static void teedataobject_safe_decref(PyObject *obj) { - while (obj && Py_REFCNT(obj) == 1) { + while (obj && _PyObject_IsUniquelyReferenced(obj)) { teedataobject *tmp = teedataobject_CAST(obj); PyObject *nextlink = tmp->nextlink; tmp->nextlink = NULL; @@ -2114,7 +2114,7 @@ product_next(PyObject *op) Py_ssize_t *indices = lz->indices; /* Copy the previous result tuple or re-use it if available */ - if (Py_REFCNT(result) > 1) { + if (!_PyObject_IsUniquelyReferenced(result)) { PyObject *old_result = result; result = _PyTuple_FromArray(_PyTuple_ITEMS(old_result), npools); if (result == NULL) @@ -2343,7 +2343,7 @@ combinations_next(PyObject *op) } } else { /* Copy the previous result tuple or re-use it if available */ - if (Py_REFCNT(result) > 1) { + if (!_PyObject_IsUniquelyReferenced(result)) { PyObject *old_result = result; result = _PyTuple_FromArray(_PyTuple_ITEMS(old_result), r); if (result == NULL) @@ -2589,7 +2589,7 @@ cwr_next(PyObject *op) } } else { /* Copy the previous result tuple or re-use it if available */ - if (Py_REFCNT(result) > 1) { + if (!_PyObject_IsUniquelyReferenced(result)) { PyObject *old_result = result; result = _PyTuple_FromArray(_PyTuple_ITEMS(old_result), r); if (result == NULL) @@ -2850,7 +2850,7 @@ permutations_next(PyObject *op) goto empty; /* Copy the previous result tuple or re-use it if available */ - if (Py_REFCNT(result) > 1) { + if (!_PyObject_IsUniquelyReferenced(result)) { PyObject *old_result = result; result = _PyTuple_FromArray(_PyTuple_ITEMS(old_result), r); if (result == NULL) @@ -3818,7 +3818,7 @@ zip_longest_next(PyObject *op) return NULL; if (lz->numactive == 0) return NULL; - if (Py_REFCNT(result) == 1) { + if (_PyObject_IsUniquelyReferenced(result)) { Py_INCREF(result); for (i=0 ; i < tuplesize ; i++) { it = PyTuple_GET_ITEM(lz->ittuple, i); diff --git a/Modules/main.c b/Modules/main.c index ea1239ecc57f00..9b2ee103d52662 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -269,13 +269,14 @@ pymain_run_command(wchar_t *command) static int -pymain_start_pyrepl_no_main(void) +pymain_start_pyrepl(int pythonstartup) { int res = 0; PyObject *console = NULL; PyObject *empty_tuple = NULL; PyObject *kwargs = NULL; PyObject *console_result = NULL; + PyObject *main_module = NULL; PyObject *pyrepl = PyImport_ImportModule("_pyrepl.main"); if (pyrepl == NULL) { @@ -299,7 +300,13 @@ pymain_start_pyrepl_no_main(void) res = pymain_exit_err_print(); goto done; } - if (!PyDict_SetItemString(kwargs, "pythonstartup", _PyLong_GetOne())) { + main_module = PyImport_AddModuleRef("__main__"); + if (main_module == NULL) { + res = pymain_exit_err_print(); + goto done; + } + if (!PyDict_SetItemString(kwargs, "mainmodule", main_module) + && !PyDict_SetItemString(kwargs, "pythonstartup", pythonstartup ? Py_True : Py_False)) { console_result = PyObject_Call(console, empty_tuple, kwargs); if (console_result == NULL) { res = pymain_exit_err_print(); @@ -311,6 +318,7 @@ pymain_start_pyrepl_no_main(void) Py_XDECREF(empty_tuple); Py_XDECREF(console); Py_XDECREF(pyrepl); + Py_XDECREF(main_module); return res; } @@ -562,7 +570,7 @@ pymain_run_stdin(PyConfig *config) int run = PyRun_AnyFileExFlags(stdin, "<stdin>", 0, &cf); return (run != 0); } - return pymain_run_module(L"_pyrepl", 0); + return pymain_start_pyrepl(0); } @@ -595,7 +603,7 @@ pymain_repl(PyConfig *config, int *exitcode) *exitcode = (run != 0); return; } - int run = pymain_start_pyrepl_no_main(); + int run = pymain_start_pyrepl(1); *exitcode = (run != 0); return; } diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 11d9b7418a25a2..71d9c1387f5780 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2008,13 +2008,11 @@ math.factorial / Find n!. - -Raise a ValueError if x is negative or non-integral. [clinic start generated code]*/ static PyObject * math_factorial(PyObject *module, PyObject *arg) -/*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/ +/*[clinic end generated code: output=6686f26fae00e9ca input=366cc321df3d4773]*/ { long x, two_valuation; int overflow; @@ -2163,6 +2161,27 @@ math_ldexp_impl(PyObject *module, double x, PyObject *i) } else { errno = 0; r = ldexp(x, (int)exp); +#ifdef _MSC_VER + if (DBL_MIN > r && r > -DBL_MIN) { + /* Denormal (or zero) results can be incorrectly rounded here (rather, + truncated). Fixed in newer versions of the C runtime, included + with Windows 11. */ + int original_exp; + frexp(x, &original_exp); + if (original_exp > DBL_MIN_EXP) { + /* Shift down to the smallest normal binade. No bits lost. */ + int shift = DBL_MIN_EXP - original_exp; + x = ldexp(x, shift); + exp -= shift; + } + /* Multiplying by 2**exp finishes the job, and the HW will round as + appropriate. Note: if exp < -DBL_MANT_DIG, all of x is shifted + to be < 0.5ULP of smallest denorm, so should be thrown away. If + exp is so very negative that ldexp underflows to 0, that's fine; + no need to check in advance. */ + r = x*ldexp(1.0, (int)exp); + } +#endif if (isinf(r)) errno = ERANGE; } diff --git a/Modules/md5module.c b/Modules/md5module.c index c36eb41d4d201e..9b5ea2d6e02605 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -276,17 +276,24 @@ static PyType_Spec md5_type_spec = { /*[clinic input] _md5.md5 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new MD5 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=587071f76254a4ac input=7a144a1905636985]*/ +_md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=d45e187d3d16f3a8 input=7ea5c5366dbb44bf]*/ { + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } + MD5object *new; Py_buffer buf; diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 6a385562845849..43f4763c4fdc0d 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -23,8 +23,10 @@ #endif #include <Python.h> +#include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t() #include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_fileutils.h" // _Py_stat_struct +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> // offsetof() #ifndef MS_WINDOWS @@ -89,6 +91,12 @@ my_getpagesize(void) # define MAP_ANONYMOUS MAP_ANON #endif +/*[clinic input] +module mmap +class mmap.mmap "mmap_object *" "" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=82a9f8a529905b9b]*/ + typedef enum { ACCESS_DEFAULT, @@ -118,6 +126,7 @@ typedef struct { #ifdef UNIX int fd; _Bool trackfd; + int flags; #endif PyObject *weakreflist; @@ -126,6 +135,27 @@ typedef struct { #define mmap_object_CAST(op) ((mmap_object *)(op)) +#include "clinic/mmapmodule.c.h" + + +/* Return a Py_ssize_t from the object arg. This conversion logic is similar + to what AC uses for `Py_ssize_t` arguments. + + Returns -1 on error. Use PyErr_Occurred() to disambiguate. +*/ +static Py_ssize_t +_As_Py_ssize_t(PyObject *arg) { + assert(arg != NULL); + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(arg); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + return ival; +} + + static int mmap_object_traverse(PyObject *op, visitproc visit, void *arg) { @@ -163,17 +193,22 @@ mmap_object_dealloc(PyObject *op) Py_END_ALLOW_THREADS #endif /* UNIX */ - if (m_obj->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, m_obj->weakreflist); tp->tp_free(m_obj); Py_DECREF(tp); } +/*[clinic input] +@critical_section +mmap.mmap.close + +[clinic start generated code]*/ + static PyObject * -mmap_close_method(PyObject *op, PyObject *Py_UNUSED(ignored)) +mmap_mmap_close_impl(mmap_object *self) +/*[clinic end generated code: output=a1ae0c727546f78d input=25020035f047eae1]*/ { - mmap_object *self = mmap_object_CAST(op); if (self->exports > 0) { PyErr_SetString(PyExc_BufferError, "cannot close "\ "exported pointers exist"); @@ -461,10 +496,16 @@ _safe_PyBytes_FromStringAndSize(char *start, size_t num_bytes) { } } +/*[clinic input] +@critical_section +mmap.mmap.read_byte + +[clinic start generated code]*/ + static PyObject * -mmap_read_byte_method(PyObject *op, PyObject *Py_UNUSED(ignored)) +mmap_mmap_read_byte_impl(mmap_object *self) +/*[clinic end generated code: output=d931da1319f3869b input=5b8c6a904bdddda9]*/ { - mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); if (self->pos >= self->size) { PyErr_SetString(PyExc_ValueError, "read byte out of range"); @@ -478,12 +519,18 @@ mmap_read_byte_method(PyObject *op, PyObject *Py_UNUSED(ignored)) return PyLong_FromLong((unsigned char) dest); } +/*[clinic input] +@critical_section +mmap.mmap.readline + +[clinic start generated code]*/ + static PyObject * -mmap_read_line_method(PyObject *op, PyObject *Py_UNUSED(ignored)) +mmap_mmap_readline_impl(mmap_object *self) +/*[clinic end generated code: output=b9d2bf9999283311 input=2c4efd1d06e1cdd1]*/ { Py_ssize_t remaining; char *start, *eol; - mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); @@ -508,16 +555,22 @@ mmap_read_line_method(PyObject *op, PyObject *Py_UNUSED(ignored)) return result; } +/*[clinic input] +@critical_section +mmap.mmap.read + + n as num_bytes: object(converter='_Py_convert_optional_to_ssize_t', type='Py_ssize_t', c_default='PY_SSIZE_T_MAX') = None + / + +[clinic start generated code]*/ + static PyObject * -mmap_read_method(PyObject *op, PyObject *args) +mmap_mmap_read_impl(mmap_object *self, Py_ssize_t num_bytes) +/*[clinic end generated code: output=3b4d4f3704ed0969 input=8f97f361d435e357]*/ { - Py_ssize_t num_bytes = PY_SSIZE_T_MAX, remaining; - mmap_object *self = mmap_object_CAST(op); + Py_ssize_t remaining; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "|n?:read", &num_bytes)) - return NULL; - CHECK_VALID(NULL); /* silently 'adjust' out-of-range requests */ remaining = (self->pos < self->size) ? self->size - self->pos : 0; @@ -533,81 +586,105 @@ mmap_read_method(PyObject *op, PyObject *args) } static PyObject * -mmap_gfind(mmap_object *self, - PyObject *args, - int reverse) +mmap_gfind_lock_held(mmap_object *self, Py_buffer *view, PyObject *start_obj, + PyObject *end_obj, int reverse) { Py_ssize_t start = self->pos; Py_ssize_t end = self->size; - Py_buffer view; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find", - &view, &start, &end)) { - return NULL; - } - else { - if (start < 0) - start += self->size; - if (start < 0) - start = 0; - else if (start > self->size) - start = self->size; - - if (end < 0) - end += self->size; - if (end < 0) - end = 0; - else if (end > self->size) - end = self->size; - - Py_ssize_t index; - PyObject *result; - CHECK_VALID_OR_RELEASE(NULL, view); - if (end < start) { - result = PyLong_FromSsize_t(-1); + if (start_obj != Py_None) { + start = _As_Py_ssize_t(start_obj); + if (start == -1 && PyErr_Occurred()) { + return NULL; } - else if (reverse) { - assert(0 <= start && start <= end && end <= self->size); - if (_safe_PyBytes_ReverseFind(&index, self, - self->data + start, end - start, - view.buf, view.len, start) < 0) - { - result = NULL; - } - else { - result = PyLong_FromSsize_t(index); + + if (end_obj != Py_None) { + end = _As_Py_ssize_t(end_obj); + if (end == -1 && PyErr_Occurred()) { + return NULL; } } + } + + if (start < 0) + start += self->size; + if (start < 0) + start = 0; + else if (start > self->size) + start = self->size; + + if (end < 0) + end += self->size; + if (end < 0) + end = 0; + else if (end > self->size) + end = self->size; + + Py_ssize_t index; + PyObject *result; + CHECK_VALID(NULL); + if (end < start) { + result = PyLong_FromSsize_t(-1); + } + else if (reverse) { + assert(0 <= start && start <= end && end <= self->size); + if (_safe_PyBytes_ReverseFind(&index, self, + self->data + start, end - start, + view->buf, view->len, start) < 0) + { + result = NULL; + } else { - assert(0 <= start && start <= end && end <= self->size); - if (_safe_PyBytes_Find(&index, self, - self->data + start, end - start, - view.buf, view.len, start) < 0) - { - result = NULL; - } - else { - result = PyLong_FromSsize_t(index); - } + result = PyLong_FromSsize_t(index); } - PyBuffer_Release(&view); - return result; } + else { + assert(0 <= start && start <= end && end <= self->size); + if (_safe_PyBytes_Find(&index, self, + self->data + start, end - start, + view->buf, view->len, start) < 0) + { + result = NULL; + } + else { + result = PyLong_FromSsize_t(index); + } + } + return result; } +/*[clinic input] +@critical_section +mmap.mmap.find + + view: Py_buffer + start: object = None + end: object = None + / + +[clinic start generated code]*/ + static PyObject * -mmap_find_method(PyObject *op, PyObject *args) +mmap_mmap_find_impl(mmap_object *self, Py_buffer *view, PyObject *start, + PyObject *end) +/*[clinic end generated code: output=ef8878a322f00192 input=0135504494b52c2b]*/ { - mmap_object *self = mmap_object_CAST(op); - return mmap_gfind(self, args, 0); + return mmap_gfind_lock_held(self, view, start, end, 0); } +/*[clinic input] +@critical_section +mmap.mmap.rfind = mmap.mmap.find + +[clinic start generated code]*/ + static PyObject * -mmap_rfind_method(PyObject *op, PyObject *args) +mmap_mmap_rfind_impl(mmap_object *self, Py_buffer *view, PyObject *start, + PyObject *end) +/*[clinic end generated code: output=73b918940d67c2b8 input=8aecdd1f70c06c62]*/ { - mmap_object *self = mmap_object_CAST(op); - return mmap_gfind(self, args, 1); + return mmap_gfind_lock_held(self, view, start, end, 1); } static int @@ -643,50 +720,55 @@ is_resizeable(mmap_object *self) } +/*[clinic input] +@critical_section +mmap.mmap.write + + bytes as data: Py_buffer + / + +[clinic start generated code]*/ + static PyObject * -mmap_write_method(PyObject *op, PyObject *args) +mmap_mmap_write_impl(mmap_object *self, Py_buffer *data) +/*[clinic end generated code: output=9e97063efb6fb27b input=3f16fa79aa89d6f7]*/ { - Py_buffer data; - mmap_object *self = mmap_object_CAST(op); - CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "y*:write", &data)) - return NULL; - if (!is_writable(self)) { - PyBuffer_Release(&data); return NULL; } - if (self->pos > self->size || self->size - self->pos < data.len) { - PyBuffer_Release(&data); + if (self->pos > self->size || self->size - self->pos < data->len) { PyErr_SetString(PyExc_ValueError, "data out of range"); return NULL; } - CHECK_VALID_OR_RELEASE(NULL, data); + CHECK_VALID(NULL); PyObject *result; - if (safe_memcpy(self->data + self->pos, data.buf, data.len) < 0) { + if (safe_memcpy(self->data + self->pos, data->buf, data->len) < 0) { result = NULL; } else { - self->pos += data.len; - result = PyLong_FromSsize_t(data.len); + self->pos += data->len; + result = PyLong_FromSsize_t(data->len); } - PyBuffer_Release(&data); return result; } +/*[clinic input] +@critical_section +mmap.mmap.write_byte + + byte as value: unsigned_char + / + +[clinic start generated code]*/ + static PyObject * -mmap_write_byte_method(PyObject *op, PyObject *args) +mmap_mmap_write_byte_impl(mmap_object *self, unsigned char value) +/*[clinic end generated code: output=aa11adada9b17510 input=32740bfa174f0991]*/ { - char value; - mmap_object *self = mmap_object_CAST(op); - CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "b:write_byte", &value)) - return(NULL); - if (!is_writable(self)) return NULL; @@ -696,17 +778,23 @@ mmap_write_byte_method(PyObject *op, PyObject *args) return NULL; } - if (safe_byte_copy(self->data + self->pos, &value) < 0) { + if (safe_byte_copy(self->data + self->pos, (const char*)&value) < 0) { return NULL; } self->pos++; Py_RETURN_NONE; } +/*[clinic input] +@critical_section +mmap.mmap.size + +[clinic start generated code]*/ + static PyObject * -mmap_size_method(PyObject *op, PyObject *Py_UNUSED(ignored)) +mmap_mmap_size_impl(mmap_object *self) +/*[clinic end generated code: output=c177e65e83a648ff input=f69c072efd2e1595]*/ { - mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); #ifdef MS_WINDOWS @@ -753,14 +841,21 @@ mmap_size_method(PyObject *op, PyObject *Py_UNUSED(ignored)) / new size? */ +/*[clinic input] +@critical_section +mmap.mmap.resize + + newsize as new_size: Py_ssize_t + / + +[clinic start generated code]*/ + static PyObject * -mmap_resize_method(PyObject *op, PyObject *args) +mmap_mmap_resize_impl(mmap_object *self, Py_ssize_t new_size) +/*[clinic end generated code: output=6f262537ce9c2dcc input=b6b5dee52a41b79f]*/ { - Py_ssize_t new_size; - mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "n:resize", &new_size) || - !is_resizeable(self)) { + if (!is_resizeable(self)) { return NULL; } if (new_size < 0 || PY_SSIZE_T_MAX - new_size < self->offset) { @@ -874,6 +969,13 @@ mmap_resize_method(PyObject *op, PyObject *args) #else void *newmap; +#ifdef __linux__ + if (self->fd == -1 && !(self->flags & MAP_PRIVATE) && new_size > self->size) { + PyErr_Format(PyExc_ValueError, + "mmap: can't expand a shared anonymous mapping on Linux"); + return NULL; + } +#endif if (self->fd != -1 && ftruncate(self->fd, self->offset + new_size) == -1) { PyErr_SetFromErrno(PyExc_OSError); return NULL; @@ -901,23 +1003,45 @@ mmap_resize_method(PyObject *op, PyObject *args) } } +/*[clinic input] +@critical_section +mmap.mmap.tell + +[clinic start generated code]*/ + static PyObject * -mmap_tell_method(PyObject *op, PyObject *Py_UNUSED(ignored)) +mmap_mmap_tell_impl(mmap_object *self) +/*[clinic end generated code: output=6034958630e1b1d1 input=fd163acacf45c3a5]*/ { - mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); return PyLong_FromSize_t(self->pos); } +/*[clinic input] +@critical_section +mmap.mmap.flush + + offset: Py_ssize_t = 0 + size as size_obj: object = None + / + +[clinic start generated code]*/ + static PyObject * -mmap_flush_method(PyObject *op, PyObject *args) +mmap_mmap_flush_impl(mmap_object *self, Py_ssize_t offset, + PyObject *size_obj) +/*[clinic end generated code: output=41a10c349ed1608a input=7e6bb8f9462f53e6]*/ { - Py_ssize_t offset = 0; - mmap_object *self = mmap_object_CAST(op); Py_ssize_t size = self->size; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "|nn:flush", &offset, &size)) - return NULL; + + if (size_obj != Py_None) { + size = _As_Py_ssize_t(size_obj); + if (size == -1 && PyErr_Occurred()) { + return NULL; + } + } + if (size < 0 || offset < 0 || self->size - offset < size) { PyErr_SetString(PyExc_ValueError, "flush values out of range"); return NULL; @@ -945,60 +1069,80 @@ mmap_flush_method(PyObject *op, PyObject *args) #endif } +/*[clinic input] +@critical_section +mmap.mmap.seek + + pos as dist: Py_ssize_t + whence as how: int = 0 + / + +[clinic start generated code]*/ + static PyObject * -mmap_seek_method(PyObject *op, PyObject *args) +mmap_mmap_seek_impl(mmap_object *self, Py_ssize_t dist, int how) +/*[clinic end generated code: output=00310494e8b8c592 input=e2fda5d081c3db22]*/ { - Py_ssize_t dist; - mmap_object *self = mmap_object_CAST(op); - int how=0; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "n|i:seek", &dist, &how)) - return NULL; - else { - Py_ssize_t where; - switch (how) { - case 0: /* relative to start */ - where = dist; - break; - case 1: /* relative to current position */ - if (PY_SSIZE_T_MAX - self->pos < dist) - goto onoutofrange; - where = self->pos + dist; - break; - case 2: /* relative to end */ - if (PY_SSIZE_T_MAX - self->size < dist) - goto onoutofrange; - where = self->size + dist; - break; - default: - PyErr_SetString(PyExc_ValueError, "unknown seek type"); - return NULL; - } - if (where > self->size || where < 0) + Py_ssize_t where; + switch (how) { + case 0: /* relative to start */ + where = dist; + break; + case 1: /* relative to current position */ + if (PY_SSIZE_T_MAX - self->pos < dist) + goto onoutofrange; + where = self->pos + dist; + break; + case 2: /* relative to end */ + if (PY_SSIZE_T_MAX - self->size < dist) goto onoutofrange; - self->pos = where; - return PyLong_FromSsize_t(self->pos); + where = self->size + dist; + break; + default: + PyErr_SetString(PyExc_ValueError, "unknown seek type"); + return NULL; } + if (where > self->size || where < 0) + goto onoutofrange; + self->pos = where; + return PyLong_FromSsize_t(self->pos); onoutofrange: PyErr_SetString(PyExc_ValueError, "seek out of range"); return NULL; } +/*[clinic input] +mmap.mmap.seekable + +[clinic start generated code]*/ + static PyObject * -mmap_seekable_method(PyObject *op, PyObject *Py_UNUSED(ignored)) +mmap_mmap_seekable_impl(mmap_object *self) +/*[clinic end generated code: output=6311dc3ea300fa38 input=5132505f6e259001]*/ { Py_RETURN_TRUE; } +/*[clinic input] +@critical_section +mmap.mmap.move + + dest: Py_ssize_t + src: Py_ssize_t + count as cnt: Py_ssize_t + / + +[clinic start generated code]*/ + static PyObject * -mmap_move_method(PyObject *op, PyObject *args) +mmap_mmap_move_impl(mmap_object *self, Py_ssize_t dest, Py_ssize_t src, + Py_ssize_t cnt) +/*[clinic end generated code: output=391f549a44181793 input=cf8cfe10d9f6b448]*/ { - Py_ssize_t dest, src, cnt; - mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "nnn:move", &dest, &src, &cnt) || - !is_writable(self)) { + if (!is_writable(self)) { return NULL; } else { /* bounds check the values */ @@ -1024,30 +1168,53 @@ static PyObject * mmap_closed_get(PyObject *op, void *Py_UNUSED(closure)) { mmap_object *self = mmap_object_CAST(op); + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); #ifdef MS_WINDOWS - return PyBool_FromLong(self->map_handle == NULL ? 1 : 0); + result = PyBool_FromLong(self->map_handle == NULL ? 1 : 0); #elif defined(UNIX) - return PyBool_FromLong(self->data == NULL ? 1 : 0); + result = PyBool_FromLong(self->data == NULL ? 1 : 0); #endif + Py_END_CRITICAL_SECTION(); + return result; } +/*[clinic input] +@critical_section +mmap.mmap.__enter__ + +[clinic start generated code]*/ + static PyObject * -mmap__enter__method(PyObject *op, PyObject *Py_UNUSED(ignored)) +mmap_mmap___enter___impl(mmap_object *self) +/*[clinic end generated code: output=92cfc59f4c4e2d26 input=a446541fbfe0b890]*/ { - mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); return Py_NewRef(self); } +/*[clinic input] +@critical_section +mmap.mmap.__exit__ + + exc_type: object + exc_value: object + traceback: object + / + +[clinic start generated code]*/ + static PyObject * -mmap__exit__method(PyObject *op, PyObject *Py_UNUSED(args)) +mmap_mmap___exit___impl(mmap_object *self, PyObject *exc_type, + PyObject *exc_value, PyObject *traceback) +/*[clinic end generated code: output=bec7e3e319c1f07e input=5f28e91cf752bc64]*/ { - return mmap_close_method(op, NULL); + return mmap_mmap_close_impl(self); } static PyObject * -mmap__repr__method(PyObject *op) +mmap__repr__method_lock_held(PyObject *op) { mmap_object *mobj = mmap_object_CAST(op); @@ -1091,11 +1258,27 @@ mmap__repr__method(PyObject *op) } } +static PyObject * +mmap__repr__method(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = mmap__repr__method_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + #ifdef MS_WINDOWS +/*[clinic input] +@critical_section +mmap.mmap.__sizeof__ + +[clinic start generated code]*/ + static PyObject * -mmap__sizeof__method(PyObject *op, PyObject *Py_UNUSED(dummy)) +mmap_mmap___sizeof___impl(mmap_object *self) +/*[clinic end generated code: output=1aed30daff807d09 input=8a648868a089553c]*/ { - mmap_object *self = mmap_object_CAST(op); size_t res = _PyObject_SIZE(Py_TYPE(self)); if (self->tagname) { res += (wcslen(self->tagname) + 1) * sizeof(self->tagname[0]); @@ -1105,18 +1288,26 @@ mmap__sizeof__method(PyObject *op, PyObject *Py_UNUSED(dummy)) #endif #if defined(MS_WINDOWS) && defined(Py_DEBUG) +/*[clinic input] +@critical_section +mmap.mmap._protect + + flNewProtect: unsigned_int(bitwise=True) + start: Py_ssize_t + length: Py_ssize_t + / + +[clinic start generated code]*/ + static PyObject * -mmap_protect_method(PyObject *op, PyObject *args) { - DWORD flNewProtect, flOldProtect; - Py_ssize_t start, length; - mmap_object *self = mmap_object_CAST(op); +mmap_mmap__protect_impl(mmap_object *self, unsigned int flNewProtect, + Py_ssize_t start, Py_ssize_t length) +/*[clinic end generated code: output=a87271a34d1ad6cf input=9170498c5e1482da]*/ +{ + DWORD flOldProtect; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "Inn:protect", &flNewProtect, &start, &length)) { - return NULL; - } - if (!VirtualProtect((void *) (self->data + start), length, flNewProtect, &flOldProtect)) { @@ -1129,18 +1320,32 @@ mmap_protect_method(PyObject *op, PyObject *args) { #endif #ifdef HAVE_MADVISE +/*[clinic input] +@critical_section +mmap.mmap.madvise + + option: int + start: Py_ssize_t = 0 + length as length_obj: object = None + / + +[clinic start generated code]*/ + static PyObject * -mmap_madvise_method(PyObject *op, PyObject *args) +mmap_mmap_madvise_impl(mmap_object *self, int option, Py_ssize_t start, + PyObject *length_obj) +/*[clinic end generated code: output=816be656f08c0e3c input=2d37f7a4c87f1053]*/ { - int option; - Py_ssize_t start = 0, length; - mmap_object *self = mmap_object_CAST(op); + Py_ssize_t length; CHECK_VALID(NULL); - length = self->size; - - if (!PyArg_ParseTuple(args, "i|nn:madvise", &option, &start, &length)) { - return NULL; + if (length_obj == Py_None) { + length = self->size; + } else { + length = _As_Py_ssize_t(length_obj); + if (length == -1 && PyErr_Occurred()) { + return NULL; + } } if (start < 0 || start >= self->size) { @@ -1176,32 +1381,26 @@ static struct PyMemberDef mmap_object_members[] = { }; static struct PyMethodDef mmap_object_methods[] = { - {"close", mmap_close_method, METH_NOARGS}, - {"find", mmap_find_method, METH_VARARGS}, - {"rfind", mmap_rfind_method, METH_VARARGS}, - {"flush", mmap_flush_method, METH_VARARGS}, -#ifdef HAVE_MADVISE - {"madvise", mmap_madvise_method, METH_VARARGS}, -#endif - {"move", mmap_move_method, METH_VARARGS}, - {"read", mmap_read_method, METH_VARARGS}, - {"read_byte", mmap_read_byte_method, METH_NOARGS}, - {"readline", mmap_read_line_method, METH_NOARGS}, - {"resize", mmap_resize_method, METH_VARARGS}, - {"seek", mmap_seek_method, METH_VARARGS}, - {"seekable", mmap_seekable_method, METH_NOARGS}, - {"size", mmap_size_method, METH_NOARGS}, - {"tell", mmap_tell_method, METH_NOARGS}, - {"write", mmap_write_method, METH_VARARGS}, - {"write_byte", mmap_write_byte_method, METH_VARARGS}, - {"__enter__", mmap__enter__method, METH_NOARGS}, - {"__exit__", mmap__exit__method, METH_VARARGS}, -#ifdef MS_WINDOWS - {"__sizeof__", mmap__sizeof__method, METH_NOARGS}, -#ifdef Py_DEBUG - {"_protect", mmap_protect_method, METH_VARARGS}, -#endif // Py_DEBUG -#endif // MS_WINDOWS + MMAP_MMAP_CLOSE_METHODDEF + MMAP_MMAP_FIND_METHODDEF + MMAP_MMAP_RFIND_METHODDEF + MMAP_MMAP_FLUSH_METHODDEF + MMAP_MMAP_MADVISE_METHODDEF + MMAP_MMAP_MOVE_METHODDEF + MMAP_MMAP_READ_METHODDEF + MMAP_MMAP_READ_BYTE_METHODDEF + MMAP_MMAP_READLINE_METHODDEF + MMAP_MMAP_RESIZE_METHODDEF + MMAP_MMAP_SEEK_METHODDEF + MMAP_MMAP_SEEKABLE_METHODDEF + MMAP_MMAP_SIZE_METHODDEF + MMAP_MMAP_TELL_METHODDEF + MMAP_MMAP_WRITE_METHODDEF + MMAP_MMAP_WRITE_BYTE_METHODDEF + MMAP_MMAP___ENTER___METHODDEF + MMAP_MMAP___EXIT___METHODDEF + MMAP_MMAP___SIZEOF___METHODDEF + MMAP_MMAP__PROTECT_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -1214,7 +1413,7 @@ static PyGetSetDef mmap_object_getset[] = { /* Functions for treating an mmap'ed file as a buffer */ static int -mmap_buffer_getbuf(PyObject *op, Py_buffer *view, int flags) +mmap_buffer_getbuf_lock_held(PyObject *op, Py_buffer *view, int flags) { mmap_object *self = mmap_object_CAST(op); CHECK_VALID(-1); @@ -1225,23 +1424,45 @@ mmap_buffer_getbuf(PyObject *op, Py_buffer *view, int flags) return 0; } +static int +mmap_buffer_getbuf(PyObject *op, Py_buffer *view, int flags) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(op); + result = mmap_buffer_getbuf_lock_held(op, view, flags); + Py_END_CRITICAL_SECTION(); + return result; +} + static void mmap_buffer_releasebuf(PyObject *op, Py_buffer *Py_UNUSED(view)) { mmap_object *self = mmap_object_CAST(op); + Py_BEGIN_CRITICAL_SECTION(self); self->exports--; + Py_END_CRITICAL_SECTION(); } static Py_ssize_t -mmap_length(PyObject *op) +mmap_length_lock_held(PyObject *op) { mmap_object *self = mmap_object_CAST(op); CHECK_VALID(-1); return self->size; } +static Py_ssize_t +mmap_length(PyObject *op) +{ + Py_ssize_t result; + Py_BEGIN_CRITICAL_SECTION(op); + result = mmap_length_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + static PyObject * -mmap_item(PyObject *op, Py_ssize_t i) +mmap_item_lock_held(PyObject *op, Py_ssize_t i) { mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); @@ -1258,7 +1479,16 @@ mmap_item(PyObject *op, Py_ssize_t i) } static PyObject * -mmap_subscript(PyObject *op, PyObject *item) +mmap_item(PyObject *op, Py_ssize_t i) { + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = mmap_item_lock_held(op, i); + Py_END_CRITICAL_SECTION(); + return result; +} + +static PyObject * +mmap_subscript_lock_held(PyObject *op, PyObject *item) { mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); @@ -1320,8 +1550,18 @@ mmap_subscript(PyObject *op, PyObject *item) } } +static PyObject * +mmap_subscript(PyObject *op, PyObject *item) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = mmap_subscript_lock_held(op, item); + Py_END_CRITICAL_SECTION(); + return result; +} + static int -mmap_ass_item(PyObject *op, Py_ssize_t i, PyObject *v) +mmap_ass_item_lock_held(PyObject *op, Py_ssize_t i, PyObject *v) { const char *buf; mmap_object *self = mmap_object_CAST(op); @@ -1352,7 +1592,17 @@ mmap_ass_item(PyObject *op, Py_ssize_t i, PyObject *v) } static int -mmap_ass_subscript(PyObject *op, PyObject *item, PyObject *value) +mmap_ass_item(PyObject *op, Py_ssize_t i, PyObject *v) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(op); + result = mmap_ass_item_lock_held(op, i, v); + Py_END_CRITICAL_SECTION(); + return result; +} + +static int +mmap_ass_subscript_lock_held(PyObject *op, PyObject *item, PyObject *value) { mmap_object *self = mmap_object_CAST(op); CHECK_VALID(-1); @@ -1448,6 +1698,16 @@ mmap_ass_subscript(PyObject *op, PyObject *item, PyObject *value) } } +static int +mmap_ass_subscript(PyObject *op, PyObject *item, PyObject *value) +{ + int result; + Py_BEGIN_CRITICAL_SECTION(op); + result = mmap_ass_subscript_lock_held(op, item, value); + Py_END_CRITICAL_SECTION(); + return result; +} + static PyObject * new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict); @@ -1670,6 +1930,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) else { m_obj->fd = -1; } + m_obj->flags = flags; Py_BEGIN_ALLOW_THREADS m_obj->data = mmap(NULL, map_size, prot, flags, fd, offset); @@ -1709,7 +1970,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) DWORD off_lo; /* lower 32 bits of offset */ DWORD size_hi; /* upper 32 bits of size */ DWORD size_lo; /* lower 32 bits of size */ - PyObject *tagname = NULL; + PyObject *tagname = Py_None; DWORD dwErr = 0; int fileno; HANDLE fh = 0; @@ -1719,7 +1980,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "tagname", "access", "offset", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|U?iL", keywords, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|OiL", keywords, &fileno, &map_size, &tagname, &access, &offset)) { return NULL; @@ -1852,7 +2113,13 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) m_obj->weakreflist = NULL; m_obj->exports = 0; /* set the tag name */ - if (tagname != NULL) { + if (!Py_IsNone(tagname)) { + if (!PyUnicode_Check(tagname)) { + Py_DECREF(m_obj); + return PyErr_Format(PyExc_TypeError, "expected str or None for " + "'tagname', not %.200s", + Py_TYPE(tagname)->tp_name); + } m_obj->tagname = PyUnicode_AsWideCharString(tagname, NULL); if (m_obj->tagname == NULL) { Py_DECREF(m_obj); diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 922694fa367ac3..e11f16d0940f5e 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -685,7 +685,16 @@ static void reset_remotedebug_data(PyThreadState *tstate) { tstate->remote_debugger_support.debugger_pending_call = 0; - memset(tstate->remote_debugger_support.debugger_script_path, 0, MAX_SCRIPT_PATH_SIZE); + memset(tstate->remote_debugger_support.debugger_script_path, 0, + _Py_MAX_SCRIPT_PATH_SIZE); +} + +static void +reset_asyncio_state(_PyThreadStateImpl *tstate) +{ + llist_init(&tstate->asyncio_tasks_head); + tstate->asyncio_running_loop = NULL; + tstate->asyncio_running_task = NULL; } @@ -724,6 +733,8 @@ PyOS_AfterFork_Child(void) reset_remotedebug_data(tstate); + reset_asyncio_state((_PyThreadStateImpl *)tstate); + // Remove the dead thread states. We "start the world" once we are the only // thread state left to undo the stop the world call in `PyOS_BeforeFork`. // That needs to happen before `_PyThreadState_DeleteList`, because that @@ -2743,7 +2754,7 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) SET_ITEM(ST_BLOCKS_IDX, PyLong_FromLong((long)st->st_blocks)); #endif #ifdef HAVE_STRUCT_STAT_ST_RDEV - SET_ITEM(ST_RDEV_IDX, PyLong_FromLong((long)st->st_rdev)); + SET_ITEM(ST_RDEV_IDX, _PyLong_FromDev(st->st_rdev)); #endif #ifdef HAVE_STRUCT_STAT_ST_GEN SET_ITEM(ST_GEN_IDX, PyLong_FromLong((long)st->st_gen)); @@ -3121,17 +3132,6 @@ class dev_t_return_converter(unsigned_long_return_converter): conversion_fn = '_PyLong_FromDev' unsigned_cast = '(dev_t)' -class FSConverter_converter(CConverter): - type = 'PyObject *' - converter = 'PyUnicode_FSConverter' - def converter_init(self): - if self.default is not unspecified: - fail("FSConverter_converter does not support default values") - self.c_default = 'NULL' - - def cleanup(self): - return "Py_XDECREF(" + self.name + ");\n" - class pid_t_converter(CConverter): type = 'pid_t' format_unit = '" _Py_PARSE_PID "' @@ -3195,7 +3195,7 @@ class confname_converter(CConverter): """, argname=argname, converter=self.converter, table=self.table) [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=8189d5ae78244626]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=d2759f2332cd39b3]*/ /*[clinic input] @@ -3476,12 +3476,12 @@ Change the current working directory to the specified path. path may always be specified as a string. On some platforms, path may also be specified as an open file descriptor. - If this functionality is unavailable, using it raises an exception. +If this functionality is unavailable, using it raises an exception. [clinic start generated code]*/ static PyObject * os_chdir_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=3be6400eee26eaae input=1a4a15b4d12cb15d]*/ +/*[clinic end generated code: output=3be6400eee26eaae input=a74ceab5d72adf74]*/ { int result; @@ -6097,14 +6097,14 @@ os_system_impl(PyObject *module, const wchar_t *command) /*[clinic input] os.system -> long - command: FSConverter + command: unicode_fs_encoded Execute the command in a subshell. [clinic start generated code]*/ static long os_system_impl(PyObject *module, PyObject *command) -/*[clinic end generated code: output=290fc437dd4f33a0 input=86a58554ba6094af]*/ +/*[clinic end generated code: output=290fc437dd4f33a0 input=47c6f24b6dc92881]*/ { long result; const char *bytes = PyBytes_AsString(command); @@ -7447,7 +7447,7 @@ py_posix_spawn(int use_posix_spawnp, PyObject *module, path_t *path, PyObject *a if (argc < 1) { PyErr_Format(PyExc_ValueError, "%s: argv must not be empty", func_name); - return NULL; + goto exit; } if (!PyMapping_Check(env) && env != Py_None) { @@ -7975,53 +7975,19 @@ os_register_at_fork_impl(PyObject *module, PyObject *before, // running in the process. Best effort, silent if unable to count threads. // Constraint: Quick. Never overcounts. Never leaves an error set. // -// This should only be called from the parent process after +// This MUST only be called from the parent process after // PyOS_AfterFork_Parent(). static void -warn_about_fork_with_threads(const char* name) +warn_about_fork_with_threads( + const char* name, // Name of the API to use in the warning message. + const Py_ssize_t num_os_threads // Only trusted when >= 1. +) { // It's not safe to issue the warning while the world is stopped, because // other threads might be holding locks that we need, which would deadlock. assert(!_PyRuntime.stoptheworld.world_stopped); - // TODO: Consider making an `os` module API to return the current number - // of threads in the process. That'd presumably use this platform code but - // raise an error rather than using the inaccurate fallback. - Py_ssize_t num_python_threads = 0; -#if defined(__APPLE__) && defined(HAVE_GETPID) - mach_port_t macos_self = mach_task_self(); - mach_port_t macos_task; - if (task_for_pid(macos_self, getpid(), &macos_task) == KERN_SUCCESS) { - thread_array_t macos_threads; - mach_msg_type_number_t macos_n_threads; - if (task_threads(macos_task, &macos_threads, - &macos_n_threads) == KERN_SUCCESS) { - num_python_threads = macos_n_threads; - } - } -#elif defined(__linux__) - // Linux /proc/self/stat 20th field is the number of threads. - FILE* proc_stat = fopen("/proc/self/stat", "r"); - if (proc_stat) { - size_t n; - // Size chosen arbitrarily. ~60% more bytes than a 20th column index - // observed on the author's workstation. - char stat_line[160]; - n = fread(&stat_line, 1, 159, proc_stat); - stat_line[n] = '\0'; - fclose(proc_stat); - - char *saveptr = NULL; - char *field = strtok_r(stat_line, " ", &saveptr); - unsigned int idx; - for (idx = 19; idx && field; --idx) { - field = strtok_r(NULL, " ", &saveptr); - } - if (idx == 0 && field) { // found the 20th field - num_python_threads = atoi(field); // 0 on error - } - } -#endif + Py_ssize_t num_python_threads = num_os_threads; if (num_python_threads <= 0) { // Fall back to just the number our threading module knows about. // An incomplete view of the world, but better than nothing. @@ -8074,6 +8040,51 @@ warn_about_fork_with_threads(const char* name) PyErr_Clear(); } } + +// If this returns <= 0, we were unable to successfully use any OS APIs. +// Returns a positive number of threads otherwise. +static Py_ssize_t get_number_of_os_threads(void) +{ + // TODO: Consider making an `os` module API to return the current number + // of threads in the process. That'd presumably use this platform code but + // raise an error rather than using the inaccurate fallback. + Py_ssize_t num_python_threads = 0; +#if defined(__APPLE__) && defined(HAVE_GETPID) + mach_port_t macos_self = mach_task_self(); + mach_port_t macos_task; + if (task_for_pid(macos_self, getpid(), &macos_task) == KERN_SUCCESS) { + thread_array_t macos_threads; + mach_msg_type_number_t macos_n_threads; + if (task_threads(macos_task, &macos_threads, + &macos_n_threads) == KERN_SUCCESS) { + num_python_threads = macos_n_threads; + } + } +#elif defined(__linux__) + // Linux /proc/self/stat 20th field is the number of threads. + FILE* proc_stat = fopen("/proc/self/stat", "r"); + if (proc_stat) { + size_t n; + // Size chosen arbitrarily. ~60% more bytes than a 20th column index + // observed on the author's workstation. + char stat_line[160]; + n = fread(&stat_line, 1, 159, proc_stat); + stat_line[n] = '\0'; + fclose(proc_stat); + + char *saveptr = NULL; + char *field = strtok_r(stat_line, " ", &saveptr); + unsigned int idx; + for (idx = 19; idx && field; --idx) { + field = strtok_r(NULL, " ", &saveptr); + } + if (idx == 0 && field) { // found the 20th field + num_python_threads = atoi(field); // 0 on error + } + } +#endif + return num_python_threads; +} #endif // HAVE_FORK1 || HAVE_FORKPTY || HAVE_FORK #ifdef HAVE_FORK1 @@ -8108,10 +8119,12 @@ os_fork1_impl(PyObject *module) /* child: this clobbers and resets the import lock. */ PyOS_AfterFork_Child(); } else { + // Called before AfterFork_Parent in case those hooks start threads. + Py_ssize_t num_os_threads = get_number_of_os_threads(); /* parent: release the import lock. */ PyOS_AfterFork_Parent(); // After PyOS_AfterFork_Parent() starts the world to avoid deadlock. - warn_about_fork_with_threads("fork1"); + warn_about_fork_with_threads("fork1", num_os_threads); } if (pid == -1) { errno = saved_errno; @@ -8157,10 +8170,12 @@ os_fork_impl(PyObject *module) /* child: this clobbers and resets the import lock. */ PyOS_AfterFork_Child(); } else { + // Called before AfterFork_Parent in case those hooks start threads. + Py_ssize_t num_os_threads = get_number_of_os_threads(); /* parent: release the import lock. */ PyOS_AfterFork_Parent(); // After PyOS_AfterFork_Parent() starts the world to avoid deadlock. - warn_about_fork_with_threads("fork"); + warn_about_fork_with_threads("fork", num_os_threads); } if (pid == -1) { errno = saved_errno; @@ -8185,10 +8200,10 @@ static PyObject * os_sched_get_priority_max_impl(PyObject *module, int policy) /*[clinic end generated code: output=9e465c6e43130521 input=2097b7998eca6874]*/ { - int max; - - max = sched_get_priority_max(policy); - if (max < 0) + /* make sure that errno is cleared before the call */ + errno = 0; + int max = sched_get_priority_max(policy); + if (max == -1 && errno) return posix_error(); return PyLong_FromLong(max); } @@ -8206,8 +8221,10 @@ static PyObject * os_sched_get_priority_min_impl(PyObject *module, int policy) /*[clinic end generated code: output=7595c1138cc47a6d input=21bc8fa0d70983bf]*/ { + /* make sure that errno is cleared before the call */ + errno = 0; int min = sched_get_priority_min(policy); - if (min < 0) + if (min == -1 && errno) return posix_error(); return PyLong_FromLong(min); } @@ -8268,7 +8285,7 @@ os_sched_param_impl(PyTypeObject *type, PyObject *sched_priority) static PyObject * os_sched_param_reduce(PyObject *self, PyObject *Py_UNUSED(dummy)) { - return Py_BuildValue("(O(N))", Py_TYPE(self), PyStructSequence_GetItem(self, 0)); + return Py_BuildValue("(O(O))", Py_TYPE(self), PyStructSequence_GetItem(self, 0)); } static PyMethodDef os_sched_param_reduce_method = { @@ -8806,14 +8823,14 @@ os_ptsname_impl(PyObject *module, int fd) #if defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) || defined(HAVE_LOGIN_TTY) || defined(HAVE_DEV_PTMX) #ifdef HAVE_PTY_H #include <pty.h> -#ifdef HAVE_UTMP_H -#include <utmp.h> -#endif /* HAVE_UTMP_H */ #elif defined(HAVE_LIBUTIL_H) #include <libutil.h> #elif defined(HAVE_UTIL_H) #include <util.h> #endif /* HAVE_PTY_H */ +#ifdef HAVE_UTMP_H +#include <utmp.h> +#endif /* HAVE_UTMP_H */ #ifdef HAVE_STROPTS_H #include <stropts.h> #endif @@ -9012,10 +9029,12 @@ os_forkpty_impl(PyObject *module) /* child: this clobbers and resets the import lock. */ PyOS_AfterFork_Child(); } else { + // Called before AfterFork_Parent in case those hooks start threads. + Py_ssize_t num_os_threads = get_number_of_os_threads(); /* parent: release the import lock. */ PyOS_AfterFork_Parent(); // After PyOS_AfterFork_Parent() starts the world to avoid deadlock. - warn_about_fork_with_threads("forkpty"); + warn_about_fork_with_threads("forkpty", num_os_threads); } if (pid == -1) { return posix_error(); @@ -9282,7 +9301,7 @@ os_getgroups_impl(PyObject *module) /*[clinic input] os.initgroups - username as oname: FSConverter + username as oname: unicode_fs_encoded gid: int / @@ -9295,12 +9314,12 @@ group id. static PyObject * os_initgroups_impl(PyObject *module, PyObject *oname, int gid) -/*[clinic end generated code: output=7f074d30a425fd3a input=df3d54331b0af204]*/ +/*[clinic end generated code: output=7f074d30a425fd3a input=984e60c7fed88cb4]*/ #else /*[clinic input] os.initgroups - username as oname: FSConverter + username as oname: unicode_fs_encoded gid: gid_t / @@ -9313,7 +9332,7 @@ group id. static PyObject * os_initgroups_impl(PyObject *module, PyObject *oname, gid_t gid) -/*[clinic end generated code: output=59341244521a9e3f input=0cb91bdc59a4c564]*/ +/*[clinic end generated code: output=59341244521a9e3f input=17d8fbe2dea42ca4]*/ #endif { const char *username = PyBytes_AS_STRING(oname); @@ -9548,6 +9567,24 @@ os_getlogin_impl(PyObject *module) } else result = PyErr_SetFromWindowsErr(GetLastError()); +#elif defined (HAVE_GETLOGIN_R) +# if defined (HAVE_MAXLOGNAME) + char name[MAXLOGNAME + 1]; +# elif defined (HAVE_UT_NAMESIZE) + char name[UT_NAMESIZE + 1]; +# else + char name[256]; +# endif + int err = getlogin_r(name, sizeof(name)); + if (err) { + int old_errno = errno; + errno = -err; + posix_error(); + errno = old_errno; + } + else { + result = PyUnicode_DecodeFSDefault(name); + } #else char *name; int old_errno = errno; @@ -13046,8 +13083,8 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) /*[clinic input] os.putenv - name: FSConverter - value: FSConverter + name: unicode_fs_encoded + value: unicode_fs_encoded / Change or add an environment variable. @@ -13055,7 +13092,7 @@ Change or add an environment variable. static PyObject * os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) -/*[clinic end generated code: output=d29a567d6b2327d2 input=a97bc6152f688d31]*/ +/*[clinic end generated code: output=d29a567d6b2327d2 input=84fcd30f873c8c45]*/ { const char *name_string = PyBytes_AS_STRING(name); const char *value_string = PyBytes_AS_STRING(value); @@ -13098,7 +13135,7 @@ os_unsetenv_impl(PyObject *module, PyObject *name) #else /*[clinic input] os.unsetenv - name: FSConverter + name: unicode_fs_encoded / Delete an environment variable. @@ -13106,7 +13143,7 @@ Delete an environment variable. static PyObject * os_unsetenv_impl(PyObject *module, PyObject *name) -/*[clinic end generated code: output=54c4137ab1834f02 input=2bb5288a599c7107]*/ +/*[clinic end generated code: output=54c4137ab1834f02 input=78ff12e505ade80a]*/ { if (PySys_Audit("os.unsetenv", "(O)", name) < 0) { return NULL; @@ -15164,14 +15201,14 @@ os_urandom_impl(PyObject *module, Py_ssize_t size) /*[clinic input] os.memfd_create - name: FSConverter + name: unicode_fs_encoded flags: unsigned_int(bitwise=True, c_default="MFD_CLOEXEC") = MFD_CLOEXEC [clinic start generated code]*/ static PyObject * os_memfd_create_impl(PyObject *module, PyObject *name, unsigned int flags) -/*[clinic end generated code: output=6681ede983bdb9a6 input=a42cfc199bcd56e9]*/ +/*[clinic end generated code: output=6681ede983bdb9a6 input=cd0eb092cfac474b]*/ { int fd; const char *bytes = PyBytes_AS_STRING(name); @@ -16934,6 +16971,25 @@ os__emscripten_debugger_impl(PyObject *module) emscripten_debugger(); Py_RETURN_NONE; } + +EM_JS(void, emscripten_log_impl_js, (const char* arg), { + console.warn(UTF8ToString(arg)); +}); + +/*[clinic input] +os._emscripten_log + arg: str + +Log something to the JS console. Emscripten only. +[clinic start generated code]*/ + +static PyObject * +os__emscripten_log_impl(PyObject *module, const char *arg) +/*[clinic end generated code: output=9749e5e293c42784 input=350aa1f70bc1e905]*/ +{ + emscripten_log_impl_js(arg); + Py_RETURN_NONE; +} #endif /* __EMSCRIPTEN__ */ @@ -17153,6 +17209,7 @@ static PyMethodDef posix_methods[] = { OS__IS_INPUTHOOK_INSTALLED_METHODDEF OS__CREATE_ENVIRON_METHODDEF OS__EMSCRIPTEN_DEBUGGER_METHODDEF + OS__EMSCRIPTEN_LOG_METHODDEF {NULL, NULL} /* Sentinel */ }; diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index fa153d86543e99..009af9539f2f40 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -9,6 +9,8 @@ #include <stdbool.h> #include <stddef.h> // offsetof() + +#include "expat_config.h" #include "expat.h" #include "pyexpat.h" @@ -74,6 +76,15 @@ typedef struct { PyObject_HEAD XML_Parser itself; + /* + * Strong reference to a parent `xmlparseobject` if this parser + * is a child parser. Set to NULL if this parser is a root parser. + * This is needed to keep the parent parser alive as long as it has + * at least one child parser. + * + * See https://github.com/python/cpython/issues/139400 for details. + */ + PyObject *parent; int ordered_attributes; /* Return attributes as a list. */ int specified_attributes; /* Report only specified attributes. */ int in_callback; /* Is a callback active? */ @@ -120,50 +131,89 @@ CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info, setter(xml_parser, xml_handler); } +static int +set_xml_error_attr_code(PyObject *err, enum XML_Error code) +{ + PyObject *v = PyLong_FromLong((long)code); + int ok = v != NULL && PyObject_SetAttr(err, &_Py_ID(code), v) != -1; + Py_XDECREF(v); + return ok; +} + /* Set an integer attribute on the error object; return true on success, * false on an exception. */ static int -set_error_attr(PyObject *err, const char *name, int value) +set_xml_error_attr_location(PyObject *err, const char *name, XML_Size value) { - PyObject *v = PyLong_FromLong(value); + PyObject *v = PyLong_FromSize_t((size_t)value); + int ok = v != NULL && PyObject_SetAttrString(err, name, v) != -1; + Py_XDECREF(v); + return ok; +} - if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) { - Py_XDECREF(v); - return 0; + +static PyObject * +set_xml_error(pyexpat_state *state, + enum XML_Error code, XML_Size lineno, XML_Size column, + const char *errmsg) +{ + PyObject *arg; + if (errmsg == NULL) { + arg = PyUnicode_FromFormat( + "%s: line %zu, column %zu", + XML_ErrorString(code), + (size_t)lineno, (size_t)column + ); + } + else { + arg = PyUnicode_FromStringAndSize(errmsg, strlen(errmsg)); + } + if (arg == NULL) { + return NULL; } - Py_DECREF(v); - return 1; + PyObject *res = PyObject_CallOneArg(state->error, arg); + Py_DECREF(arg); + if ( + res != NULL + && set_xml_error_attr_code(res, code) + && set_xml_error_attr_location(res, "lineno", lineno) + && set_xml_error_attr_location(res, "offset", column) + ) { + PyErr_SetObject(state->error, res); + } + Py_XDECREF(res); + return NULL; } +#define SET_XML_ERROR(STATE, SELF, CODE, ERRMSG) \ + do { \ + XML_Parser parser = SELF->itself; \ + assert(parser != NULL); \ + XML_Size lineno = XML_GetCurrentLineNumber(parser); \ + XML_Size column = XML_GetCurrentColumnNumber(parser); \ + (void)set_xml_error(state, CODE, lineno, column, ERRMSG); \ + } while (0) + /* Build and set an Expat exception, including positioning * information. Always returns NULL. */ static PyObject * set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code) { - PyObject *err; - PyObject *buffer; - XML_Parser parser = self->itself; - int lineno = XML_GetErrorLineNumber(parser); - int column = XML_GetErrorColumnNumber(parser); + SET_XML_ERROR(state, self, code, NULL); + return NULL; +} - buffer = PyUnicode_FromFormat("%s: line %i, column %i", - XML_ErrorString(code), lineno, column); - if (buffer == NULL) - return NULL; - err = PyObject_CallOneArg(state->error, buffer); - Py_DECREF(buffer); - if ( err != NULL - && set_error_attr(err, "code", code) - && set_error_attr(err, "offset", column) - && set_error_attr(err, "lineno", lineno)) { - PyErr_SetObject(state->error, err); - } - Py_XDECREF(err); +static PyObject * +set_invalid_arg(pyexpat_state *state, xmlparseobject *self, const char *errmsg) +{ + SET_XML_ERROR(state, self, XML_ERROR_INVALID_ARGUMENT, errmsg); return NULL; } +#undef SET_XML_ERROR + static int have_handler(xmlparseobject *self, int type) { @@ -588,7 +638,7 @@ my_ElementDeclHandler(void *userData, PyObject *modelobj, *nameobj; if (PyErr_Occurred()) - return; + goto finally; if (flush_character_buffer(self) < 0) goto finally; @@ -1019,6 +1069,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, return NULL; } + // The new subparser will make use of the parent XML_Parser inside of Expat. + // So we need to take subparsers into account with the reference counting + // of their parent parser. + Py_INCREF(self); + new_parser->buffer_size = self->buffer_size; new_parser->buffer_used = 0; new_parser->buffer = NULL; @@ -1028,6 +1083,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->ns_prefixes = self->ns_prefixes; new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, encoding); + new_parser->parent = (PyObject *)self; new_parser->handlers = 0; new_parser->intern = Py_XNewRef(self->intern); @@ -1035,11 +1091,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->buffer = PyMem_Malloc(new_parser->buffer_size); if (new_parser->buffer == NULL) { Py_DECREF(new_parser); + Py_DECREF(self); return PyErr_NoMemory(); } } if (!new_parser->itself) { Py_DECREF(new_parser); + Py_DECREF(self); return PyErr_NoMemory(); } @@ -1053,6 +1111,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self, new_parser->handlers = PyMem_New(PyObject *, i); if (!new_parser->handlers) { Py_DECREF(new_parser); + Py_DECREF(self); return PyErr_NoMemory(); } clear_handlers(new_parser, 1); @@ -1125,6 +1184,112 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls, } #endif +#if XML_COMBINED_VERSION >= 20702 +static PyObject * +set_activation_threshold(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold, + XML_Bool (*setter)(XML_Parser, unsigned long long)) +{ + assert(self->itself != NULL); + if (setter(self->itself, threshold) == XML_TRUE) { + Py_RETURN_NONE; + } + // The setter fails if self->itself is NULL (which is not possible here) + // or is a non-root parser, which currently only happens for parsers + // created by ExternalEntityParserCreate(). + pyexpat_state *state = PyType_GetModuleState(cls); + return set_invalid_arg(state, self, "parser must be a root parser"); +} + +static PyObject * +set_maximum_amplification(xmlparseobject *self, + PyTypeObject *cls, + float max_factor, + XML_Bool (*setter)(XML_Parser, float)) +{ + assert(self->itself != NULL); + if (setter(self->itself, max_factor) == XML_TRUE) { + Py_RETURN_NONE; + } + // The setter fails if self->itself is NULL (which is not possible here), + // is a non-root parser, which currently only happens for parsers created + // by ExternalEntityParserCreate(), or if 'max_factor' is NaN or < 1.0. + pyexpat_state *state = PyType_GetModuleState(cls); + // Note: Expat has no API to determine whether a parser is a root parser, + // and since the Expat functions for defining the various maximum allowed + // amplifcation factors fail when a bad parser or an out-of-range factor + // is given without specifying which check failed, we check whether the + // factor is out-of-range to improve the error message. See also gh-90949. + const char *message = (isnan(max_factor) || max_factor < 1.0f) + ? "'max_factor' must be at least 1.0" + : "parser must be a root parser"; + return set_invalid_arg(state, self, message); +} +#endif + +#if XML_COMBINED_VERSION >= 20702 +/*[clinic input] +pyexpat.xmlparser.SetAllocTrackerActivationThreshold + + cls: defining_class + threshold: unsigned_long_long + / + +Sets the number of allocated bytes of dynamic memory needed to activate protection against disproportionate use of RAM. + +By default, parser objects have an allocation activation threshold of 64 MiB. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self, + PyTypeObject *cls, + unsigned long long threshold) +/*[clinic end generated code: output=bed7e93207ba08c5 input=9c706b75c18e4ea1]*/ +{ + return set_activation_threshold( + self, cls, threshold, + XML_SetAllocTrackerActivationThreshold + ); +} +#endif + +#if XML_COMBINED_VERSION >= 20702 +/*[clinic input] +pyexpat.xmlparser.SetAllocTrackerMaximumAmplification + + cls: defining_class + max_factor: float + / + +Sets the maximum amplification factor between direct input and bytes of dynamic memory allocated. + +The amplification factor is calculated as "allocated / direct" while parsing, +where "direct" is the number of bytes read from the primary document in parsing +and "allocated" is the number of bytes of dynamic memory allocated in the parser +hierarchy. + +The 'max_factor' value must be a non-NaN floating point value greater than +or equal to 1.0. Amplification factors greater than 100.0 can be observed +near the start of parsing even with benign files in practice. In particular, +the activation threshold should be carefully chosen to avoid false positives. + +By default, parser objects have a maximum amplification factor of 100.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self, + PyTypeObject *cls, + float max_factor) +/*[clinic end generated code: output=6e44bd48c9b112a0 input=918b9266b490a722]*/ +{ + return set_maximum_amplification( + self, cls, max_factor, + XML_SetAllocTrackerMaximumAmplification + ); +} +#endif + static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_PARSE_METHODDEF PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF @@ -1133,9 +1298,9 @@ static struct PyMethodDef xmlparse_methods[] = { PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF -#if XML_COMBINED_VERSION >= 19505 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF -#endif + PYEXPAT_XMLPARSER_SETALLOCTRACKERACTIVATIONTHRESHOLD_METHODDEF + PYEXPAT_XMLPARSER_SETALLOCTRACKERMAXIMUMAMPLIFICATION_METHODDEF PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF {NULL, NULL} /* sentinel */ @@ -1242,6 +1407,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding, /* namespace_separator is either NULL or contains one char + \0 */ self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, namespace_separator); + self->parent = NULL; if (self->itself == NULL) { PyErr_SetString(PyExc_RuntimeError, "XML_ParserCreate failed"); @@ -1278,6 +1444,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg) for (size_t i = 0; handler_info[i].name != NULL; i++) { Py_VISIT(self->handlers[i]); } + Py_VISIT(self->parent); Py_VISIT(Py_TYPE(op)); return 0; } @@ -1288,6 +1455,10 @@ xmlparse_clear(PyObject *op) xmlparseobject *self = xmlparseobject_CAST(op); clear_handlers(self, 0); Py_CLEAR(self->intern); + // NOTE: We cannot call Py_CLEAR(self->parent) prior to calling + // XML_ParserFree(self->itself), or a subparser could lose its parent + // XML_Parser while still making use of it internally. + // https://github.com/python/cpython/issues/139400 return 0; } @@ -1301,6 +1472,7 @@ xmlparse_dealloc(PyObject *op) XML_ParserFree(self->itself); } self->itself = NULL; + Py_CLEAR(self->parent); if (self->handlers != NULL) { PyMem_Free(self->handlers); @@ -2141,6 +2313,13 @@ pyexpat_exec(PyObject *mod) #else capi->SetReparseDeferralEnabled = NULL; #endif +#if XML_COMBINED_VERSION >= 20702 + capi->SetAllocTrackerActivationThreshold = XML_SetAllocTrackerActivationThreshold; + capi->SetAllocTrackerMaximumAmplification = XML_SetAllocTrackerMaximumAmplification; +#else + capi->SetAllocTrackerActivationThreshold = NULL; + capi->SetAllocTrackerMaximumAmplification = NULL; +#endif /* export using capsule */ PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME, diff --git a/Modules/readline.c b/Modules/readline.c index 0dd99dc66c08e9..4da66ca6e7b94c 100644 --- a/Modules/readline.c +++ b/Modules/readline.c @@ -255,6 +255,7 @@ readline_read_init_file_impl(PyObject *module, PyObject *filename_obj) if (!PyUnicode_FSConverter(filename_obj, &filename_bytes)) return NULL; if (PySys_Audit("open", "OCi", filename_obj, 'r', 0) < 0) { + Py_DECREF(filename_bytes); return NULL; } errno = rl_read_init_file(PyBytes_AS_STRING(filename_bytes)); @@ -298,6 +299,7 @@ readline_read_history_file_impl(PyObject *module, PyObject *filename_obj) if (!PyUnicode_FSConverter(filename_obj, &filename_bytes)) return NULL; if (PySys_Audit("open", "OCi", filename_obj, 'r', 0) < 0) { + Py_DECREF(filename_bytes); return NULL; } errno = read_history(PyBytes_AS_STRING(filename_bytes)); @@ -343,6 +345,7 @@ readline_write_history_file_impl(PyObject *module, PyObject *filename_obj) return NULL; filename = PyBytes_AS_STRING(filename_bytes); if (PySys_Audit("open", "OCi", filename_obj, 'w', 0) < 0) { + Py_DECREF(filename_bytes); return NULL; } } else { @@ -400,6 +403,7 @@ readline_append_history_file_impl(PyObject *module, int nelements, return NULL; filename = PyBytes_AS_STRING(filename_bytes); if (PySys_Audit("open", "OCi", filename_obj, 'a', 0) < 0) { + Py_DECREF(filename_bytes); return NULL; } } else { diff --git a/Modules/resource.c b/Modules/resource.c index 3fe18e7c98e3d8..2353bc6653abd8 100644 --- a/Modules/resource.c +++ b/Modules/resource.c @@ -1,7 +1,5 @@ -// Need limited C API version 3.13 for PySys_Audit() -#include "pyconfig.h" // Py_GIL_DISABLED -#ifndef Py_GIL_DISABLED -# define Py_LIMITED_API 0x030d0000 +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 #endif #include "Python.h" @@ -150,6 +148,35 @@ resource_getrusage_impl(PyObject *module, int who) } #endif +static int +py2rlim(PyObject *obj, rlim_t *out) +{ + obj = PyNumber_Index(obj); + if (obj == NULL) { + return -1; + } + int neg = PyLong_IsNegative(obj); + assert(neg >= 0); + Py_ssize_t bytes = PyLong_AsNativeBytes(obj, out, sizeof(*out), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + Py_DECREF(obj); + if (bytes < 0) { + return -1; + } + else if (neg && (*out != RLIM_INFINITY || bytes > (Py_ssize_t)sizeof(*out))) { + PyErr_SetString(PyExc_ValueError, + "Cannot convert negative int"); + return -1; + } + else if (bytes > (Py_ssize_t)sizeof(*out)) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C rlim_t"); + return -1; + } + return 0; +} + static int py2rlimit(PyObject *limits, struct rlimit *rl_out) { @@ -166,26 +193,13 @@ py2rlimit(PyObject *limits, struct rlimit *rl_out) } curobj = PyTuple_GetItem(limits, 0); // borrowed maxobj = PyTuple_GetItem(limits, 1); // borrowed -#if !defined(HAVE_LARGEFILE_SUPPORT) - rl_out->rlim_cur = PyLong_AsLong(curobj); - if (rl_out->rlim_cur == (rlim_t)-1 && PyErr_Occurred()) - goto error; - rl_out->rlim_max = PyLong_AsLong(maxobj); - if (rl_out->rlim_max == (rlim_t)-1 && PyErr_Occurred()) - goto error; -#else - /* The limits are probably bigger than a long */ - rl_out->rlim_cur = PyLong_AsLongLong(curobj); - if (rl_out->rlim_cur == (rlim_t)-1 && PyErr_Occurred()) - goto error; - rl_out->rlim_max = PyLong_AsLongLong(maxobj); - if (rl_out->rlim_max == (rlim_t)-1 && PyErr_Occurred()) + if (py2rlim(curobj, &rl_out->rlim_cur) < 0 || + py2rlim(maxobj, &rl_out->rlim_max) < 0) + { goto error; -#endif + } Py_DECREF(limits); - rl_out->rlim_cur = rl_out->rlim_cur & RLIM_INFINITY; - rl_out->rlim_max = rl_out->rlim_max & RLIM_INFINITY; return 0; error: @@ -193,15 +207,24 @@ py2rlimit(PyObject *limits, struct rlimit *rl_out) return -1; } +static PyObject* +rlim2py(rlim_t value) +{ + if (value == RLIM_INFINITY) { + return PyLong_FromNativeBytes(&value, sizeof(value), -1); + } + return PyLong_FromUnsignedNativeBytes(&value, sizeof(value), -1); +} + static PyObject* rlimit2py(struct rlimit rl) { - if (sizeof(rl.rlim_cur) > sizeof(long)) { - return Py_BuildValue("LL", - (long long) rl.rlim_cur, - (long long) rl.rlim_max); + PyObject *cur = rlim2py(rl.rlim_cur); + if (cur == NULL) { + return NULL; } - return Py_BuildValue("ll", (long) rl.rlim_cur, (long) rl.rlim_max); + PyObject *max = rlim2py(rl.rlim_max); + return Py_BuildValue("NN", cur, max); } /*[clinic input] @@ -495,14 +518,7 @@ resource_exec(PyObject *module) ADD_INT(module, RLIMIT_KQUEUES); #endif - PyObject *v; - if (sizeof(RLIM_INFINITY) > sizeof(long)) { - v = PyLong_FromLongLong((long long) RLIM_INFINITY); - } else - { - v = PyLong_FromLong((long) RLIM_INFINITY); - } - if (PyModule_Add(module, "RLIM_INFINITY", v) < 0) { + if (PyModule_Add(module, "RLIM_INFINITY", rlim2py(RLIM_INFINITY)) < 0) { return -1; } return 0; diff --git a/Modules/sha1module.c b/Modules/sha1module.c index f4a00cdb422156..a746bf74f8d4c1 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -272,19 +272,25 @@ static PyType_Spec sha1_type_spec = { /*[clinic input] _sha1.sha1 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA1 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=6f8b3af05126e18e input=bd54b68e2bf36a8a]*/ +_sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=0d453775924f88a7 input=807f25264e0ac656]*/ { SHA1object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); diff --git a/Modules/sha2module.c b/Modules/sha2module.c index e88d7cb2d456bf..72931910c5d720 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -594,18 +594,24 @@ static PyType_Spec sha512_type_spec = { /*[clinic input] _sha2.sha256 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-256 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=243c9dd289931f87 input=6249da1de607280a]*/ +_sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=49828a7bcd418f45 input=9ce1d70e669abc14]*/ { Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); @@ -651,18 +657,25 @@ _sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha224 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-224 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=68191f232e4a3843 input=c42bcba47fd7d2b7]*/ +_sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=2163cb03b6cf6157 input=612f7682a889bc2a]*/ { Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } + if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); } @@ -706,19 +719,25 @@ _sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha512 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-512 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=d55c8996eca214d7 input=0576ae2a6ebfad25]*/ +_sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=cc3fcfce001a4538 input=19c9f2c06d59563a]*/ { SHA512object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } sha2_state *state = sha2_get_state(module); @@ -763,19 +782,25 @@ _sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha384 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-384 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=b29a0d81d51d1368 input=4e9199d8de0d2f9b]*/ +_sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=b6e3db593b5a0330 input=9fd50c942ad9e0bf]*/ { SHA512object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } sha2_state *state = sha2_get_state(module); diff --git a/Modules/sha3module.c b/Modules/sha3module.c index a7edf5c66a1e76..cfbf0cbcc042c5 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -105,18 +105,25 @@ sha3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *buf, Py_ssize_t len) /*[clinic input] @classmethod _sha3.sha3_224.__new__ as py_sha3_new - data: object(c_default="NULL") = b'' - / + + data as data_obj: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new SHA3 hash object. [clinic start generated code]*/ static PyObject * -py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity) -/*[clinic end generated code: output=90409addc5d5e8b0 input=637e5f8f6a93982a]*/ +py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=dcec1eca20395f2a input=c106e0b4e2d67d58]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } + Py_buffer buf = {NULL, NULL}; SHA3State *state = _PyType_GetModuleState(type); SHA3object *self = newSHA3object(type); @@ -503,14 +510,13 @@ _SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) _sha3.shake_128.digest length: unsigned_long - / Return the digest value as a bytes object. [clinic start generated code]*/ static PyObject * _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=2313605e2f87bb8f input=418ef6a36d2e6082]*/ +/*[clinic end generated code: output=2313605e2f87bb8f input=93d6d6ff32904f18]*/ { return _SHAKE_digest((PyObject *)self, length, 0); } @@ -520,14 +526,13 @@ _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) _sha3.shake_128.hexdigest length: unsigned_long - / Return the digest value as a string of hexadecimal digits. [clinic start generated code]*/ static PyObject * _sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=bf8e2f1e490944a8 input=69fb29b0926ae321]*/ +/*[clinic end generated code: output=bf8e2f1e490944a8 input=562d74e7060b56ab]*/ { return _SHAKE_digest((PyObject *)self, length, 1); } diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c index 54bcd3270ef31a..d7edbc09242fad 100644 --- a/Modules/signalmodule.c +++ b/Modules/signalmodule.c @@ -1180,7 +1180,13 @@ signal_sigwaitinfo_impl(PyObject *module, sigset_t sigset) err = sigwaitinfo(&sigset, &si); Py_END_ALLOW_THREADS } while (err == -1 - && errno == EINTR && !(async_err = PyErr_CheckSignals())); + && (errno == EINTR +#if defined(__NetBSD__) + /* NetBSD's implementation violates POSIX by setting + * errno to ECANCELED instead of EINTR. */ + || errno == ECANCELED +#endif + ) && !(async_err = PyErr_CheckSignals())); if (err == -1) return (!async_err) ? PyErr_SetFromErrno(PyExc_OSError) : NULL; @@ -1623,7 +1629,7 @@ signal_module_exec(PyObject *m) modstate->ignore_handler = state->ignore_handler; // borrowed ref #ifdef PYHAVE_ITIMER_ERROR - modstate->itimer_error = PyErr_NewException("signal.itimer_error", + modstate->itimer_error = PyErr_NewException("signal.ItimerError", PyExc_OSError, NULL); if (modstate->itimer_error == NULL) { return -1; @@ -1940,7 +1946,7 @@ signal_install_handlers(void) /* Restore signals that the interpreter has called SIG_IGN on to SIG_DFL. * * All of the code in this function must only use async-signal-safe functions, - * listed at `man 7 signal` or + * listed at `man 7 signal-safety` or * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. * * If this function is updated, update also _posix_spawn() of subprocess.py. diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 47958379263793..1fbd6eb9294cfa 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -111,7 +111,6 @@ Local naming conventions: #include "pycore_moduleobject.h" // _PyModule_GetState #include "pycore_time.h" // _PyTime_AsMilliseconds() #include "pycore_pystate.h" // _Py_AssertHoldsTstate() -#include "pycore_pyatomic_ft_wrappers.h" #ifdef _Py_MEMORY_SANITIZER # include <sanitizer/msan_interface.h> @@ -565,7 +564,6 @@ static int sock_cloexec_works = -1; static inline void set_sock_fd(PySocketSockObject *s, SOCKET_T fd) { -#ifdef Py_GIL_DISABLED #if SIZEOF_SOCKET_T == SIZEOF_INT _Py_atomic_store_int_relaxed((int *)&s->sock_fd, (int)fd); #elif SIZEOF_SOCKET_T == SIZEOF_LONG @@ -575,15 +573,11 @@ set_sock_fd(PySocketSockObject *s, SOCKET_T fd) #else #error "Unsupported SIZEOF_SOCKET_T" #endif -#else - s->sock_fd = fd; -#endif } static inline SOCKET_T get_sock_fd(PySocketSockObject *s) { -#ifdef Py_GIL_DISABLED #if SIZEOF_SOCKET_T == SIZEOF_INT return (SOCKET_T)_Py_atomic_load_int_relaxed((int *)&s->sock_fd); #elif SIZEOF_SOCKET_T == SIZEOF_LONG @@ -593,9 +587,6 @@ get_sock_fd(PySocketSockObject *s) #else #error "Unsupported SIZEOF_SOCKET_T" #endif -#else - return s->sock_fd; -#endif } #define _PySocketSockObject_CAST(op) ((PySocketSockObject *)(op)) @@ -638,33 +629,22 @@ _PyLong_##NAME##_Converter(PyObject *obj, void *ptr) \ return 1; \ } -UNSIGNED_INT_CONVERTER(UInt16, uint16_t) -UNSIGNED_INT_CONVERTER(UInt32, uint32_t) - #if defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS) # ifdef MS_WINDOWS UNSIGNED_INT_CONVERTER(NetIfindex, NET_IFINDEX) # else - UNSIGNED_INT_CONVERTER(NetIfindex, unsigned int) +# define _PyLong_NetIfindex_Converter _PyLong_UnsignedInt_Converter # define NET_IFINDEX unsigned int # endif #endif // defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS) /*[python input] -class uint16_converter(CConverter): - type = "uint16_t" - converter = '_PyLong_UInt16_Converter' - -class uint32_converter(CConverter): - type = "uint32_t" - converter = '_PyLong_UInt32_Converter' - class NET_IFINDEX_converter(CConverter): type = "NET_IFINDEX" converter = '_PyLong_NetIfindex_Converter' [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=3de2e4a03fbf83b8]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=1cf809c40a407c34]*/ /*[clinic input] module _socket @@ -7281,7 +7261,7 @@ Returns a list of network interface information (index, name) tuples."); /*[clinic input] _socket.if_nametoindex - oname: object(converter="PyUnicode_FSConverter") + oname: unicode_fs_encoded / Returns the interface index corresponding to the interface name if_name. @@ -7289,7 +7269,7 @@ Returns the interface index corresponding to the interface name if_name. static PyObject * _socket_if_nametoindex_impl(PyObject *module, PyObject *oname) -/*[clinic end generated code: output=289a411614f30244 input=01e0f1205307fb77]*/ +/*[clinic end generated code: output=289a411614f30244 input=6125dc20683560cf]*/ { #ifdef MS_WINDOWS NET_IFINDEX index; @@ -7297,11 +7277,10 @@ _socket_if_nametoindex_impl(PyObject *module, PyObject *oname) unsigned long index; #endif + errno = ENODEV; // in case 'if_nametoindex' does not set errno index = if_nametoindex(PyBytes_AS_STRING(oname)); - Py_DECREF(oname); if (index == 0) { - /* if_nametoindex() doesn't set errno */ - PyErr_SetString(PyExc_OSError, "no interface with this name"); + PyErr_SetFromErrno(PyExc_OSError); return NULL; } @@ -7321,6 +7300,7 @@ static PyObject * _socket_if_indextoname_impl(PyObject *module, NET_IFINDEX index) /*[clinic end generated code: output=e48bc324993052e0 input=c93f753d0cf6d7d1]*/ { + errno = ENXIO; // in case 'if_indextoname' does not set errno char name[IF_NAMESIZE + 1]; if (if_indextoname(index, name) == NULL) { PyErr_SetFromErrno(PyExc_OSError); diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index d0d5223e5acea8..d353f406831ecd 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -13,7 +13,7 @@ module _symtable _symtable.symtable source: object - filename: object(converter='PyUnicode_FSDecoder') + filename: unicode_fs_decoded startstr: str / @@ -23,7 +23,7 @@ Return symbol and scope dictionaries used internally by compiler. static PyObject * _symtable_symtable_impl(PyObject *module, PyObject *source, PyObject *filename, const char *startstr) -/*[clinic end generated code: output=59eb0d5fc7285ac4 input=9dd8a50c0c36a4d7]*/ +/*[clinic end generated code: output=59eb0d5fc7285ac4 input=436ffff90d02e4f6]*/ { struct symtable *st; PyObject *t; @@ -47,12 +47,10 @@ _symtable_symtable_impl(PyObject *module, PyObject *source, else { PyErr_SetString(PyExc_ValueError, "symtable() arg 3 must be 'exec' or 'eval' or 'single'"); - Py_DECREF(filename); Py_XDECREF(source_copy); return NULL; } st = _Py_SymtableStringObjectFlags(str, filename, start, &cf); - Py_DECREF(filename); Py_XDECREF(source_copy); if (st == NULL) { return NULL; diff --git a/Modules/xxlimited.c b/Modules/xxlimited.c index 26ac35734fb060..09c8d9487f5426 100644 --- a/Modules/xxlimited.c +++ b/Modules/xxlimited.c @@ -14,7 +14,9 @@ This module roughly corresponds to:: class Xxo: - """A class that explicitly stores attributes in an internal dict""" + """A class that explicitly stores attributes in an internal dict + (to simulate custom attribute handling). + """ def __init__(self): # In the C class, "_x_attr" is not accessible from Python code @@ -85,13 +87,16 @@ typedef struct { // Instance state typedef struct { PyObject_HEAD - PyObject *x_attr; /* Attributes dictionary */ + PyObject *x_attr; /* Attributes dictionary. + * May be NULL, which acts as an + * empty dict. + */ char x_buffer[BUFSIZE]; /* buffer for Py_buffer */ Py_ssize_t x_exports; /* how many buffer are exported */ } XxoObject; #define XxoObject_CAST(op) ((XxoObject *)(op)) -// XXX: no good way to do this yet +// TODO: full support for type-checking was added in 3.14 (Py_tp_token) // #define XxoObject_Check(v) Py_IS_TYPE(v, Xxo_Type) static XxoObject * @@ -112,8 +117,13 @@ newXxoObject(PyObject *module) return self; } -/* Xxo finalization */ +/* Xxo finalization. + * + * Types that store references to other PyObjects generally need to implement + * the GC slots: traverse, clear, dealloc, and (optionally) finalize. + */ +// traverse: Visit all references from an object, including its type static int Xxo_traverse(PyObject *op, visitproc visit, void *arg) { @@ -126,6 +136,7 @@ Xxo_traverse(PyObject *op, visitproc visit, void *arg) return 0; } +// clear: drop references in order to break all reference cycles static int Xxo_clear(PyObject *op) { @@ -134,6 +145,8 @@ Xxo_clear(PyObject *op) return 0; } +// finalize: like clear, but should leave the object in a consistent state. +// Equivalent to `__del__` in Python. static void Xxo_finalize(PyObject *op) { @@ -141,6 +154,7 @@ Xxo_finalize(PyObject *op) Py_CLEAR(self->x_attr); } +// dealloc: drop all remaining references and free memory static void Xxo_dealloc(PyObject *self) { @@ -155,6 +169,7 @@ Xxo_dealloc(PyObject *self) /* Xxo attribute handling */ +// Get an attribute. static PyObject * Xxo_getattro(PyObject *op, PyObject *name) { @@ -168,9 +183,12 @@ Xxo_getattro(PyObject *op, PyObject *name) return NULL; } } + // Fall back to generic implementation (this handles special attributes, + // raising AttributeError, etc.) return PyObject_GenericGetAttr(op, name); } +// Set or delete an attribute. static int Xxo_setattro(PyObject *op, PyObject *name, PyObject *v) { @@ -198,7 +216,9 @@ Xxo_setattro(PyObject *op, PyObject *name, PyObject *v) } } -/* Xxo methods */ +/* Xxo methods: C functions plus a PyMethodDef array that lists them and + * specifies metadata. + */ static PyObject * Xxo_demo(PyObject *op, PyTypeObject *defining_class, @@ -234,7 +254,10 @@ static PyMethodDef Xxo_methods[] = { {NULL, NULL} /* sentinel */ }; -/* Xxo buffer interface */ +/* Xxo buffer interface: C functions later referenced from PyType_Slot array. + * Other interfaces (e.g. for sequence-like or number-like types) are defined + * similarly. + */ static int Xxo_getbuffer(PyObject *op, Py_buffer *view, int flags) @@ -300,6 +323,7 @@ static PyType_Spec Xxo_Type_spec = { /* Str type definition*/ static PyType_Slot Str_Type_slots[] = { + // slots array intentionally kept empty {0, 0}, /* sentinel */ }; @@ -400,12 +424,32 @@ xx_modexec(PyObject *m) } static PyModuleDef_Slot xx_slots[] = { + + /* exec function to initialize the module (called as part of import + * after the object was added to sys.modules) + */ {Py_mod_exec, xx_modexec}, + + /* Signal that this module supports being loaded in multiple interpreters + * with separate GILs (global interpreter locks). + * See "Isolating Extension Modules" on how to prepare a module for this: + * https://docs.python.org/3/howto/isolating-extensions.html + */ {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + + /* Signal that this module does not rely on the GIL for its own needs. + * Without this slot, free-threaded builds of CPython will enable + * the GIL when this module is loaded. + */ {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL} }; +// Module finalization: modules that hold references in their module state +// need to implement the fullowing GC hooks. They're similar to the ones for +// types (see "Xxo finalization"). + static int xx_traverse(PyObject *module, visitproc visit, void *arg) { @@ -424,6 +468,13 @@ xx_clear(PyObject *module) return 0; } +static void +xx_free(void *module) +{ + // allow xx_modexec to omit calling xx_clear on error + (void)xx_clear((PyObject *)module); +} + static struct PyModuleDef xxmodule = { PyModuleDef_HEAD_INIT, .m_name = "xxlimited", @@ -433,13 +484,13 @@ static struct PyModuleDef xxmodule = { .m_slots = xx_slots, .m_traverse = xx_traverse, .m_clear = xx_clear, - /* m_free is not necessary here: xx_clear clears all references, - * and the module state is deallocated along with the module. - */ + .m_free = xx_free, }; -/* Export function for the module (*must* be called PyInit_xx) */ +/* Export function for the module. *Must* be called PyInit_xx; usually it is + * the only non-`static` object in a module definition. + */ PyMODINIT_FUNC PyInit_xxlimited(void) diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index d4b4b91697c08e..cb360f261608bd 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -263,7 +263,9 @@ static compobject * newcompobject(PyTypeObject *type) { compobject *self; - self = PyObject_New(compobject, type); + assert(type != NULL); + assert(type->tp_alloc != NULL); + self = _compobject_CAST(type->tp_alloc(type, 0)); if (self == NULL) return NULL; self->eof = 0; @@ -706,33 +708,41 @@ zlib_decompressobj_impl(PyObject *module, int wbits, PyObject *zdict) } static void -Dealloc(compobject *self) +compobject_dealloc_impl(PyObject *op, int (*dealloc)(z_streamp)) { - PyTypeObject *type = Py_TYPE(self); + PyTypeObject *type = Py_TYPE(op); + PyObject_GC_UnTrack(op); + compobject *self = _compobject_CAST(op); + if (self->is_initialised) { + (void)dealloc(&self->zst); + } PyThread_free_lock(self->lock); Py_XDECREF(self->unused_data); Py_XDECREF(self->unconsumed_tail); Py_XDECREF(self->zdict); - PyObject_Free(self); + type->tp_free(self); Py_DECREF(type); } +static int +compobject_traverse(PyObject *op, visitproc visit, void *arg) +{ + compobject *self = _compobject_CAST(op); + Py_VISIT(Py_TYPE(op)); + Py_VISIT(self->zdict); + return 0; +} + static void Comp_dealloc(PyObject *op) { - compobject *self = _compobject_CAST(op); - if (self->is_initialised) - (void)deflateEnd(&self->zst); - Dealloc(self); + compobject_dealloc_impl(op, &deflateEnd); } static void Decomp_dealloc(PyObject *op) { - compobject *self = _compobject_CAST(op); - if (self->is_initialised) - (void)inflateEnd(&self->zst); - Dealloc(self); + compobject_dealloc_impl(op, &inflateEnd); } /*[clinic input] @@ -1357,6 +1367,8 @@ typedef struct { char needs_input; } ZlibDecompressor; +#define ZlibDecompressor_CAST(op) ((ZlibDecompressor *)(op)) + /*[clinic input] class zlib.ZlibDecompressor "ZlibDecompressor *" "&ZlibDecompressorType" [clinic start generated code]*/ @@ -1365,8 +1377,9 @@ class zlib.ZlibDecompressor "ZlibDecompressor *" "&ZlibDecompressorType" static void ZlibDecompressor_dealloc(PyObject *op) { - ZlibDecompressor *self = (ZlibDecompressor*)op; - PyObject *type = (PyObject *)Py_TYPE(self); + PyTypeObject *type = Py_TYPE(op); + PyObject_GC_UnTrack(op); + ZlibDecompressor *self = ZlibDecompressor_CAST(op); PyThread_free_lock(self->lock); if (self->is_initialised) { inflateEnd(&self->zst); @@ -1374,10 +1387,19 @@ ZlibDecompressor_dealloc(PyObject *op) PyMem_Free(self->input_buffer); Py_CLEAR(self->unused_data); Py_CLEAR(self->zdict); - PyObject_Free(self); + type->tp_free(self); Py_DECREF(type); } +static int +ZlibDecompressor_traverse(PyObject *op, visitproc visit, void *arg) +{ + ZlibDecompressor *self = ZlibDecompressor_CAST(op); + Py_VISIT(Py_TYPE(op)); + Py_VISIT(self->zdict); + return 0; +} + static int set_inflate_zdict_ZlibDecompressor(zlibstate *state, ZlibDecompressor *self) { @@ -1729,7 +1751,9 @@ ZlibDecompressor__new__(PyTypeObject *cls, args, kwargs, format, keywords, &wbits, &zdict)) { return NULL; } - ZlibDecompressor *self = PyObject_New(ZlibDecompressor, cls); + + assert(cls != NULL && cls->tp_alloc != NULL); + ZlibDecompressor *self = ZlibDecompressor_CAST(cls->tp_alloc(cls, 0)); if (self == NULL) { return NULL; } @@ -1937,6 +1961,7 @@ static PyMethodDef zlib_methods[] = static PyType_Slot Comptype_slots[] = { {Py_tp_dealloc, Comp_dealloc}, + {Py_tp_traverse, compobject_traverse}, {Py_tp_methods, comp_methods}, {0, 0}, }; @@ -1944,12 +1969,17 @@ static PyType_Slot Comptype_slots[] = { static PyType_Spec Comptype_spec = { .name = "zlib.Compress", .basicsize = sizeof(compobject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, + .flags = ( + Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_DISALLOW_INSTANTIATION + | Py_TPFLAGS_HAVE_GC + ), .slots= Comptype_slots, }; static PyType_Slot Decomptype_slots[] = { {Py_tp_dealloc, Decomp_dealloc}, + {Py_tp_traverse, compobject_traverse}, {Py_tp_methods, Decomp_methods}, {Py_tp_members, Decomp_members}, {0, 0}, @@ -1958,12 +1988,17 @@ static PyType_Slot Decomptype_slots[] = { static PyType_Spec Decomptype_spec = { .name = "zlib.Decompress", .basicsize = sizeof(compobject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, + .flags = ( + Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_DISALLOW_INSTANTIATION + | Py_TPFLAGS_HAVE_GC + ), .slots = Decomptype_slots, }; static PyType_Slot ZlibDecompressor_type_slots[] = { {Py_tp_dealloc, ZlibDecompressor_dealloc}, + {Py_tp_traverse, ZlibDecompressor_traverse}, {Py_tp_members, ZlibDecompressor_members}, {Py_tp_new, ZlibDecompressor__new__}, {Py_tp_doc, (char *)ZlibDecompressor__new____doc__}, @@ -1978,7 +2013,11 @@ static PyType_Spec ZlibDecompressor_type_spec = { // ZlibDecompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag // which prevents to create a subclass. // So calling PyType_GetModuleState() in this file is always safe. - .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), + .flags = ( + Py_TPFLAGS_DEFAULT + | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC + ), .slots = ZlibDecompressor_type_slots, }; PyDoc_STRVAR(zlib_module_documentation, diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index b5d5ca9178ebdb..bb4216ecc5aeac 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -709,7 +709,9 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); PyByteArrayObject *self = _PyByteArray_CAST(op); Py_ssize_t start, stop, step, slicelen; - char *buf = PyByteArray_AS_STRING(self); + // Do not store a reference to the internal buffer since + // index.__index__() or _getbytevalue() may alter 'self'. + // See https://github.com/python/cpython/issues/91153. if (_PyIndex_Check(index)) { Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError); @@ -744,7 +746,7 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value } else { assert(0 <= ival && ival < 256); - buf[i] = (char)ival; + PyByteArray_AS_STRING(self)[i] = (char)ival; return 0; } } @@ -805,6 +807,7 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value /* Delete slice */ size_t cur; Py_ssize_t i; + char *buf = PyByteArray_AS_STRING(self); if (!_canresize(self)) return -1; @@ -845,6 +848,7 @@ bytearray_ass_subscript_lock_held(PyObject *op, PyObject *index, PyObject *value /* Assign slice */ Py_ssize_t i; size_t cur; + char *buf = PyByteArray_AS_STRING(self); if (needed != slicelen) { PyErr_Format(PyExc_ValueError, @@ -1533,14 +1537,14 @@ bytearray_removesuffix_impl(PyByteArrayObject *self, Py_buffer *suffix) /*[clinic input] bytearray.resize size: Py_ssize_t - New size to resize to.. + New size to resize to. / Resize the internal buffer of bytearray to len. [clinic start generated code]*/ static PyObject * bytearray_resize_impl(PyByteArrayObject *self, Py_ssize_t size) -/*[clinic end generated code: output=f73524922990b2d9 input=75fd4d17c4aa47d3]*/ +/*[clinic end generated code: output=f73524922990b2d9 input=6c9a260ca7f72071]*/ { Py_ssize_t start_size = PyByteArray_GET_SIZE(self); int result = PyByteArray_Resize((PyObject *)self, size); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index fc407ec6bf99d6..eb13f16f67295c 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -975,8 +975,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, /* 2: size preallocated for %s */ if (alloc > 2) { res = _PyBytesWriter_Prepare(&writer, res, alloc - 2); - if (res == NULL) + if (res == NULL) { + Py_XDECREF(temp); goto error; + } } #ifndef NDEBUG char *before = res; @@ -1075,10 +1077,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } /* Unescape a backslash-escaped string. */ -PyObject *_PyBytes_DecodeEscape(const char *s, +PyObject *_PyBytes_DecodeEscape2(const char *s, Py_ssize_t len, const char *errors, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { int c; char *p; @@ -1092,7 +1095,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s, return NULL; writer.overallocate = 1; - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; end = s + len; while (s < end) { @@ -1130,9 +1134,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, c = (c<<3) + *s++ - '0'; } if (c > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; } } *p++ = c; @@ -1173,9 +1178,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, break; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = (unsigned char)s[-1]; + /* Back up one char, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 1; } *p++ = '\\'; s--; @@ -1195,18 +1201,19 @@ PyObject *PyBytes_DecodeEscape(const char *s, Py_ssize_t Py_UNUSED(unicode), const char *Py_UNUSED(recode_encoding)) { - const char* first_invalid_escape; - PyObject *result = _PyBytes_DecodeEscape(s, len, errors, - &first_invalid_escape); + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyBytes_DecodeEscape2(s, len, errors, + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "b\"\\%.3s\" is an invalid octal escape sequence. " + "b\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -1216,7 +1223,7 @@ PyObject *PyBytes_DecodeEscape(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "b\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. ", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -3142,7 +3149,7 @@ PyBytes_Concat(PyObject **pv, PyObject *w) return; } - if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) { + if (_PyObject_IsUniquelyReferenced(*pv) && PyBytes_CheckExact(*pv)) { /* Only one reference, so we can resize in place */ Py_ssize_t oldsize; Py_buffer wb; @@ -3227,7 +3234,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) Py_DECREF(v); return 0; } - if (Py_REFCNT(v) != 1) { + if (!_PyObject_IsUniquelyReferenced(v)) { if (oldsize < newsize) { *pv = _PyBytes_FromSize(newsize, 0); if (*pv) { diff --git a/Objects/classobject.c b/Objects/classobject.c index 58e1d17977322e..e71f301f2efd77 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -7,6 +7,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/classobject.c.h" @@ -245,8 +246,7 @@ method_dealloc(PyObject *self) { PyMethodObject *im = _PyMethodObject_CAST(self); _PyObject_GC_UNTRACK(im); - if (im->im_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *)im); + FT_CLEAR_WEAKREFS(self, im->im_weakreflist); Py_DECREF(im->im_func); Py_XDECREF(im->im_self); assert(Py_IS_TYPE(self, &PyMethod_Type)); diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index ffb45ade11f6dc..6f13865177dde5 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -599,7 +599,7 @@ PyDoc_STRVAR(bytearray_resize__doc__, "Resize the internal buffer of bytearray to len.\n" "\n" " size\n" -" New size to resize to.."); +" New size to resize to."); #define BYTEARRAY_RESIZE_METHODDEF \ {"resize", (PyCFunction)bytearray_resize, METH_O, bytearray_resize__doc__}, @@ -1796,4 +1796,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl((PyByteArrayObject *)self); } -/*[clinic end generated code: output=be6d28193bc96a2c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fdfe41139c91e409 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/interpolationobject.c.h b/Objects/clinic/interpolationobject.c.h index 7a94dabafc92f2..2030e17e49e47a 100644 --- a/Objects/clinic/interpolationobject.c.h +++ b/Objects/clinic/interpolationobject.c.h @@ -47,26 +47,31 @@ interpolation_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *argsbuf[4]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); - Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 2; + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; PyObject *value; - PyObject *expression; + PyObject *expression = &_Py_STR(empty); PyObject *conversion = Py_None; PyObject *format_spec = &_Py_STR(empty); fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, - /*minpos*/ 2, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + /*minpos*/ 1, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } value = fastargs[0]; - if (!PyUnicode_Check(fastargs[1])) { - _PyArg_BadArgument("Interpolation", "argument 'expression'", "str", fastargs[1]); - goto exit; - } - expression = fastargs[1]; if (!noptargs) { goto skip_optional_pos; } + if (fastargs[1]) { + if (!PyUnicode_Check(fastargs[1])) { + _PyArg_BadArgument("Interpolation", "argument 'expression'", "str", fastargs[1]); + goto exit; + } + expression = fastargs[1]; + if (!--noptargs) { + goto skip_optional_pos; + } + } if (fastargs[2]) { if (!_conversion_converter(fastargs[2], &conversion)) { goto exit; @@ -86,4 +91,4 @@ interpolation_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=599742a5ccd6f060 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2391391e2d7708c0 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/odictobject.c.h b/Objects/clinic/odictobject.c.h index e71c29a1b268d0..894e9be91bbdce 100644 --- a/Objects/clinic/odictobject.c.h +++ b/Objects/clinic/odictobject.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(OrderedDict_fromkeys__doc__, @@ -73,6 +74,53 @@ OrderedDict_fromkeys(PyObject *type, PyObject *const *args, Py_ssize_t nargs, Py return return_value; } +PyDoc_STRVAR(OrderedDict___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n"); + +#define ORDEREDDICT___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)OrderedDict___sizeof__, METH_NOARGS, OrderedDict___sizeof____doc__}, + +static Py_ssize_t +OrderedDict___sizeof___impl(PyODictObject *self); + +static PyObject * +OrderedDict___sizeof__(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + Py_ssize_t _return_value; + + Py_BEGIN_CRITICAL_SECTION(self); + _return_value = OrderedDict___sizeof___impl((PyODictObject *)self); + Py_END_CRITICAL_SECTION(); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(OrderedDict___reduce____doc__, +"__reduce__($self, /)\n" +"--\n" +"\n" +"Return state information for pickling"); + +#define ORDEREDDICT___REDUCE___METHODDEF \ + {"__reduce__", (PyCFunction)OrderedDict___reduce__, METH_NOARGS, OrderedDict___reduce____doc__}, + +static PyObject * +OrderedDict___reduce___impl(PyODictObject *od); + +static PyObject * +OrderedDict___reduce__(PyObject *od, PyObject *Py_UNUSED(ignored)) +{ + return OrderedDict___reduce___impl((PyODictObject *)od); +} + PyDoc_STRVAR(OrderedDict_setdefault__doc__, "setdefault($self, /, key, default=None)\n" "--\n" @@ -135,7 +183,9 @@ OrderedDict_setdefault(PyObject *self, PyObject *const *args, Py_ssize_t nargs, } default_value = args[1]; skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = OrderedDict_setdefault_impl((PyODictObject *)self, key, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -204,7 +254,9 @@ OrderedDict_pop(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObjec } default_value = args[1]; skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = OrderedDict_pop_impl((PyODictObject *)self, key, default_value); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -272,12 +324,62 @@ OrderedDict_popitem(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyO goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = OrderedDict_popitem_impl((PyODictObject *)self, last); + Py_END_CRITICAL_SECTION(); exit: return return_value; } +PyDoc_STRVAR(OrderedDict_clear__doc__, +"clear($self, /)\n" +"--\n" +"\n" +"Remove all items from ordered dict."); + +#define ORDEREDDICT_CLEAR_METHODDEF \ + {"clear", (PyCFunction)OrderedDict_clear, METH_NOARGS, OrderedDict_clear__doc__}, + +static PyObject * +OrderedDict_clear_impl(PyODictObject *self); + +static PyObject * +OrderedDict_clear(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = OrderedDict_clear_impl((PyODictObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(OrderedDict_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"A shallow copy of ordered dict."); + +#define ORDEREDDICT_COPY_METHODDEF \ + {"copy", (PyCFunction)OrderedDict_copy, METH_NOARGS, OrderedDict_copy__doc__}, + +static PyObject * +OrderedDict_copy_impl(PyObject *od); + +static PyObject * +OrderedDict_copy(PyObject *od, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(od); + return_value = OrderedDict_copy_impl(od); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + PyDoc_STRVAR(OrderedDict_move_to_end__doc__, "move_to_end($self, /, key, last=True)\n" "--\n" @@ -342,9 +444,11 @@ OrderedDict_move_to_end(PyObject *self, PyObject *const *args, Py_ssize_t nargs, goto exit; } skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = OrderedDict_move_to_end_impl((PyODictObject *)self, key, last); + Py_END_CRITICAL_SECTION(); exit: return return_value; } -/*[clinic end generated code: output=7d8206823bb1f419 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7bc997ca7900f06f input=a9049054013a1b77]*/ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 92eaf5e00bc7dd..287558302aa68f 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -17,6 +17,7 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal() #include "pycore_uniqueid.h" // _PyObject_AssignUniqueId() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/codeobject.c.h" #include <stdbool.h> @@ -1012,8 +1013,8 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) * source location tracking (co_lines/co_positions) ******************/ -int -PyCode_Addr2Line(PyCodeObject *co, int addrq) +static int +_PyCode_Addr2Line(PyCodeObject *co, int addrq) { if (addrq < 0) { return co->co_firstlineno; @@ -1027,6 +1028,33 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) return _PyCode_CheckLineNumber(addrq, &bounds); } +int +_PyCode_SafeAddr2Line(PyCodeObject *co, int addrq) +{ + if (addrq < 0) { + return co->co_firstlineno; + } + if (co->_co_monitoring && co->_co_monitoring->lines) { + return _Py_Instrumentation_GetLine(co, addrq/sizeof(_Py_CODEUNIT)); + } + if (!(addrq >= 0 && addrq < _PyCode_NBYTES(co))) { + return -1; + } + PyCodeAddressRange bounds; + _PyCode_InitAddressRange(co, &bounds); + return _PyCode_CheckLineNumber(addrq, &bounds); +} + +int +PyCode_Addr2Line(PyCodeObject *co, int addrq) +{ + int lineno; + Py_BEGIN_CRITICAL_SECTION(co); + lineno = _PyCode_Addr2Line(co, addrq); + Py_END_CRITICAL_SECTION(); + return lineno; +} + void _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) { @@ -1714,7 +1742,7 @@ static int identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, PyObject *globalnames, PyObject *attrnames, PyObject *globalsns, PyObject *builtinsns, - struct co_unbound_counts *counts) + struct co_unbound_counts *counts, int *p_numdupes) { // This function is inspired by inspect.getclosurevars(). // It would be nicer if we had something similar to co_localspluskinds, @@ -1729,6 +1757,7 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, assert(builtinsns == NULL || PyDict_Check(builtinsns)); assert(counts == NULL || counts->total == 0); struct co_unbound_counts unbound = {0}; + int numdupes = 0; Py_ssize_t len = Py_SIZE(co); for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); @@ -1747,6 +1776,12 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, if (PySet_Add(attrnames, name) < 0) { return -1; } + if (PySet_Contains(globalnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } } else if (inst.op.code == LOAD_GLOBAL) { int oparg = GET_OPARG(co, i, inst.op.arg); @@ -1778,11 +1813,20 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, if (PySet_Add(globalnames, name) < 0) { return -1; } + if (PySet_Contains(attrnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } } } if (counts != NULL) { *counts = unbound; } + if (p_numdupes != NULL) { + *p_numdupes = numdupes; + } return 0; } @@ -1932,20 +1976,24 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, // Fill in unbound.globals and unbound.numattrs. struct co_unbound_counts unbound = {0}; + int numdupes = 0; Py_BEGIN_CRITICAL_SECTION(co); res = identify_unbound_names( tstate, co, globalnames, attrnames, globalsns, builtinsns, - &unbound); + &unbound, &numdupes); Py_END_CRITICAL_SECTION(); if (res < 0) { goto finally; } assert(unbound.numunknown == 0); - assert(unbound.total <= counts->unbound.total); + assert(unbound.total - numdupes <= counts->unbound.total); assert(counts->unbound.numunknown == counts->unbound.total); - unbound.numunknown = counts->unbound.total - unbound.total; - unbound.total = counts->unbound.total; + // There may be a name that is both a global and an attr. + int totalunbound = counts->unbound.total + numdupes; + unbound.numunknown = totalunbound - unbound.total; + unbound.total = totalunbound; counts->unbound = unbound; + counts->total += numdupes; res = 0; finally: @@ -1955,12 +2003,129 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, } +int +_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + // We don't worry about co_executors, co_instrumentation, + // or co_monitoring. They are essentially ephemeral. + if (co->co_extra != NULL) { + errmsg = "only basic code objects are supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts, + const char **p_errmsg) +{ + const char *errmsg = NULL; + if (counts->numfree > 0) { // It's a closure. + errmsg = "closures not supported"; + } + else if (counts->unbound.globals.numglobal > 0) { + errmsg = "globals not supported"; + } + else if (counts->unbound.globals.numbuiltin > 0 + && counts->unbound.globals.numunknown > 0) + { + errmsg = "globals not supported"; + } + // Otherwise we don't check counts.unbound.globals.numunknown since we can't + // distinguish beween globals and builtins here. + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_VerifyStateless(PyThreadState *tstate, + PyCodeObject *co, PyObject *globalnames, + PyObject *globalsns, PyObject *builtinsns) +{ + const char *errmsg; + _PyCode_var_counts_t counts = {0}; + _PyCode_GetVarCounts(co, &counts); + if (_PyCode_SetUnboundVarCounts( + tstate, co, &counts, globalnames, NULL, + globalsns, builtinsns) < 0) + { + return -1; + } + // We may consider relaxing the internal state constraints + // if it becomes a problem. + if (!_PyCode_CheckNoInternalState(co, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + if (builtinsns != NULL) { + // Make sure the next check will fail for globals, + // even if there aren't any builtins. + counts.unbound.globals.numbuiltin += 1; + } + if (!_PyCode_CheckNoExternalState(co, &counts, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + // Note that we don't check co->co_flags & CO_NESTED for anything here. + return 0; +} + + +int +_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + if (co->co_flags & CO_GENERATOR) { + errmsg = "generators not supported"; + } + else if (co->co_flags & CO_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ITERABLE_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ASYNC_GENERATOR) { + errmsg = "generators not supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + /* Here "value" means a non-None value, since a bare return is identical * to returning None explicitly. Likewise a missing return statement * at the end of the function is turned into "return None". */ static int code_returns_only_none(PyCodeObject *co) { + if (!_PyCode_CheckPureFunction(co, NULL)) { + return 0; + } + int len = (int)Py_SIZE(co); + assert(len > 0); + + // The last instruction either returns or raises. We can take advantage + // of that for a quick exit. + _Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1); + // Look up None in co_consts. Py_ssize_t nconsts = PyTuple_Size(co->co_consts); int none_index = 0; @@ -1971,26 +2136,42 @@ code_returns_only_none(PyCodeObject *co) } if (none_index == nconsts) { // None wasn't there, which means there was no implicit return, - // "return", or "return None". That means there must be - // an explicit return (non-None). - return 0; - } + // "return", or "return None". - // Walk the bytecode, looking for RETURN_VALUE. - Py_ssize_t len = Py_SIZE(co); - for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { - _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); - if (IS_RETURN_OPCODE(inst.op.code)) { - assert(i != 0); - // Ignore it if it returns None. - _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); - if (prev.op.code == LOAD_CONST) { - // We don't worry about EXTENDED_ARG for now. - if (prev.op.arg == none_index) { - continue; + // That means there must be + // an explicit return (non-None), or it only raises. + if (IS_RETURN_OPCODE(final.op.code)) { + // It was an explicit return (non-None). + return 0; + } + // It must end with a raise then. We still have to walk the + // bytecode to see if there's any explicit return (non-None). + assert(IS_RAISE_OPCODE(final.op.code)); + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + // We alraedy know it isn't returning None. + return 0; + } + } + // It must only raise. + } + else { + // Walk the bytecode, looking for RETURN_VALUE. + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + assert(i != 0); + // Ignore it if it returns None. + _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); + if (prev.op.code == LOAD_CONST) { + // We don't worry about EXTENDED_ARG for now. + if (prev.op.arg == none_index) { + continue; + } } + return 0; } - return 0; } } return 1; @@ -2230,6 +2411,8 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyObject *self) { + PyThreadState *tstate = PyThreadState_GET(); + _Py_atomic_add_uint64(&tstate->interp->_code_object_generation, 1); PyCodeObject *co = _PyCodeObject_CAST(self); _PyObject_ResurrectStart(self); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); @@ -2281,9 +2464,7 @@ code_dealloc(PyObject *self) Py_XDECREF(co->_co_cached->_co_varnames); PyMem_Free(co->_co_cached); } - if (co->co_weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, co->co_weakreflist); free_monitoring_data(co->_co_monitoring); #ifdef Py_GIL_DISABLED // The first element always points to the mutable bytecode at the end of @@ -3176,12 +3357,29 @@ _PyCodeArray_New(Py_ssize_t size) return arr; } +// Get the underlying code unit, leaving instrumentation +static _Py_CODEUNIT +deopt_code_unit(PyCodeObject *code, int i) +{ + _Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i; + _Py_CODEUNIT inst = { + .cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)}; + int opcode = inst.op.code; + if (opcode < MIN_INSTRUMENTED_OPCODE) { + inst.op.code = _PyOpcode_Deopt[opcode]; + assert(inst.op.code < MIN_SPECIALIZED_OPCODE); + } + // JIT should not be enabled with free-threading + assert(inst.op.code != ENTER_EXECUTOR); + return inst; +} + static void copy_code(_Py_CODEUNIT *dst, PyCodeObject *co) { int code_len = (int) Py_SIZE(co); for (int i = 0; i < code_len; i += _PyInstruction_GetLength(co, i)) { - dst[i] = _Py_GetBaseCodeUnit(co, i); + dst[i] = deopt_code_unit(co, i); } _PyCode_Quicken(dst, code_len, 1); } @@ -3214,7 +3412,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 59b0cf1ce7d422..62be2c53d670ef 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -547,13 +547,13 @@ static inline uint8_t calculate_log2_keysize(Py_ssize_t minsize) { #if SIZEOF_LONG == SIZEOF_SIZE_T - minsize = (minsize | PyDict_MINSIZE) - 1; - return _Py_bit_length(minsize | (PyDict_MINSIZE-1)); + minsize = Py_MAX(minsize, PyDict_MINSIZE); + return _Py_bit_length(minsize - 1); #elif defined(_MSC_VER) - // On 64bit Windows, sizeof(long) == 4. - minsize = (minsize | PyDict_MINSIZE) - 1; + // On 64bit Windows, sizeof(long) == 4. We cannot use _Py_bit_length. + minsize = Py_MAX(minsize, PyDict_MINSIZE); unsigned long msb; - _BitScanReverse64(&msb, (uint64_t)minsize); + _BitScanReverse64(&msb, (uint64_t)minsize - 1); return (uint8_t)(msb + 1); #else uint8_t log2_size; @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif @@ -1082,7 +1082,7 @@ compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk, void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) { PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; - PyObject *ep_key = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_key); + PyObject *ep_key = FT_ATOMIC_LOAD_PTR_CONSUME(ep->me_key); assert(ep_key != NULL); assert(PyUnicode_CheckExact(ep_key)); if (ep_key == key || @@ -1375,7 +1375,7 @@ compare_unicode_generic_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) { PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; - PyObject *startkey = _Py_atomic_load_ptr_relaxed(&ep->me_key); + PyObject *startkey = _Py_atomic_load_ptr_consume(&ep->me_key); assert(startkey == NULL || PyUnicode_CheckExact(ep->me_key)); assert(!PyUnicode_CheckExact(key)); @@ -1418,7 +1418,7 @@ compare_unicode_unicode_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) { PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; - PyObject *startkey = _Py_atomic_load_ptr_relaxed(&ep->me_key); + PyObject *startkey = _Py_atomic_load_ptr_consume(&ep->me_key); if (startkey == key) { assert(PyUnicode_CheckExact(startkey)); return 1; @@ -1454,7 +1454,7 @@ compare_generic_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) { PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix]; - PyObject *startkey = _Py_atomic_load_ptr_relaxed(&ep->me_key); + PyObject *startkey = _Py_atomic_load_ptr_consume(&ep->me_key); if (startkey == key) { return 1; } @@ -1723,6 +1723,14 @@ static inline int insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, Py_hash_t hash, PyObject *key, PyObject *value) { + // gh-140551: If dict was cleared in _Py_dict_lookup, + // we have to resize one more time to force general key kind. + if (DK_IS_UNICODE(mp->ma_keys) && !PyUnicode_CheckExact(key)) { + if (insertion_resize(interp, mp, 0) < 0) + return -1; + assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); + } + if (mp->ma_keys->dk_usable <= 0) { /* Need to resize. */ if (insertion_resize(interp, mp, 1) < 0) { @@ -1819,38 +1827,31 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) { PyObject *old_value; + Py_ssize_t ix; ASSERT_DICT_LOCKED(mp); - if (DK_IS_UNICODE(mp->ma_keys) && !PyUnicode_CheckExact(key)) { - if (insertion_resize(interp, mp, 0) < 0) - goto Fail; - assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); - } - - if (_PyDict_HasSplitTable(mp)) { - Py_ssize_t ix = insert_split_key(mp->ma_keys, key, hash); + if (_PyDict_HasSplitTable(mp) && PyUnicode_CheckExact(key)) { + ix = insert_split_key(mp->ma_keys, key, hash); if (ix != DKIX_EMPTY) { insert_split_value(interp, mp, key, value, ix); Py_DECREF(key); Py_DECREF(value); return 0; } - - /* No space in shared keys. Resize and continue below. */ - if (insertion_resize(interp, mp, 1) < 0) { + // No space in shared keys. Go to insert_combined_dict() below. + } + else { + ix = _Py_dict_lookup(mp, key, hash, &old_value); + if (ix == DKIX_ERROR) goto Fail; - } } - Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); - if (ix == DKIX_ERROR) - goto Fail; - if (ix == DKIX_EMPTY) { - assert(!_PyDict_HasSplitTable(mp)); - /* Insert into new slot. */ - assert(old_value == NULL); + // insert_combined_dict() will convert from non DICT_KEYS_GENERAL table + // into DICT_KEYS_GENERAL table if key is not Unicode. + // We don't convert it before _Py_dict_lookup because non-Unicode key + // may change generic table into Unicode table. if (insert_combined_dict(interp, mp, hash, key, value) < 0) { goto Fail; } @@ -1862,10 +1863,14 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, if (old_value != value) { _PyDict_NotifyEvent(interp, PyDict_EVENT_MODIFIED, mp, key, value); assert(old_value != NULL); - assert(!_PyDict_HasSplitTable(mp)); if (DK_IS_UNICODE(mp->ma_keys)) { - PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; - STORE_VALUE(ep, value); + if (_PyDict_HasSplitTable(mp)) { + STORE_SPLIT_VALUE(mp, ix, value); + } + else { + PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; + STORE_VALUE(ep, value); + } } else { PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix]; @@ -2770,8 +2775,8 @@ PyDict_DelItem(PyObject *op, PyObject *key) return _PyDict_DelItem_KnownHash(op, key, hash); } -static int -delitem_knownhash_lock_held(PyObject *op, PyObject *key, Py_hash_t hash) +int +_PyDict_DelItem_KnownHash_LockHeld(PyObject *op, PyObject *key, Py_hash_t hash) { Py_ssize_t ix; PyDictObject *mp; @@ -2806,7 +2811,7 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) { int res; Py_BEGIN_CRITICAL_SECTION(op); - res = delitem_knownhash_lock_held(op, key, hash); + res = _PyDict_DelItem_KnownHash_LockHeld(op, key, hash); Py_END_CRITICAL_SECTION(); return res; } @@ -2915,6 +2920,11 @@ clear_lock_held(PyObject *op) ASSERT_CONSISTENT(mp); } +void +_PyDict_Clear_LockHeld(PyObject *op) { + clear_lock_held(op); +} + void PyDict_Clear(PyObject *op) { @@ -3178,9 +3188,10 @@ dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp, Py_ssize_t pos = 0; PyObject *key; Py_hash_t hash; - - if (dictresize(interp, mp, - estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) { + uint8_t new_size = Py_MAX( + estimate_log2_keysize(PySet_GET_SIZE(iterable)), + DK_LOG_SIZE(mp->ma_keys)); + if (dictresize(interp, mp, new_size, 0)) { Py_DECREF(mp); return NULL; } @@ -3852,7 +3863,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe } } - Py_ssize_t orig_size = other->ma_keys->dk_nentries; + Py_ssize_t orig_size = other->ma_used; Py_ssize_t pos = 0; Py_hash_t hash; PyObject *key, *value; @@ -3886,7 +3897,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe if (err != 0) return -1; - if (orig_size != other->ma_keys->dk_nentries) { + if (orig_size != other->ma_used) { PyErr_SetString(PyExc_RuntimeError, "dict mutated during update"); return -1; @@ -4320,6 +4331,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu PyDictObject *mp = (PyDictObject *)d; PyObject *value; Py_hash_t hash; + Py_ssize_t ix; PyInterpreterState *interp = _PyInterpreterState_GET(); ASSERT_DICT_LOCKED(d); @@ -4355,17 +4367,8 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu return 0; } - if (!PyUnicode_CheckExact(key) && DK_IS_UNICODE(mp->ma_keys)) { - if (insertion_resize(interp, mp, 0) < 0) { - if (result) { - *result = NULL; - } - return -1; - } - } - - if (_PyDict_HasSplitTable(mp)) { - Py_ssize_t ix = insert_split_key(mp->ma_keys, key, hash); + if (_PyDict_HasSplitTable(mp) && PyUnicode_CheckExact(key)) { + ix = insert_split_key(mp->ma_keys, key, hash); if (ix != DKIX_EMPTY) { PyObject *value = mp->ma_values->values[ix]; int already_present = value != NULL; @@ -4378,33 +4381,29 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } return already_present; } - - /* No space in shared keys. Resize and continue below. */ - if (insertion_resize(interp, mp, 1) < 0) { - goto error; - } + // No space in shared keys. Go to insert_combined_dict() below. } - - assert(!_PyDict_HasSplitTable(mp)); - - Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &value); - if (ix == DKIX_ERROR) { - if (result) { - *result = NULL; + else { + ix = _Py_dict_lookup(mp, key, hash, &value); + if (ix == DKIX_ERROR) { + if (result) { + *result = NULL; + } + return -1; } - return -1; } if (ix == DKIX_EMPTY) { - assert(!_PyDict_HasSplitTable(mp)); value = default_value; + // See comment to this function in insertdict. if (insert_combined_dict(interp, mp, hash, Py_NewRef(key), Py_NewRef(value)) < 0) { Py_DECREF(key); Py_DECREF(value); if (result) { *result = NULL; } + return -1; } STORE_USED(mp, mp->ma_used + 1); @@ -4422,12 +4421,6 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu *result = incref_result ? Py_NewRef(value) : value; } return 1; - -error: - if (result) { - *result = NULL; - } - return -1; } int @@ -4647,9 +4640,11 @@ dict_tp_clear(PyObject *op) static PyObject *dictiter_new(PyDictObject *, PyTypeObject *); -static Py_ssize_t -sizeof_lock_held(PyDictObject *mp) +Py_ssize_t +_PyDict_SizeOf_LockHeld(PyDictObject *mp) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); + size_t res = _PyObject_SIZE(Py_TYPE(mp)); if (_PyDict_HasSplitTable(mp)) { res += shared_keys_usable_size(mp->ma_keys) * sizeof(PyObject*); @@ -4668,7 +4663,7 @@ _PyDict_SizeOf(PyDictObject *mp) { Py_ssize_t res; Py_BEGIN_CRITICAL_SECTION(mp); - res = sizeof_lock_held(mp); + res = _PyDict_SizeOf_LockHeld(mp); Py_END_CRITICAL_SECTION(); return res; @@ -5524,7 +5519,7 @@ dictiter_iternext_threadsafe(PyDictObject *d, PyObject *self, k = _Py_atomic_load_ptr_acquire(&d->ma_keys); assert(i >= 0); if (_PyDict_HasSplitTable(d)) { - PyDictValues *values = _Py_atomic_load_ptr_relaxed(&d->ma_values); + PyDictValues *values = _Py_atomic_load_ptr_consume(&d->ma_values); if (values == NULL) { goto concurrent_modification; } @@ -5609,22 +5604,10 @@ dictiter_iternext_threadsafe(PyDictObject *d, PyObject *self, #endif -static bool -has_unique_reference(PyObject *op) -{ -#ifdef Py_GIL_DISABLED - return (_Py_IsOwnedByCurrentThread(op) && - op->ob_ref_local == 1 && - _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared) == 0); -#else - return Py_REFCNT(op) == 1; -#endif -} - static bool acquire_iter_result(PyObject *result) { - if (has_unique_reference(result)) { + if (_PyObject_IsUniquelyReferenced(result)) { Py_INCREF(result); return true; } @@ -5661,8 +5644,11 @@ dictiter_iternextitem(PyObject *self) } else { result = PyTuple_New(2); - if (result == NULL) + if (result == NULL) { + Py_DECREF(key); + Py_DECREF(value); return NULL; + } PyTuple_SET_ITEM(result, 0, key); PyTuple_SET_ITEM(result, 1, value); } @@ -5770,7 +5756,7 @@ dictreviter_iter_lock_held(PyDictObject *d, PyObject *self) } else if (Py_IS_TYPE(di, &PyDictRevIterItem_Type)) { result = di->di_result; - if (Py_REFCNT(result) == 1) { + if (_PyObject_IsUniquelyReferenced(result)) { PyObject *oldkey = PyTuple_GET_ITEM(result, 0); PyObject *oldvalue = PyTuple_GET_ITEM(result, 1); PyTuple_SET_ITEM(result, 0, Py_NewRef(key)); @@ -6858,7 +6844,7 @@ _PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value) dict_unhashable_type(name); return -1; } - return delitem_knownhash_lock_held((PyObject *)dict, name, hash); + return _PyDict_DelItem_KnownHash_LockHeld((PyObject *)dict, name, hash); } else { return setitem_lock_held(dict, name, value); } @@ -7125,7 +7111,7 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr Py_BEGIN_CRITICAL_SECTION(dict); if (dict->ma_values == values && FT_ATOMIC_LOAD_UINT8(values->valid)) { - value = _Py_atomic_load_ptr_relaxed(&values->values[ix]); + value = _Py_atomic_load_ptr_consume(&values->values[ix]); *attr = _Py_XNewRefWithLock(value); success = true; } else { diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 93e1973d6b32fc..700b8d4cbeb9fd 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2028,6 +2028,10 @@ PyFloat_Pack2(double x, char *data, int le) memcpy(&v, &x, sizeof(v)); v &= 0xffc0000000000ULL; bits = (unsigned short)(v >> 42); /* NaN's type & payload */ + /* set qNaN if no payload */ + if (!bits) { + bits |= (1<<9); + } } else { sign = (x < 0.0); @@ -2200,16 +2204,16 @@ PyFloat_Pack4(double x, char *data, int le) if ((v & (1ULL << 51)) == 0) { uint32_t u32; memcpy(&u32, &y, 4); - u32 &= ~(1 << 22); /* make sNaN */ + /* if have payload, make sNaN */ + if (u32 & 0x3fffff) { + u32 &= ~(1 << 22); + } memcpy(&y, &u32, 4); } #else uint32_t u32; memcpy(&u32, &y, 4); - if ((v & (1ULL << 51)) == 0) { - u32 &= ~(1 << 22); - } /* Workaround RISC-V: "If a NaN value is converted to a * different floating-point type, the result is the * canonical NaN of the new type". The canonical NaN here @@ -2220,6 +2224,10 @@ PyFloat_Pack4(double x, char *data, int le) /* add payload */ u32 -= (u32 & 0x3fffff); u32 += (uint32_t)((v & 0x7ffffffffffffULL) >> 29); + /* if have payload, make sNaN */ + if ((v & (1ULL << 51)) == 0 && (u32 & 0x3fffff)) { + u32 &= ~(1 << 22); + } memcpy(&y, &u32, 4); #endif diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 76b52efccf804f..944e98e062d19c 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1671,6 +1671,8 @@ frame_lineno_set_impl(PyFrameObject *self, PyObject *value) case PY_MONITORING_EVENT_PY_RESUME: case PY_MONITORING_EVENT_JUMP: case PY_MONITORING_EVENT_BRANCH: + case PY_MONITORING_EVENT_BRANCH_LEFT: + case PY_MONITORING_EVENT_BRANCH_RIGHT: case PY_MONITORING_EVENT_LINE: case PY_MONITORING_EVENT_PY_YIELD: /* Setting f_lineno is allowed for the above events */ diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 56df5730db0c55..9532c21fc7082e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1,13 +1,16 @@ /* Function object implementation */ #include "Python.h" +#include "pycore_code.h" // _PyCode_VerifyStateless() #include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_function.h" // _PyFunction_Vectorcall #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_Occurred() +#include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_stats.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() static const char * @@ -1146,9 +1149,7 @@ func_dealloc(PyObject *self) return; } _PyObject_GC_UNTRACK(op); - if (op->func_weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *) op); - } + FT_CLEAR_WEAKREFS(self, op->func_weakreflist); (void)func_clear((PyObject*)op); // These aren't cleared by func_clear(). _Py_DECREF_CODE((PyCodeObject *)op->func_code); @@ -1240,6 +1241,64 @@ PyTypeObject PyFunction_Type = { }; +int +_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func) +{ + assert(!PyErr_Occurred()); + assert(PyFunction_Check(func)); + + // Check the globals. + PyObject *globalsns = PyFunction_GET_GLOBALS(func); + if (globalsns != NULL && !PyDict_Check(globalsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported globals %R", globalsns); + return -1; + } + // Check the builtins. + PyObject *builtinsns = _PyFunction_GET_BUILTINS(func); + if (builtinsns != NULL && !PyDict_Check(builtinsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported builtins %R", builtinsns); + return -1; + } + // Disallow __defaults__. + PyObject *defaults = PyFunction_GET_DEFAULTS(func); + if (defaults != NULL) { + assert(PyTuple_Check(defaults)); // per PyFunction_New() + if (PyTuple_GET_SIZE(defaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "defaults not supported"); + return -1; + } + } + // Disallow __kwdefaults__. + PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func); + if (kwdefaults != NULL) { + assert(PyDict_Check(kwdefaults)); // per PyFunction_New() + if (PyDict_Size(kwdefaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "keyword defaults not supported"); + return -1; + } + } + // Disallow __closure__. + PyObject *closure = PyFunction_GET_CLOSURE(func); + if (closure != NULL) { + assert(PyTuple_Check(closure)); // per PyFunction_New() + if (PyTuple_GET_SIZE(closure) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported"); + return -1; + } + } + // Check the code. + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) { + return -1; + } + return 0; +} + + static int functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name) { diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index ec3d01f00a3c3c..8b526f43f1e053 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -7,6 +7,7 @@ #include "pycore_typevarobject.h" // _Py_typing_type_repr #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() #include "pycore_unionobject.h" // _Py_union_type_or, _PyGenericAlias_Check +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stdbool.h> @@ -33,9 +34,7 @@ ga_dealloc(PyObject *self) gaobject *alias = (gaobject *)self; _PyObject_GC_UNTRACK(self); - if (alias->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *)alias); - } + FT_CLEAR_WEAKREFS(self, alias->weakreflist); Py_XDECREF(alias->origin); Py_XDECREF(alias->args); Py_XDECREF(alias->parameters); @@ -65,7 +64,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p) for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { return -1; } } @@ -109,7 +108,7 @@ ga_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -126,7 +125,7 @@ ga_repr(PyObject *self) } if (len == 0) { // for something like tuple[()] we should print a "()" - if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) { goto error; } } @@ -526,12 +525,24 @@ _Py_subs_parameters(PyObject *self, PyObject *args, PyObject *parameters, PyObje return NULL; } if (unpack) { + if (!PyTuple_Check(arg)) { + Py_DECREF(newargs); + Py_DECREF(item); + Py_XDECREF(tuple_args); + PyObject *original = PyTuple_GET_ITEM(args, iarg); + PyErr_Format(PyExc_TypeError, + "expected __typing_subst__ of %T objects to return a tuple, not %T", + original, arg); + Py_DECREF(arg); + return NULL; + } jarg = tuple_extend(&newargs, jarg, &PyTuple_GET_ITEM(arg, 0), PyTuple_GET_SIZE(arg)); Py_DECREF(arg); if (jarg < 0) { Py_DECREF(item); Py_XDECREF(tuple_args); + assert(newargs == NULL); return NULL; } } @@ -637,7 +648,6 @@ ga_vectorcall(PyObject *self, PyObject *const *args, static const char* const attr_exceptions[] = { "__class__", - "__bases__", "__origin__", "__args__", "__unpacked__", @@ -646,6 +656,11 @@ static const char* const attr_exceptions[] = { "__mro_entries__", "__reduce_ex__", // needed so we don't look up object.__reduce_ex__ "__reduce__", + NULL, +}; + +static const char* const attr_blocked[] = { + "__bases__", "__copy__", "__deepcopy__", NULL, @@ -656,15 +671,29 @@ ga_getattro(PyObject *self, PyObject *name) { gaobject *alias = (gaobject *)self; if (PyUnicode_Check(name)) { + // When we check blocked attrs, we don't allow to proxy them to `__origin__`. + // Otherwise, we can break existing code. + for (const char * const *p = attr_blocked; ; p++) { + if (*p == NULL) { + break; + } + if (_PyUnicode_EqualToASCIIString(name, *p)) { + goto generic_getattr; + } + } + + // When we see own attrs, it has a priority over `__origin__`'s attr. for (const char * const *p = attr_exceptions; ; p++) { if (*p == NULL) { return PyObject_GetAttr(alias->origin, name); } if (_PyUnicode_EqualToASCIIString(name, *p)) { - break; + goto generic_getattr; } } } + +generic_getattr: return PyObject_GenericGetAttr(self, name); } diff --git a/Objects/genobject.c b/Objects/genobject.c index 98b2c5004df8ac..575752fc84002a 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -17,6 +17,7 @@ #include "pycore_pyerrors.h" // _PyErr_ClearExcState() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_warnings.h" // _PyErr_WarnUnawaitedCoroutine() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "opcode_ids.h" // RESUME, etc @@ -161,8 +162,7 @@ gen_dealloc(PyObject *self) _PyObject_GC_UNTRACK(gen); - if (gen->gi_weakreflist != NULL) - PyObject_ClearWeakRefs(self); + FT_CLEAR_WEAKREFS(self, gen->gi_weakreflist); _PyObject_GC_TRACK(self); @@ -315,7 +315,7 @@ gen_send_ex(PyGenObject *gen, PyObject *arg, int exc, int closing) } PyDoc_STRVAR(send_doc, -"send(arg) -> send 'arg' into generator,\n\ +"send(value) -> send 'value' into generator,\n\ return next yielded value or raise StopIteration."); static PyObject * @@ -407,11 +407,12 @@ gen_close(PyObject *self, PyObject *args) } _PyInterpreterFrame *frame = &gen->gi_iframe; if (is_resume(frame->instr_ptr)) { + bool no_unwind_tools = _PyEval_NoToolsForUnwind(_PyThreadState_GET()); /* We can safely ignore the outermost try block * as it is automatically generated to handle * StopIteration. */ int oparg = frame->instr_ptr->op.arg; - if (oparg & RESUME_OPARG_DEPTH1_MASK) { + if (oparg & RESUME_OPARG_DEPTH1_MASK && no_unwind_tools) { // RESUME after YIELD_VALUE and exception depth is 1 assert((oparg & RESUME_OPARG_LOCATION_MASK) != RESUME_AT_FUNC_START); gen->gi_frame_state = FRAME_COMPLETED; @@ -922,6 +923,7 @@ make_gen(PyTypeObject *type, PyFunctionObject *func) gen->gi_weakreflist = NULL; gen->gi_exc_state.exc_value = NULL; gen->gi_exc_state.previous_item = NULL; + gen->gi_iframe.f_executable = PyStackRef_None; assert(func->func_name != NULL); gen->gi_name = Py_NewRef(func->func_name); assert(func->func_qualname != NULL); diff --git a/Objects/interpolationobject.c b/Objects/interpolationobject.c index aaea3b8c0670c9..b58adb693f0cae 100644 --- a/Objects/interpolationobject.c +++ b/Objects/interpolationobject.c @@ -54,7 +54,7 @@ typedef struct { Interpolation.__new__ as interpolation_new value: object - expression: object(subclass_of='&PyUnicode_Type') + expression: object(subclass_of='&PyUnicode_Type', c_default='&_Py_STR(empty)') = "" conversion: object(converter='_conversion_converter') = None format_spec: object(subclass_of='&PyUnicode_Type', c_default='&_Py_STR(empty)') = "" [clinic start generated code]*/ @@ -63,7 +63,7 @@ static PyObject * interpolation_new_impl(PyTypeObject *type, PyObject *value, PyObject *expression, PyObject *conversion, PyObject *format_spec) -/*[clinic end generated code: output=6488e288765bc1a9 input=d91711024068528c]*/ +/*[clinic end generated code: output=6488e288765bc1a9 input=fc5c285c1dd23d36]*/ { interpolationobject *self = PyObject_GC_New(interpolationobject, type); if (!self) { @@ -137,6 +137,8 @@ interpolation_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) static PyMethodDef interpolation_methods[] = { {"__reduce__", interpolation_reduce, METH_NOARGS, PyDoc_STR("__reduce__() -> (cls, state)")}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL}, }; diff --git a/Objects/listobject.c b/Objects/listobject.c index c5895645a2dd12..23d3472b6d4153 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); diff --git a/Objects/longobject.c b/Objects/longobject.c index 40d90ecf4fa068..6a35413136fe3f 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -324,7 +324,7 @@ _PyLong_Negate(PyLongObject **x_p) PyLongObject *x; x = (PyLongObject *)*x_p; - if (Py_REFCNT(x) == 1) { + if (_PyObject_IsUniquelyReferenced((PyObject *)x)) { _PyLong_FlipSign(x); return; } @@ -971,16 +971,9 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, ++numsignificantbytes; } - /* How many Python int digits do we need? We have - 8*numsignificantbytes bits, and each Python int digit has - PyLong_SHIFT bits, so it's the ceiling of the quotient. */ - /* catch overflow before it happens */ - if (numsignificantbytes > (PY_SSIZE_T_MAX - PyLong_SHIFT) / 8) { - PyErr_SetString(PyExc_OverflowError, - "byte array too long to convert to int"); - return NULL; - } - ndigits = (numsignificantbytes * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; + /* avoid integer overflow */ + ndigits = numsignificantbytes / PyLong_SHIFT * 8 + + (numsignificantbytes % PyLong_SHIFT * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; v = long_alloc(ndigits); if (v == NULL) return NULL; @@ -1145,13 +1138,19 @@ _PyLong_AsByteArray(PyLongObject* v, *p = (unsigned char)(accum & 0xff); p += pincr; } - else if (j == n && n > 0 && is_signed) { + else if (j == n && is_signed) { /* The main loop filled the byte array exactly, so the code just above didn't get to ensure there's a sign bit, and the loop below wouldn't add one either. Make sure a sign bit exists. */ - unsigned char msb = *(p - pincr); - int sign_bit_set = msb >= 0x80; + int sign_bit_set; + if (n > 0) { + unsigned char msb = *(p - pincr); + sign_bit_set = msb >= 0x80; + } + else { + sign_bit_set = 0; + } assert(accumbits == 0); if (sign_bit_set == do_twos_comp) return 0; @@ -1760,6 +1759,10 @@ UNSIGNED_INT_CONVERTER(UnsignedInt, unsigned int) UNSIGNED_INT_CONVERTER(UnsignedLong, unsigned long) UNSIGNED_INT_CONVERTER(UnsignedLongLong, unsigned long long) UNSIGNED_INT_CONVERTER(Size_t, size_t) +UNSIGNED_INT_CONVERTER(UInt8, uint8_t) +UNSIGNED_INT_CONVERTER(UInt16, uint16_t) +UNSIGNED_INT_CONVERTER(UInt32, uint32_t) +UNSIGNED_INT_CONVERTER(UInt64, uint64_t) #define CHECK_BINOP(v,w) \ @@ -5751,7 +5754,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg) assert(size_a >= 0); _PyLong_SetSignAndDigitCount(c, 1, size_a); } - else if (Py_REFCNT(a) == 1) { + else if (_PyObject_IsUniquelyReferenced((PyObject *)a)) { c = (PyLongObject*)Py_NewRef(a); } else { @@ -5765,7 +5768,8 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg) assert(size_a >= 0); _PyLong_SetSignAndDigitCount(d, 1, size_a); } - else if (Py_REFCNT(b) == 1 && size_a <= alloc_b) { + else if (_PyObject_IsUniquelyReferenced((PyObject *)b) + && size_a <= alloc_b) { d = (PyLongObject*)Py_NewRef(b); assert(size_a >= 0); _PyLong_SetSignAndDigitCount(d, 1, size_a); diff --git a/Objects/methodobject.c b/Objects/methodobject.c index c3dcd09ad1cdb6..e6e469ca270ac9 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -8,6 +8,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() /* undefine macro trampoline to PyCFunction_NewEx */ @@ -167,9 +168,7 @@ meth_dealloc(PyObject *self) { PyCFunctionObject *m = _PyCFunctionObject_CAST(self); PyObject_GC_UnTrack(m); - if (m->m_weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject*) m); - } + FT_CLEAR_WEAKREFS(self, m->m_weakreflist); // We need to access ml_flags here rather than later. // `m->m_ml` might have the same lifetime // as `m_self` when it's dynamically allocated. diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index f363ef173cbd46..b68584b5dd571d 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -14,6 +14,7 @@ #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_sysmodule.h" // _PySys_GetOptionalAttrString() #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "osdefs.h" // MAXPATHLEN @@ -827,8 +828,7 @@ module_dealloc(PyObject *self) if (verbose && m->md_name) { PySys_FormatStderr("# destroy %U\n", m->md_name); } - if (m->md_weaklist != NULL) - PyObject_ClearWeakRefs((PyObject *) m); + FT_CLEAR_WEAKREFS(self, m->md_weaklist); /* bpo-39824: Don't call m_free() if m_size > 0 and md_state=NULL */ if (m->md_def && m->md_def->m_free diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index caebe6bf543567..0fc2bcea4cb06e 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -124,9 +124,10 @@ namespace_repr(PyObject *ns) if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) { PyObject *value, *item; - value = PyDict_GetItemWithError(d, key); - if (value != NULL) { + int has_key = PyDict_GetItemRef(d, key, &value); + if (has_key == 1) { item = PyUnicode_FromFormat("%U=%R", key, value); + Py_DECREF(value); if (item == NULL) { loop_error = 1; } @@ -135,7 +136,7 @@ namespace_repr(PyObject *ns) Py_DECREF(item); } } - else if (PyErr_Occurred()) { + else if (has_key < 0) { loop_error = 1; } } diff --git a/Objects/object.c b/Objects/object.c index 723b0427e69251..b88110faf921a3 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1906,34 +1906,11 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) int PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr = _PyObject_GetDictPtr(obj); - if (dictptr == NULL) { - if (_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_INLINE_VALUES) && - _PyObject_GetManagedDict(obj) == NULL - ) { - /* Was unable to convert to dict */ - PyErr_NoMemory(); - } - else { - PyErr_SetString(PyExc_AttributeError, - "This object has no __dict__"); - } - return -1; - } if (value == NULL) { PyErr_SetString(PyExc_TypeError, "cannot delete __dict__"); return -1; } - if (!PyDict_Check(value)) { - PyErr_Format(PyExc_TypeError, - "__dict__ must be set to a dictionary, " - "not a '%.200s'", Py_TYPE(value)->tp_name); - return -1; - } - Py_BEGIN_CRITICAL_SECTION(obj); - Py_XSETREF(*dictptr, Py_NewRef(value)); - Py_END_CRITICAL_SECTION(); - return 0; + return _PyObject_SetDict(obj, value); } @@ -1996,9 +1973,25 @@ _dir_locals(void) PyObject *names; PyObject *locals; - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (!_PyErr_Occurred(tstate)) { + locals = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(locals); + } + } + if (locals == NULL) { return NULL; + } names = PyMapping_Keys(locals); Py_DECREF(locals); @@ -2638,8 +2631,12 @@ PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *op) _PyStackRef *stackpointer = frame->stackpointer; while (stackpointer > base) { stackpointer--; - if (op == PyStackRef_AsPyObjectBorrow(*stackpointer)) { - return PyStackRef_IsHeapSafe(*stackpointer); + _PyStackRef ref = *stackpointer; + if (PyStackRef_IsTaggedInt(ref)) { + continue; + } + if (op == PyStackRef_AsPyObjectBorrow(ref)) { + return PyStackRef_IsHeapSafe(ref); } } return 0; @@ -2930,57 +2927,28 @@ Py_ReprLeave(PyObject *obj) /* Trashcan support. */ -#ifndef Py_GIL_DISABLED -/* We need to store a pointer in the refcount field of - * an object. It is important that we never store 0 (NULL). -* It is also important to not make the object appear immortal, -* or it might be untracked by the cycle GC. */ -static uintptr_t -pointer_to_safe_refcount(void *ptr) -{ - uintptr_t full = (uintptr_t)ptr; - assert((full & 3) == 0); -#if SIZEOF_VOID_P > 4 - uint32_t refcnt = (uint32_t)full; - if (refcnt >= (uint32_t)_Py_IMMORTAL_MINIMUM_REFCNT) { - full = full - ((uintptr_t)_Py_IMMORTAL_MINIMUM_REFCNT) + 1; - } - return full + 2; -#else - // Make the top two bits 0, so it appears mortal. - return (full >> 2) + 1; -#endif -} - -static void * -safe_refcount_to_pointer(uintptr_t refcnt) -{ -#if SIZEOF_VOID_P > 4 - if (refcnt & 1) { - refcnt += _Py_IMMORTAL_MINIMUM_REFCNT - 1; - } - return (void *)(refcnt - 2); -#else - return (void *)((refcnt -1) << 2); -#endif -} -#endif - /* Add op to the gcstate->trash_delete_later list. Called when the current - * call-stack depth gets large. op must be a currently untracked gc'ed - * object, with refcount 0. Py_DECREF must already have been called on it. + * call-stack depth gets large. op must be a gc'ed object, with refcount 0. + * Py_DECREF must already have been called on it. */ void _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { _PyObject_ASSERT(op, Py_REFCNT(op) == 0); + PyTypeObject *tp = Py_TYPE(op); + assert(tp->tp_flags & Py_TPFLAGS_HAVE_GC); + int tracked = 0; + if (tp->tp_is_gc == NULL || tp->tp_is_gc(op)) { + tracked = _PyObject_GC_IS_TRACKED(op); + if (tracked) { + _PyObject_GC_UNTRACK(op); + } + } + uintptr_t tagged_ptr = ((uintptr_t)tstate->delete_later) | tracked; #ifdef Py_GIL_DISABLED - op->ob_tid = (uintptr_t)tstate->delete_later; + op->ob_tid = tagged_ptr; #else - /* Store the delete_later pointer in the refcnt field. */ - uintptr_t refcnt = pointer_to_safe_refcount(tstate->delete_later); - *((uintptr_t*)op) = refcnt; - assert(!_Py_IsImmortal(op)); + _Py_AS_GC(op)->_gc_next = tagged_ptr; #endif tstate->delete_later = op; } @@ -2995,17 +2963,17 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate) destructor dealloc = Py_TYPE(op)->tp_dealloc; #ifdef Py_GIL_DISABLED - tstate->delete_later = (PyObject*) op->ob_tid; + uintptr_t tagged_ptr = op->ob_tid; op->ob_tid = 0; _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, _Py_REF_MERGED); #else - /* Get the delete_later pointer from the refcnt field. - * See _PyTrash_thread_deposit_object(). */ - uintptr_t refcnt = *((uintptr_t*)op); - tstate->delete_later = safe_refcount_to_pointer(refcnt); - op->ob_refcnt = 0; + uintptr_t tagged_ptr = _Py_AS_GC(op)->_gc_next; + _Py_AS_GC(op)->_gc_next = 0; #endif - + tstate->delete_later = (PyObject *)(tagged_ptr & ~1); + if (tagged_ptr & 1) { + _PyObject_GC_TRACK(op); + } /* Call the deallocator directly. This used to try to * fool Py_DECREF into calling it indirectly, but * Py_DECREF was already called on this object, and in @@ -3079,10 +3047,11 @@ void _Py_Dealloc(PyObject *op) { PyTypeObject *type = Py_TYPE(op); + unsigned long gc_flag = type->tp_flags & Py_TPFLAGS_HAVE_GC; destructor dealloc = type->tp_dealloc; PyThreadState *tstate = _PyThreadState_GET(); intptr_t margin = _Py_RecursionLimit_GetMargin(tstate); - if (margin < 2) { + if (margin < 2 && gc_flag) { _PyTrash_thread_deposit_object(tstate, (PyObject *)op); return; } @@ -3128,7 +3097,7 @@ _Py_Dealloc(PyObject *op) Py_XDECREF(old_exc); Py_DECREF(type); #endif - if (tstate->delete_later && margin >= 4) { + if (tstate->delete_later && margin >= 4 && gc_flag) { _PyTrash_thread_destroy_chain(tstate); } } diff --git a/Objects/object_layout_312.png b/Objects/object_layout_312.png index a63d095ea0b19e..9ccb99f9db1b6d 100644 Binary files a/Objects/object_layout_312.png and b/Objects/object_layout_312.png differ diff --git a/Objects/object_layout_313.png b/Objects/object_layout_313.png index f7059c286c84e6..f3df59253da131 100644 Binary files a/Objects/object_layout_313.png and b/Objects/object_layout_313.png differ diff --git a/Objects/object_layout_full_312.png b/Objects/object_layout_full_312.png index 4f46ca86091d45..8c0aca4e06a302 100644 Binary files a/Objects/object_layout_full_312.png and b/Objects/object_layout_full_312.png differ diff --git a/Objects/object_layout_full_313.png b/Objects/object_layout_full_313.png index 7352ec69e5d8db..2d5c8569f628de 100644 Binary files a/Objects/object_layout_full_313.png and b/Objects/object_layout_full_313.png differ diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index b209808da902da..092be84d2b9954 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page) } +#ifdef Py_GIL_DISABLED + +// If we are deferring collection of more than this amount of memory for +// mimalloc pages, advance the write sequence. Advancing allows these +// pages to be re-used in a different thread or for a different size class. +#define QSBR_PAGE_MEM_LIMIT 4096*20 + +// Return true if the global write sequence should be advanced for a mimalloc +// page that is deferred from collection. +static bool +should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page) +{ + size_t bsize = mi_page_block_size(page); + size_t page_size = page->capacity*bsize; + if (page_size > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + qsbr->deferred_page_memory += page_size; + if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + return false; +} +#endif + static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) { @@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) _PyMem_mi_page_clear_qsbr(page); page->retire_expire = 0; - page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr); + + if (should_advance_qsbr_for_page(tstate->qsbr, page)) { + page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared); + } + llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node); return false; } @@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } + +#ifdef Py_GIL_DISABLED + +// For deferred advance on free: the number of deferred items before advancing +// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally +// want to process a chunk before it overflows. +#define QSBR_DEFERRED_LIMIT 127 + +// If the deferred memory exceeds 1 MiB, advance the write sequence. This +// helps limit memory usage due to QSBR delaying frees too long. +#define QSBR_FREE_MEM_LIMIT 1024*1024 + +// Return true if the global write sequence should be advanced for a deferred +// memory free. +static bool +should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size) +{ + if (size > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + qsbr->deferred_count++; + qsbr->deferred_memory += size; + if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT || + qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + return false; +} +#endif + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + if (should_advance_qsbr_for_free(tstate->qsbr, size)) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_shared_next(tstate->qsbr->shared); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + // Normally the processing of delayed items is done from the eval + // breaker. Processing here is a safety measure to ensure too much + // work does not accumulate. _PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1305,10 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + // We use 0 as the size since we don't have an easy way to know the + // actual size. If we are freeing many objects, the write sequence + // will be advanced due to QSBR_DEFERRED_LIMIT. + free_delayed(((uintptr_t)ptr)|0x01, 0); } } #endif @@ -1238,7 +1320,7 @@ work_queue_first(struct llist_node *head) } static void -process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, +process_queue(struct llist_node *head, _PyThreadStateImpl *tstate, bool keep_empty, delayed_dealloc_cb cb, void *state) { while (!llist_empty(head)) { @@ -1246,7 +1328,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, if (buf->rd_idx < buf->wr_idx) { struct _mem_work_item *item = &buf->array[buf->rd_idx]; - if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) { + if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) { return; } @@ -1270,11 +1352,11 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, static void process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { assert(PyMutex_IsLocked(&queue->mutex)); - process_queue(&queue->head, qsbr, false, cb, state); + process_queue(&queue->head, tstate, false, cb, state); int more_work = !llist_empty(&queue->head); _Py_atomic_store_int_relaxed(&queue->has_work, more_work); @@ -1282,7 +1364,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue, static void maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { if (!_Py_atomic_load_int_relaxed(&queue->has_work)) { @@ -1291,7 +1373,7 @@ maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, // Try to acquire the lock, but don't block if it's already held. if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) { - process_interp_queue(queue, qsbr, cb, state); + process_interp_queue(queue, tstate, cb, state); PyMutex_Unlock(&queue->mutex); } } @@ -1302,11 +1384,13 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate_impl->qsbr->should_process = false; + // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL); } void @@ -1316,10 +1400,10 @@ _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, voi _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state); } void @@ -1348,7 +1432,7 @@ _PyMem_AbandonDelayed(PyThreadState *tstate) // Process the merged queue now (see gh-130794). _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - process_interp_queue(&interp->mem_free_queue, this_tstate->qsbr, NULL, NULL); + process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL); PyMutex_Unlock(&interp->mem_free_queue.mutex); diff --git a/Objects/odictobject.c b/Objects/odictobject.c index 891f6197401503..bdd37fae99c5c0 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -473,6 +473,7 @@ Potential Optimizations #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_tuple.h" // _PyTuple_Recycle() #include <stddef.h> // offsetof() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/odictobject.c.h" @@ -535,6 +536,7 @@ struct _odictnode { static Py_ssize_t _odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); PyObject *value = NULL; PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys; Py_ssize_t ix; @@ -559,6 +561,7 @@ _odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash) static int _odict_resize(PyODictObject *od) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); Py_ssize_t size, i; _ODictNode **fast_nodes, *node; @@ -595,6 +598,7 @@ _odict_resize(PyODictObject *od) static Py_ssize_t _odict_get_index(PyODictObject *od, PyObject *key, Py_hash_t hash) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); PyDictKeysObject *keys; assert(key != NULL); @@ -615,6 +619,7 @@ _odict_get_index(PyODictObject *od, PyObject *key, Py_hash_t hash) static _ODictNode * _odict_find_node_hash(PyODictObject *od, PyObject *key, Py_hash_t hash) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); Py_ssize_t index; if (_odict_EMPTY(od)) @@ -629,6 +634,7 @@ _odict_find_node_hash(PyODictObject *od, PyObject *key, Py_hash_t hash) static _ODictNode * _odict_find_node(PyODictObject *od, PyObject *key) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); Py_ssize_t index; Py_hash_t hash; @@ -647,6 +653,7 @@ _odict_find_node(PyODictObject *od, PyObject *key) static void _odict_add_head(PyODictObject *od, _ODictNode *node) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); _odictnode_PREV(node) = NULL; _odictnode_NEXT(node) = _odict_FIRST(od); if (_odict_FIRST(od) == NULL) @@ -660,6 +667,7 @@ _odict_add_head(PyODictObject *od, _ODictNode *node) static void _odict_add_tail(PyODictObject *od, _ODictNode *node) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); _odictnode_PREV(node) = _odict_LAST(od); _odictnode_NEXT(node) = NULL; if (_odict_LAST(od) == NULL) @@ -674,6 +682,7 @@ _odict_add_tail(PyODictObject *od, _ODictNode *node) static int _odict_add_new_node(PyODictObject *od, PyObject *key, Py_hash_t hash) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); Py_ssize_t i; _ODictNode *node; @@ -718,6 +727,7 @@ _odict_add_new_node(PyODictObject *od, PyObject *key, Py_hash_t hash) static void _odict_remove_node(PyODictObject *od, _ODictNode *node) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); if (_odict_FIRST(od) == node) _odict_FIRST(od) = _odictnode_NEXT(node); else if (_odictnode_PREV(node) != NULL) @@ -753,6 +763,7 @@ static int _odict_clear_node(PyODictObject *od, _ODictNode *node, PyObject *key, Py_hash_t hash) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); Py_ssize_t i; assert(key != NULL); @@ -950,31 +961,34 @@ OrderedDict_fromkeys_impl(PyTypeObject *type, PyObject *seq, PyObject *value) return _PyDict_FromKeys((PyObject *)type, seq, value); } -/* __sizeof__() */ - -/* OrderedDict.__sizeof__() does not have a docstring. */ -PyDoc_STRVAR(odict_sizeof__doc__, ""); +/*[clinic input] +@critical_section +OrderedDict.__sizeof__ -> Py_ssize_t +[clinic start generated code]*/ -static PyObject * -odict_sizeof(PyObject *op, PyObject *Py_UNUSED(ignored)) +static Py_ssize_t +OrderedDict___sizeof___impl(PyODictObject *self) +/*[clinic end generated code: output=1a8560db8cf83ac5 input=655e989ae24daa6a]*/ { - PyODictObject *od = _PyODictObject_CAST(op); - Py_ssize_t res = _PyDict_SizeOf((PyDictObject *)od); - res += sizeof(_ODictNode *) * od->od_fast_nodes_size; /* od_fast_nodes */ - if (!_odict_EMPTY(od)) { - res += sizeof(_ODictNode) * PyODict_SIZE(od); /* linked-list */ + Py_ssize_t res = _PyDict_SizeOf_LockHeld((PyDictObject *)self); + res += sizeof(_ODictNode *) * self->od_fast_nodes_size; /* od_fast_nodes */ + if (!_odict_EMPTY(self)) { + res += sizeof(_ODictNode) * PyODict_SIZE(self); /* linked-list */ } - return PyLong_FromSsize_t(res); + return res; } -/* __reduce__() */ +/*[clinic input] +OrderedDict.__reduce__ + self as od: self(type="PyODictObject *") -PyDoc_STRVAR(odict_reduce__doc__, "Return state information for pickling"); +Return state information for pickling +[clinic start generated code]*/ static PyObject * -odict_reduce(PyObject *op, PyObject *Py_UNUSED(ignored)) +OrderedDict___reduce___impl(PyODictObject *od) +/*[clinic end generated code: output=71eeb81f760a6a8e input=b0467c7ec400fe5e]*/ { - register PyODictObject *od = _PyODictObject_CAST(op); PyObject *state, *result = NULL; PyObject *items_iter, *items, *args = NULL; @@ -1009,8 +1023,10 @@ odict_reduce(PyObject *op, PyObject *Py_UNUSED(ignored)) /* setdefault(): Skips __missing__() calls. */ +static int PyODict_SetItem_LockHeld(PyObject *self, PyObject *key, PyObject *value); /*[clinic input] +@critical_section OrderedDict.setdefault key: object @@ -1024,7 +1040,7 @@ Return the value for key if key is in the dictionary, else default. static PyObject * OrderedDict_setdefault_impl(PyODictObject *self, PyObject *key, PyObject *default_value) -/*[clinic end generated code: output=97537cb7c28464b6 input=38e098381c1efbc6]*/ +/*[clinic end generated code: output=97537cb7c28464b6 input=d7b93e92734f99b5]*/ { PyObject *result = NULL; @@ -1034,7 +1050,7 @@ OrderedDict_setdefault_impl(PyODictObject *self, PyObject *key, if (PyErr_Occurred()) return NULL; assert(_odict_find_node(self, key) == NULL); - if (PyODict_SetItem((PyObject *)self, key, default_value) >= 0) { + if (PyODict_SetItem_LockHeld((PyObject *)self, key, default_value) >= 0) { result = Py_NewRef(default_value); } } @@ -1064,10 +1080,9 @@ static PyObject * _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, Py_hash_t hash) { - PyObject *value = NULL; - - Py_BEGIN_CRITICAL_SECTION(od); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); + PyObject *value = NULL; _ODictNode *node = _odict_find_node_hash(_PyODictObject_CAST(od), key, hash); if (node != NULL) { /* Pop the node first to avoid a possible dict resize (due to @@ -1092,7 +1107,6 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, PyErr_SetObject(PyExc_KeyError, key); } } - Py_END_CRITICAL_SECTION(); done: return value; @@ -1100,6 +1114,7 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, /* Skips __missing__() calls. */ /*[clinic input] +@critical_section OrderedDict.pop key: object @@ -1114,7 +1129,7 @@ raise a KeyError. static PyObject * OrderedDict_pop_impl(PyODictObject *self, PyObject *key, PyObject *default_value) -/*[clinic end generated code: output=7a6447d104e7494b input=7efe36601007dff7]*/ +/*[clinic end generated code: output=7a6447d104e7494b input=a79988887b4a651f]*/ { Py_hash_t hash = PyObject_Hash(key); if (hash == -1) @@ -1126,6 +1141,7 @@ OrderedDict_pop_impl(PyODictObject *self, PyObject *key, /* popitem() */ /*[clinic input] +@critical_section OrderedDict.popitem last: bool = True @@ -1137,7 +1153,7 @@ Pairs are returned in LIFO order if last is true or FIFO order if false. static PyObject * OrderedDict_popitem_impl(PyODictObject *self, int last) -/*[clinic end generated code: output=98e7d986690d49eb input=d992ac5ee8305e1a]*/ +/*[clinic end generated code: output=98e7d986690d49eb input=8aafc7433e0a40e7]*/ { PyObject *key, *value, *item = NULL; _ODictNode *node; @@ -1166,6 +1182,9 @@ OrderedDict_popitem_impl(PyODictObject *self, int last) PyDoc_STRVAR(odict_keys__doc__, ""); static PyObject * odictkeys_new(PyObject *od, PyObject *Py_UNUSED(ignored)); /* forward */ +static int +_PyODict_SetItem_KnownHash_LockHeld(PyObject *od, PyObject *key, PyObject *value, + Py_hash_t hash); /* forward */ /* values() */ @@ -1191,32 +1210,36 @@ static PyObject * mutablemapping_update(PyObject *, PyObject *, PyObject *); #define odict_update mutablemapping_update -/* clear() */ +/*[clinic input] +@critical_section +OrderedDict.clear -PyDoc_STRVAR(odict_clear__doc__, - "od.clear() -> None. Remove all items from od."); +Remove all items from ordered dict. +[clinic start generated code]*/ static PyObject * -odict_clear(PyObject *op, PyObject *Py_UNUSED(ignored)) +OrderedDict_clear_impl(PyODictObject *self) +/*[clinic end generated code: output=a1a76d1322f556c5 input=08b12322e74c535c]*/ { - register PyODictObject *od = _PyODictObject_CAST(op); - PyDict_Clear(op); - _odict_clear_nodes(od); + _PyDict_Clear_LockHeld((PyObject *)self); + _odict_clear_nodes(self); Py_RETURN_NONE; } /* copy() */ -/* forward */ -static int _PyODict_SetItem_KnownHash(PyObject *, PyObject *, PyObject *, - Py_hash_t); +/*[clinic input] +@critical_section +OrderedDict.copy + self as od: self -PyDoc_STRVAR(odict_copy__doc__, "od.copy() -> a shallow copy of od"); +A shallow copy of ordered dict. +[clinic start generated code]*/ static PyObject * -odict_copy(PyObject *op, PyObject *Py_UNUSED(ignored)) +OrderedDict_copy_impl(PyObject *od) +/*[clinic end generated code: output=9cdbe7394aecc576 input=e329951ae617ed48]*/ { - register PyODictObject *od = _PyODictObject_CAST(op); _ODictNode *node; PyObject *od_copy; @@ -1236,8 +1259,8 @@ odict_copy(PyObject *op, PyObject *Py_UNUSED(ignored)) PyErr_SetObject(PyExc_KeyError, key); goto fail; } - if (_PyODict_SetItem_KnownHash((PyObject *)od_copy, key, value, - _odictnode_HASH(node)) != 0) + if (_PyODict_SetItem_KnownHash_LockHeld((PyObject *)od_copy, key, value, + _odictnode_HASH(node)) != 0) goto fail; } } @@ -1285,6 +1308,7 @@ odict_reversed(PyObject *op, PyObject *Py_UNUSED(ignored)) /* move_to_end() */ /*[clinic input] +@critical_section OrderedDict.move_to_end key: object @@ -1297,7 +1321,7 @@ Raise KeyError if the element does not exist. static PyObject * OrderedDict_move_to_end_impl(PyODictObject *self, PyObject *key, int last) -/*[clinic end generated code: output=fafa4c5cc9b92f20 input=d6ceff7132a2fcd7]*/ +/*[clinic end generated code: output=fafa4c5cc9b92f20 input=09f8bc7053c0f6d4]*/ { _ODictNode *node; @@ -1338,10 +1362,8 @@ static PyMethodDef odict_methods[] = { /* overridden dict methods */ ORDEREDDICT_FROMKEYS_METHODDEF - {"__sizeof__", odict_sizeof, METH_NOARGS, - odict_sizeof__doc__}, - {"__reduce__", odict_reduce, METH_NOARGS, - odict_reduce__doc__}, + ORDEREDDICT___SIZEOF___METHODDEF + ORDEREDDICT___REDUCE___METHODDEF ORDEREDDICT_SETDEFAULT_METHODDEF ORDEREDDICT_POP_METHODDEF ORDEREDDICT_POPITEM_METHODDEF @@ -1353,11 +1375,8 @@ static PyMethodDef odict_methods[] = { odict_items__doc__}, {"update", _PyCFunction_CAST(odict_update), METH_VARARGS | METH_KEYWORDS, odict_update__doc__}, - {"clear", odict_clear, METH_NOARGS, - odict_clear__doc__}, - {"copy", odict_copy, METH_NOARGS, - odict_copy__doc__}, - + ORDEREDDICT_CLEAR_METHODDEF + ORDEREDDICT_COPY_METHODDEF /* new methods */ {"__reversed__", odict_reversed, METH_NOARGS, odict_reversed__doc__}, @@ -1391,8 +1410,7 @@ odict_dealloc(PyObject *op) PyObject_GC_UnTrack(self); Py_XDECREF(self->od_inst_dict); - if (self->od_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *)self); + FT_CLEAR_WEAKREFS(op, self->od_weakreflist); _odict_clear_nodes(self); PyDict_Type.tp_dealloc((PyObject *)self); @@ -1457,7 +1475,8 @@ odict_tp_clear(PyObject *op) { PyODictObject *od = _PyODictObject_CAST(op); Py_CLEAR(od->od_inst_dict); - PyDict_Clear((PyObject *)od); + // cannot use lock held variant as critical section is not held here + PyDict_Clear(op); _odict_clear_nodes(od); return 0; } @@ -1465,7 +1484,7 @@ odict_tp_clear(PyObject *op) /* tp_richcompare */ static PyObject * -odict_richcompare(PyObject *v, PyObject *w, int op) +odict_richcompare_lock_held(PyObject *v, PyObject *w, int op) { if (!PyODict_Check(v) || !PyDict_Check(w)) { Py_RETURN_NOTIMPLEMENTED; @@ -1498,6 +1517,16 @@ odict_richcompare(PyObject *v, PyObject *w, int op) } } +static PyObject * +odict_richcompare(PyObject *v, PyObject *w, int op) +{ + PyObject *res; + Py_BEGIN_CRITICAL_SECTION2(v, w); + res = odict_richcompare_lock_held(v, w, op); + Py_END_CRITICAL_SECTION2(); + return res; +} + /* tp_iter */ static PyObject * @@ -1517,7 +1546,7 @@ odict_init(PyObject *self, PyObject *args, PyObject *kwds) if (len == -1) return -1; if (len > 1) { - const char *msg = "expected at most 1 arguments, got %zd"; + const char *msg = "expected at most 1 argument, got %zd"; PyErr_Format(PyExc_TypeError, msg, len); return -1; } @@ -1588,10 +1617,11 @@ PyODict_New(void) } static int -_PyODict_SetItem_KnownHash(PyObject *od, PyObject *key, PyObject *value, - Py_hash_t hash) +_PyODict_SetItem_KnownHash_LockHeld(PyObject *od, PyObject *key, PyObject *value, + Py_hash_t hash) { - int res = _PyDict_SetItem_KnownHash(od, key, value, hash); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); + int res = _PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)od, key, value, hash); if (res == 0) { res = _odict_add_new_node(_PyODictObject_CAST(od), key, hash); if (res < 0) { @@ -1604,18 +1634,32 @@ _PyODict_SetItem_KnownHash(PyObject *od, PyObject *key, PyObject *value, return res; } -int -PyODict_SetItem(PyObject *od, PyObject *key, PyObject *value) + +static int +PyODict_SetItem_LockHeld(PyObject *od, PyObject *key, PyObject *value) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); Py_hash_t hash = PyObject_Hash(key); - if (hash == -1) + if (hash == -1) { return -1; - return _PyODict_SetItem_KnownHash(od, key, value, hash); + } + return _PyODict_SetItem_KnownHash_LockHeld(od, key, value, hash); } int -PyODict_DelItem(PyObject *od, PyObject *key) +PyODict_SetItem(PyObject *od, PyObject *key, PyObject *value) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(od); + res = PyODict_SetItem_LockHeld(od, key, value); + Py_END_CRITICAL_SECTION(); + return res; +} + +int +PyODict_DelItem_LockHeld(PyObject *od, PyObject *key) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(od); int res; Py_hash_t hash = PyObject_Hash(key); if (hash == -1) @@ -1623,9 +1667,18 @@ PyODict_DelItem(PyObject *od, PyObject *key) res = _odict_clear_node(_PyODictObject_CAST(od), NULL, key, hash); if (res < 0) return -1; - return _PyDict_DelItem_KnownHash(od, key, hash); + return _PyDict_DelItem_KnownHash_LockHeld(od, key, hash); } +int +PyODict_DelItem(PyObject *od, PyObject *key) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(od); + res = PyODict_DelItem_LockHeld(od, key); + Py_END_CRITICAL_SECTION(); + return res; +} /* ------------------------------------------- * The OrderedDict views (keys/values/items) @@ -1667,14 +1720,14 @@ odictiter_traverse(PyObject *op, visitproc visit, void *arg) /* In order to protect against modifications during iteration, we track * the current key instead of the current node. */ static PyObject * -odictiter_nextkey(odictiterobject *di) +odictiter_nextkey_lock_held(odictiterobject *di) { + assert(di->di_odict != NULL); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(di->di_odict); PyObject *key = NULL; _ODictNode *node; int reversed = di->kind & _odict_ITER_REVERSED; - if (di->di_odict == NULL) - return NULL; if (di->di_current == NULL) goto done; /* We're already done. */ @@ -1719,8 +1772,23 @@ odictiter_nextkey(odictiterobject *di) return key; } + +static PyObject * +odictiter_nextkey(odictiterobject *di) +{ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(di); + if (di->di_odict == NULL) { + return NULL; + } + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(di->di_odict); + res = odictiter_nextkey_lock_held(di); + Py_END_CRITICAL_SECTION(); + return res; +} + static PyObject * -odictiter_iternext(PyObject *op) +odictiter_iternext_lock_held(PyObject *op) { odictiterobject *di = (odictiterobject*)op; PyObject *result, *value; @@ -1734,14 +1802,12 @@ odictiter_iternext(PyObject *op) return key; } - value = PyODict_GetItem((PyObject *)di->di_odict, key); /* borrowed */ - if (value == NULL) { + if (PyDict_GetItemRef((PyObject *)di->di_odict, key, &value) != 1) { if (!PyErr_Occurred()) PyErr_SetObject(PyExc_KeyError, key); Py_DECREF(key); goto done; } - Py_INCREF(value); /* Handle the values case. */ if (!(di->kind & _odict_ITER_KEYS)) { @@ -1752,7 +1818,7 @@ odictiter_iternext(PyObject *op) /* Handle the items case. */ result = di->di_result; - if (Py_REFCNT(result) == 1) { + if (_PyObject_IsUniquelyReferenced(result)) { /* not in use so we can reuse it * (the common case during iteration) */ Py_INCREF(result); @@ -1781,6 +1847,17 @@ odictiter_iternext(PyObject *op) return NULL; } +static PyObject * +odictiter_iternext(PyObject *op) +{ + PyObject *res; + Py_BEGIN_CRITICAL_SECTION(op); + res = odictiter_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return res; +} + + /* No need for tp_clear because odictiterobject is not mutable. */ PyDoc_STRVAR(reduce_doc, "Return state information for pickling"); diff --git a/Objects/picklebufobject.c b/Objects/picklebufobject.c index 3ce800de04c208..50f17687bc4365 100644 --- a/Objects/picklebufobject.c +++ b/Objects/picklebufobject.c @@ -1,6 +1,7 @@ /* PickleBuffer object implementation */ #include "Python.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include <stddef.h> typedef struct { @@ -111,8 +112,7 @@ picklebuf_dealloc(PyObject *op) { PyPickleBufferObject *self = (PyPickleBufferObject*)op; PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) self); + FT_CLEAR_WEAKREFS(op, self->weakreflist); PyBuffer_Release(&self->view); Py_TYPE(self)->tp_free((PyObject *) self); } diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index f8cdfe68a6435e..e93346fb27703f 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -1042,6 +1042,11 @@ longrangeiter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored)) static PyObject * longrangeiter_setstate(PyObject *op, PyObject *state) { + if (!PyLong_CheckExact(state)) { + PyErr_Format(PyExc_TypeError, "state must be an int, not %T", state); + return NULL; + } + longrangeiterobject *r = (longrangeiterobject*)op; PyObject *zero = _PyLong_GetZero(); // borrowed reference int cmp; diff --git a/Objects/setobject.c b/Objects/setobject.c index 8aa6b0d180907b..ff4844b24371e8 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -40,6 +40,7 @@ #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_RELAXED() #include "pycore_pyerrors.h" // _PyErr_SetKeyError() #include "pycore_setobject.h" // _PySet_NextEntry() definition +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "stringlib/eq.h" // unicode_eq() #include <stddef.h> // offsetof() @@ -85,6 +86,8 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) int probes; int cmp; + int frozenset = PyFrozenSet_CheckExact(so); + while (1) { entry = &so->table[i]; probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0; @@ -101,13 +104,20 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) && unicode_eq(startkey, key)) return entry; table = so->table; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) - return NULL; - if (table != so->table || entry->key != startkey) - return set_lookkey(so, key, hash); + if (frozenset) { + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + if (cmp < 0) + return NULL; + } else { + // incref startkey because it can be removed from the set by the compare + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) + return NULL; + if (table != so->table || entry->key != startkey) + return set_lookkey(so, key, hash); + } if (cmp > 0) return entry; mask = so->mask; @@ -536,8 +546,7 @@ set_dealloc(PyObject *self) /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(so); - if (so->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) so); + FT_CLEAR_WEAKREFS(self, so->weakreflist); for (entry = so->table; used > 0; entry++) { if (entry->key && entry->key != dummy) { @@ -2234,10 +2243,16 @@ set_contains_lock_held(PySetObject *so, PyObject *key) int _PySet_Contains(PySetObject *so, PyObject *key) { + assert(so); + int rv; - Py_BEGIN_CRITICAL_SECTION(so); - rv = set_contains_lock_held(so, key); - Py_END_CRITICAL_SECTION(); + if (PyFrozenSet_CheckExact(so)) { + rv = set_contains_lock_held(so, key); + } else { + Py_BEGIN_CRITICAL_SECTION(so); + rv = set_contains_lock_held(so, key); + Py_END_CRITICAL_SECTION(); + } return rv; } @@ -2428,7 +2443,7 @@ set_init(PyObject *so, PyObject *args, PyObject *kwds) if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable)) return -1; - if (Py_REFCNT(self) == 1 && self->fill == 0) { + if (_PyObject_IsUniquelyReferenced((PyObject *)self) && self->fill == 0) { self->hash = -1; if (iterable == NULL) { return 0; @@ -2731,7 +2746,9 @@ PySet_Contains(PyObject *anyset, PyObject *key) PyErr_BadInternalCall(); return -1; } - + if (PyFrozenSet_CheckExact(anyset)) { + return set_contains_key((PySetObject *)anyset, key); + } int rv; Py_BEGIN_CRITICAL_SECTION(anyset); rv = set_contains_key((PySetObject *)anyset, key); @@ -2758,7 +2775,7 @@ int PySet_Add(PyObject *anyset, PyObject *key) { if (!PySet_Check(anyset) && - (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) { + (!PyFrozenSet_Check(anyset) || !_PyObject_IsUniquelyReferenced(anyset))) { PyErr_BadInternalCall(); return -1; } diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 05e700b06258f0..b447865c986bef 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -595,7 +595,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, continue; } /* miss: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } else { @@ -604,7 +604,7 @@ STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, } else { /* skip: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } } @@ -668,7 +668,7 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, } } /* miss: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } else { @@ -677,7 +677,7 @@ STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, } else { /* skip: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) { + if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) { i = i + m; } } diff --git a/Objects/templateobject.c b/Objects/templateobject.c index 7d356980b56cbb..a05208e4c8fc8e 100644 --- a/Objects/templateobject.c +++ b/Objects/templateobject.c @@ -23,11 +23,15 @@ templateiter_next(PyObject *op) if (self->from_strings) { item = PyIter_Next(self->stringsiter); self->from_strings = 0; + if (item == NULL) { + return NULL; + } if (PyUnicode_GET_LENGTH(item) == 0) { Py_SETREF(item, PyIter_Next(self->interpolationsiter)); self->from_strings = 1; } - } else { + } + else { item = PyIter_Next(self->interpolationsiter); self->from_strings = 1; } @@ -144,13 +148,14 @@ template_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (last_was_str) { PyObject *laststring = PyTuple_GET_ITEM(strings, stringsidx - 1); PyObject *concat = PyUnicode_Concat(laststring, item); - Py_DECREF(laststring); if (!concat) { Py_DECREF(strings); Py_DECREF(interpolations); return NULL; } + /* Replace laststring with concat */ PyTuple_SET_ITEM(strings, stringsidx - 1, concat); + Py_DECREF(laststring); } else { PyTuple_SET_ITEM(strings, stringsidx++, Py_NewRef(item)); @@ -242,54 +247,6 @@ template_iter(PyObject *op) return (PyObject *)iter; } -static PyObject * -template_strings_append_str(PyObject *strings, PyObject *str) -{ - Py_ssize_t stringslen = PyTuple_GET_SIZE(strings); - PyObject *string = PyTuple_GET_ITEM(strings, stringslen - 1); - PyObject *concat = PyUnicode_Concat(string, str); - if (concat == NULL) { - return NULL; - } - - PyObject *newstrings = PyTuple_New(stringslen); - if (newstrings == NULL) { - Py_DECREF(concat); - return NULL; - } - - for (Py_ssize_t i = 0; i < stringslen - 1; i++) { - PyTuple_SET_ITEM(newstrings, i, Py_NewRef(PyTuple_GET_ITEM(strings, i))); - } - PyTuple_SET_ITEM(newstrings, stringslen - 1, concat); - - return newstrings; -} - -static PyObject * -template_strings_prepend_str(PyObject *strings, PyObject *str) -{ - Py_ssize_t stringslen = PyTuple_GET_SIZE(strings); - PyObject *string = PyTuple_GET_ITEM(strings, 0); - PyObject *concat = PyUnicode_Concat(str, string); - if (concat == NULL) { - return NULL; - } - - PyObject *newstrings = PyTuple_New(stringslen); - if (newstrings == NULL) { - Py_DECREF(concat); - return NULL; - } - - PyTuple_SET_ITEM(newstrings, 0, concat); - for (Py_ssize_t i = 1; i < stringslen; i++) { - PyTuple_SET_ITEM(newstrings, i, Py_NewRef(PyTuple_GET_ITEM(strings, i))); - } - - return newstrings; -} - static PyObject * template_strings_concat(PyObject *left, PyObject *right) { @@ -341,47 +298,17 @@ template_concat_templates(templateobject *self, templateobject *other) return newtemplate; } -static PyObject * -template_concat_template_str(templateobject *self, PyObject *other) -{ - PyObject *newstrings = template_strings_append_str(self->strings, other); - if (newstrings == NULL) { - return NULL; - } - - PyObject *newtemplate = _PyTemplate_Build(newstrings, self->interpolations); - Py_DECREF(newstrings); - return newtemplate; -} - -static PyObject * -template_concat_str_template(templateobject *self, PyObject *other) -{ - PyObject *newstrings = template_strings_prepend_str(self->strings, other); - if (newstrings == NULL) { - return NULL; - } - - PyObject *newtemplate = _PyTemplate_Build(newstrings, self->interpolations); - Py_DECREF(newstrings); - return newtemplate; -} - PyObject * _PyTemplate_Concat(PyObject *self, PyObject *other) { if (_PyTemplate_CheckExact(self) && _PyTemplate_CheckExact(other)) { return template_concat_templates((templateobject *) self, (templateobject *) other); } - else if ((_PyTemplate_CheckExact(self)) && PyUnicode_Check(other)) { - return template_concat_template_str((templateobject *) self, other); - } - else if (PyUnicode_Check(self) && (_PyTemplate_CheckExact(other))) { - return template_concat_str_template((templateobject *) other, self); - } - else { - Py_RETURN_NOTIMPLEMENTED; - } + + PyErr_Format(PyExc_TypeError, + "can only concatenate string.templatelib.Template (not \"%T\") to string.templatelib.Template", + other); + return NULL; } static PyObject * @@ -444,6 +371,8 @@ template_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) static PyMethodDef template_methods[] = { {"__reduce__", template_reduce, METH_NOARGS, NULL}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL}, }; diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 9b31758485ca5e..c950d0da70fb86 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -118,7 +118,7 @@ int PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) { PyObject **p; - if (!PyTuple_Check(op) || Py_REFCNT(op) != 1) { + if (!PyTuple_Check(op) || !_PyObject_IsUniquelyReferenced(op)) { Py_XDECREF(newitem); PyErr_BadInternalCall(); return -1; @@ -156,6 +156,18 @@ _PyTuple_MaybeUntrack(PyObject *op) _PyObject_GC_UNTRACK(op); } +/* Fast, but conservative check if an object maybe tracked + May return true for an object that is not tracked, + Will always return true for an object that is tracked. + This is a temporary workaround until _PyObject_GC_IS_TRACKED + becomes fast and safe to call on non-GC objects. +*/ +static bool +maybe_tracked(PyObject *ob) +{ + return _PyType_IS_GC(Py_TYPE(ob)); +} + PyObject * PyTuple_Pack(Py_ssize_t n, ...) { @@ -163,6 +175,7 @@ PyTuple_Pack(Py_ssize_t n, ...) PyObject *o; PyObject **items; va_list vargs; + bool track = false; if (n == 0) { return tuple_get_empty(); @@ -177,10 +190,15 @@ PyTuple_Pack(Py_ssize_t n, ...) items = result->ob_item; for (i = 0; i < n; i++) { o = va_arg(vargs, PyObject *); + if (!track && maybe_tracked(o)) { + track = true; + } items[i] = Py_NewRef(o); } va_end(vargs); - _PyObject_GC_TRACK(result); + if (track) { + _PyObject_GC_TRACK(result); + } return (PyObject *)result; } @@ -377,11 +395,17 @@ _PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) return NULL; } PyObject **dst = tuple->ob_item; + bool track = false; for (Py_ssize_t i = 0; i < n; i++) { PyObject *item = src[i]; + if (!track && maybe_tracked(item)) { + track = true; + } dst[i] = Py_NewRef(item); } - _PyObject_GC_TRACK(tuple); + if (track) { + _PyObject_GC_TRACK(tuple); + } return (PyObject *)tuple; } @@ -396,10 +420,17 @@ _PyTuple_FromStackRefStealOnSuccess(const _PyStackRef *src, Py_ssize_t n) return NULL; } PyObject **dst = tuple->ob_item; + bool track = false; for (Py_ssize_t i = 0; i < n; i++) { - dst[i] = PyStackRef_AsPyObjectSteal(src[i]); + PyObject *item = PyStackRef_AsPyObjectSteal(src[i]); + if (!track && maybe_tracked(item)) { + track = true; + } + dst[i] = item; + } + if (track) { + _PyObject_GC_TRACK(tuple); } - _PyObject_GC_TRACK(tuple); return (PyObject *)tuple; } @@ -923,7 +954,7 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) v = (PyTupleObject *) *pv; if (v == NULL || !Py_IS_TYPE(v, &PyTuple_Type) || - (Py_SIZE(v) != 0 && Py_REFCNT(v) != 1)) { + (Py_SIZE(v) != 0 && !_PyObject_IsUniquelyReferenced(*pv))) { *pv = 0; Py_XDECREF(v); PyErr_BadInternalCall(); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a7ab69fef4c721..adbc0743161326 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -54,7 +54,6 @@ class object "PyObject *" "&PyBaseObject_Type" PyUnicode_CheckExact(name) && \ (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE) -#define NEXT_GLOBAL_VERSION_TAG _PyRuntime.types.next_version_tag #define NEXT_VERSION_TAG(interp) \ (interp)->types.next_version_tag @@ -66,11 +65,11 @@ class object "PyObject *" "&PyBaseObject_Type" // be released and reacquired during a subclass update if there's contention // on the subclass lock. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex -#define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUT(TYPE_LOCK) +#define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUTEX(TYPE_LOCK) #define END_TYPE_LOCK() Py_END_CRITICAL_SECTION() #define BEGIN_TYPE_DICT_LOCK(d) \ - Py_BEGIN_CRITICAL_SECTION2_MUT(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex) + Py_BEGIN_CRITICAL_SECTION2_MUTEX(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex) #define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() @@ -157,8 +156,8 @@ static_ext_type_lookup(PyInterpreterState *interp, size_t index, assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); size_t full_index = index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; - int64_t interp_count = - _PyRuntime.types.managed_static.types[full_index].interp_count; + int64_t interp_count = _Py_atomic_load_int64( + &_PyRuntime.types.managed_static.types[full_index].interp_count); assert((interp_count == 0) == (_PyRuntime.types.managed_static.types[full_index].type == NULL)); *p_interp_count = interp_count; @@ -235,7 +234,7 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self, : index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; assert((initial == 1) == - (_PyRuntime.types.managed_static.types[full_index].interp_count == 0)); + (_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count) == 0)); (void)_Py_atomic_add_int64( &_PyRuntime.types.managed_static.types[full_index].interp_count, 1); @@ -284,7 +283,7 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, : &(interp->types.for_extensions.initialized[index]); assert(state != NULL); - assert(_PyRuntime.types.managed_static.types[full_index].interp_count > 0); + assert(_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count) > 0); assert(_PyRuntime.types.managed_static.types[full_index].type == state->type); assert(state->type != NULL); @@ -294,7 +293,7 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, (void)_Py_atomic_add_int64( &_PyRuntime.types.managed_static.types[full_index].interp_count, -1); if (final) { - assert(!_PyRuntime.types.managed_static.types[full_index].interp_count); + assert(!_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count)); _PyRuntime.types.managed_static.types[full_index].type = NULL; managed_static_type_index_clear(self); @@ -1258,6 +1257,19 @@ _PyType_GetVersionForCurrentState(PyTypeObject *tp) #error "_Py_ATTR_CACHE_UNUSED must be bigger than max" #endif +static inline unsigned int +next_global_version_tag(void) +{ + unsigned int old; + do { + old = _Py_atomic_load_uint_relaxed(&_PyRuntime.types.next_version_tag); + if (old >= _Py_MAX_GLOBAL_TYPE_VERSION_TAG) { + return 0; + } + } while (!_Py_atomic_compare_exchange_uint(&_PyRuntime.types.next_version_tag, &old, old + 1)); + return old + 1; +} + static int assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) { @@ -1288,11 +1300,12 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) } if (type->tp_flags & Py_TPFLAGS_IMMUTABLETYPE) { /* static types */ - if (NEXT_GLOBAL_VERSION_TAG > _Py_MAX_GLOBAL_TYPE_VERSION_TAG) { + unsigned int next_version_tag = next_global_version_tag(); + if (next_version_tag == 0) { /* We have run out of version numbers */ return 0; } - set_version_unlocked(type, NEXT_GLOBAL_VERSION_TAG++); + set_version_unlocked(type, next_version_tag); assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); } else { @@ -1421,9 +1434,13 @@ type_set_name(PyObject *tp, PyObject *value, void *Py_UNUSED(closure)) return -1; } + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); type->tp_name = tp_name; - Py_SETREF(((PyHeapTypeObject*)type)->ht_name, Py_NewRef(value)); - + PyObject *old_name = ((PyHeapTypeObject*)type)->ht_name; + ((PyHeapTypeObject*)type)->ht_name = Py_NewRef(value); + _PyEval_StartTheWorld(interp); + Py_DECREF(old_name); return 0; } @@ -1641,7 +1658,7 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); static int -mro_hierarchy(PyTypeObject *type, PyObject *temp) +mro_hierarchy_for_complete_type(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); @@ -1652,6 +1669,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) return res; } PyObject *new_mro = lookup_tp_mro(type); + assert(new_mro != NULL); PyObject *tuple; if (old_mro != NULL) { @@ -1696,7 +1714,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) Py_ssize_t n = PyList_GET_SIZE(subclasses); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *subclass = _PyType_CAST(PyList_GET_ITEM(subclasses, i)); - res = mro_hierarchy(subclass, temp); + res = mro_hierarchy_for_complete_type(subclass, temp); if (res < 0) { break; } @@ -1778,7 +1796,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) if (temp == NULL) { goto bail; } - if (mro_hierarchy(type, temp) < 0) { + if (mro_hierarchy_for_complete_type(type, temp) < 0) { goto undo; } Py_DECREF(temp); @@ -2065,19 +2083,46 @@ type_set_annotations(PyObject *tp, PyObject *value, void *Py_UNUSED(closure)) return -1; } - int result; PyObject *dict = PyType_GetDict(type); - if (value != NULL) { - /* set */ - result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); - } else { - /* delete */ - result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); - if (result == 0) { - PyErr_SetString(PyExc_AttributeError, "__annotations__"); + int result = PyDict_ContainsString(dict, "__annotations__"); + if (result < 0) { + Py_DECREF(dict); + return -1; + } + if (result) { + // If __annotations__ is currently in the dict, we update it, + if (value != NULL) { + result = PyDict_SetItem(dict, &_Py_ID(__annotations__), value); + } else { + result = PyDict_Pop(dict, &_Py_ID(__annotations__), NULL); + if (result == 0) { + // Somebody else just deleted it? + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } + if (result < 0) { Py_DECREF(dict); return -1; } + // Also clear __annotations_cache__ just in case. + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + } + else { + // Else we update only __annotations_cache__. + if (value != NULL) { + /* set */ + result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); + } else { + /* delete */ + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + if (result == 0) { + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } } if (result < 0) { Py_DECREF(dict); @@ -3247,6 +3292,7 @@ mro_implementation_unlocked(PyTypeObject *type) */ PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, 0)); PyObject *base_mro = lookup_tp_mro(base); + assert(base_mro != NULL); Py_ssize_t k = PyTuple_GET_SIZE(base_mro); PyObject *result = PyTuple_New(k + 1); if (result == NULL) { @@ -3281,9 +3327,12 @@ mro_implementation_unlocked(PyTypeObject *type) return NULL; } + PyObject *mro_to_merge; for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, i)); - to_merge[i] = lookup_tp_mro(base); + mro_to_merge = lookup_tp_mro(base); + assert(mro_to_merge != NULL); + to_merge[i] = mro_to_merge; } to_merge[n] = bases; @@ -3649,10 +3698,39 @@ subtype_dict(PyObject *obj, void *context) return PyObject_GenericGetDict(obj, context); } +int +_PyObject_SetDict(PyObject *obj, PyObject *value) +{ + if (value != NULL && !PyDict_Check(value)) { + PyErr_Format(PyExc_TypeError, + "__dict__ must be set to a dictionary, " + "not a '%.200s'", Py_TYPE(value)->tp_name); + return -1; + } + if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { + return _PyObject_SetManagedDict(obj, value); + } + PyObject **dictptr = _PyObject_ComputedDictPointer(obj); + if (dictptr == NULL) { + PyErr_SetString(PyExc_AttributeError, + "This object has no __dict__"); + return -1; + } + Py_BEGIN_CRITICAL_SECTION(obj); + PyObject *olddict = *dictptr; + FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, Py_NewRef(value)); +#ifdef Py_GIL_DISABLED + _PyObject_XDecRefDelayed(olddict); +#else + Py_XDECREF(olddict); +#endif + Py_END_CRITICAL_SECTION(); + return 0; +} + static int subtype_setdict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr; PyTypeObject *base; base = get_builtin_base_with_dict(Py_TYPE(obj)); @@ -3670,28 +3748,7 @@ subtype_setdict(PyObject *obj, PyObject *value, void *context) } return func(descr, obj, value); } - /* Almost like PyObject_GenericSetDict, but allow __dict__ to be deleted. */ - if (value != NULL && !PyDict_Check(value)) { - PyErr_Format(PyExc_TypeError, - "__dict__ must be set to a dictionary, " - "not a '%.200s'", Py_TYPE(value)->tp_name); - return -1; - } - - if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - return _PyObject_SetManagedDict(obj, value); - } - else { - dictptr = _PyObject_ComputedDictPointer(obj); - if (dictptr == NULL) { - PyErr_SetString(PyExc_AttributeError, - "This object has no __dict__"); - return -1; - } - Py_CLEAR(*dictptr); - *dictptr = Py_XNewRef(value); - } - return 0; + return _PyObject_SetDict(obj, value); } static PyObject * @@ -5476,23 +5533,15 @@ get_base_by_token_recursive(PyObject *bases, void *token) } int -PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) +_PyType_GetBaseByToken_Borrow(PyTypeObject *type, void *token, PyTypeObject **result) { + assert(token != NULL); + assert(PyType_Check(type)); + if (result != NULL) { *result = NULL; } - if (token == NULL) { - PyErr_Format(PyExc_SystemError, - "PyType_GetBaseByToken called with token=NULL"); - return -1; - } - if (!PyType_Check(type)) { - PyErr_Format(PyExc_TypeError, - "expected a type, got a '%T' object", type); - return -1; - } - if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // No static type has a heaptype superclass, // which is ensured by type_ready_mro(). @@ -5501,7 +5550,7 @@ PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) if (((PyHeapTypeObject*)type)->ht_token == token) { found: if (result != NULL) { - *result = (PyTypeObject *)Py_NewRef(type); + *result = type; } return 1; } @@ -5535,6 +5584,30 @@ PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) return 0; } +int +PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) +{ + if (result != NULL) { + *result = NULL; + } + if (token == NULL) { + PyErr_Format(PyExc_SystemError, + "PyType_GetBaseByToken called with token=NULL"); + return -1; + } + if (!PyType_Check(type)) { + PyErr_Format(PyExc_TypeError, + "expected a type, got a '%T' object", type); + return -1; + } + + int res = _PyType_GetBaseByToken_Borrow(type, token, result); + if (res > 0 && result) { + Py_INCREF(*result); + } + return res; +} + void * PyObject_GetTypeData(PyObject *obj, PyTypeObject *cls) @@ -6124,6 +6197,18 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_INLINE_VALUES)); assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_MANAGED_DICT)); +#ifdef Py_GIL_DISABLED + // gh-139103: Enable deferred refcounting for functions assigned + // to type objects. This is important for `dataclass.__init__`, + // which is generated dynamically. + if (value != NULL && + PyFunction_Check(value) && + !_PyObject_HasDeferredRefcount(value)) + { + PyUnstable_Object_EnableDeferredRefcount(value); + } +#endif + PyObject *old_value = NULL; PyObject *descr = _PyType_LookupRef(metatype, name); if (descr != NULL) { @@ -8571,6 +8656,7 @@ type_ready_inherit(PyTypeObject *type) // Inherit slots PyObject *mro = lookup_tp_mro(type); + assert(mro != NULL); Py_ssize_t n = PyTuple_GET_SIZE(mro); for (Py_ssize_t i = 1; i < n; i++) { PyObject *b = PyTuple_GET_ITEM(mro, i); @@ -8669,7 +8755,11 @@ type_ready_set_new(PyTypeObject *type, int initial) && base == &PyBaseObject_Type && !(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { - type_add_flags(type, Py_TPFLAGS_DISALLOW_INSTANTIATION); + if (initial) { + type_add_flags(type, Py_TPFLAGS_DISALLOW_INSTANTIATION); + } else { + assert(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION); + } } if (!(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION)) { @@ -8683,13 +8773,17 @@ type_ready_set_new(PyTypeObject *type, int initial) } } else { - // tp_new is NULL: inherit tp_new from base - type->tp_new = base->tp_new; + if (initial) { + // tp_new is NULL: inherit tp_new from base + type->tp_new = base->tp_new; + } } } else { // Py_TPFLAGS_DISALLOW_INSTANTIATION sets tp_new to NULL - type->tp_new = NULL; + if (initial) { + type->tp_new = NULL; + } } return 0; } @@ -8822,7 +8916,12 @@ type_ready(PyTypeObject *type, int initial) } /* All done -- set the ready flag */ - type_add_flags(type, Py_TPFLAGS_READY); + if (initial) { + type_add_flags(type, Py_TPFLAGS_READY); + } else { + assert(type->tp_flags & Py_TPFLAGS_READY); + } + stop_readying(type); assert(_PyType_CheckConsistency(type)); @@ -8871,15 +8970,16 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_DICT)); assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_WEAKREF)); - if ((self->tp_flags & Py_TPFLAGS_READY) == 0) { - assert(initial); + if (initial) { + assert((self->tp_flags & Py_TPFLAGS_READY) == 0); type_add_flags(self, _Py_TPFLAGS_STATIC_BUILTIN); type_add_flags(self, Py_TPFLAGS_IMMUTABLETYPE); - assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); if (self->tp_version_tag == 0) { - _PyType_SetVersion(self, NEXT_GLOBAL_VERSION_TAG++); + unsigned int next_version_tag = next_global_version_tag(); + assert(next_version_tag != 0); + _PyType_SetVersion(self, next_version_tag); } } else { @@ -9682,6 +9782,11 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds) /* If staticbase is NULL now, it is a really weird type. In the spirit of backwards compatibility (?), just shut up. */ if (staticbase && staticbase->tp_new != type->tp_new) { + if (staticbase->tp_new == NULL) { + PyErr_Format(PyExc_TypeError, + "cannot create '%s' instances", subtype->tp_name); + return NULL; + } PyErr_Format(PyExc_TypeError, "%s.__new__(%s) is not safe, use %s.__new__()", type->tp_name, @@ -10124,6 +10229,7 @@ slot_tp_hash(PyObject *self) return PyObject_HashNotImplemented(self); } if (!PyLong_Check(res)) { + Py_DECREF(res); PyErr_SetString(PyExc_TypeError, "__hash__ method should return an integer"); return -1; @@ -10211,7 +10317,10 @@ _Py_slot_tp_getattr_hook(PyObject *self, PyObject *name) getattr = _PyType_LookupRef(tp, &_Py_ID(__getattr__)); if (getattr == NULL) { /* No __getattr__ hook: use a simpler dispatcher */ +#ifndef Py_GIL_DISABLED + // Replacing the slot is only thread-safe if there is a GIL. tp->tp_getattro = _Py_slot_tp_getattro; +#endif return _Py_slot_tp_getattro(self, name); } /* speed hack: we could use lookup_maybe, but that would resolve the @@ -10336,9 +10445,11 @@ slot_tp_descr_get(PyObject *self, PyObject *obj, PyObject *type) get = _PyType_LookupRef(tp, &_Py_ID(__get__)); if (get == NULL) { +#ifndef Py_GIL_DISABLED /* Avoid further slowdowns */ if (tp->tp_descr_get == slot_tp_descr_get) tp->tp_descr_get = NULL; +#endif return Py_NewRef(self); } if (obj == NULL) diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 6c199a52aa0ae6..e6dc4360eaeb62 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs) for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) { PyObject *item = PyTuple_GET_ITEM(value, i); if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { PyUnicodeWriter_Discard(writer); return NULL; } @@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p) } if (p == (PyObject *)&_PyNone_Type) { - return PyUnicodeWriter_WriteUTF8(writer, "None", 4); + return PyUnicodeWriter_WriteASCII(writer, "None", 4); } if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 && @@ -472,7 +472,7 @@ typevar_dealloc(PyObject *self) _PyObject_GC_UNTRACK(self); - Py_DECREF(tv->name); + Py_XDECREF(tv->name); Py_XDECREF(tv->bound); Py_XDECREF(tv->evaluate_bound); Py_XDECREF(tv->constraints); @@ -491,6 +491,7 @@ typevar_traverse(PyObject *self, visitproc visit, void *arg) { Py_VISIT(Py_TYPE(self)); typevarobject *tv = typevarobject_CAST(self); + Py_VISIT(tv->name); Py_VISIT(tv->bound); Py_VISIT(tv->evaluate_bound); Py_VISIT(tv->constraints); @@ -505,6 +506,7 @@ static int typevar_clear(PyObject *op) { typevarobject *self = typevarobject_CAST(op); + Py_CLEAR(self->name); Py_CLEAR(self->bound); Py_CLEAR(self->evaluate_bound); Py_CLEAR(self->constraints); @@ -1171,7 +1173,7 @@ paramspec_dealloc(PyObject *self) _PyObject_GC_UNTRACK(self); - Py_DECREF(ps->name); + Py_XDECREF(ps->name); Py_XDECREF(ps->bound); Py_XDECREF(ps->default_value); Py_XDECREF(ps->evaluate_default); @@ -1187,6 +1189,7 @@ paramspec_traverse(PyObject *self, visitproc visit, void *arg) { Py_VISIT(Py_TYPE(self)); paramspecobject *ps = paramspecobject_CAST(self); + Py_VISIT(ps->name); Py_VISIT(ps->bound); Py_VISIT(ps->default_value); Py_VISIT(ps->evaluate_default); @@ -1198,6 +1201,7 @@ static int paramspec_clear(PyObject *op) { paramspecobject *self = paramspecobject_CAST(op); + Py_CLEAR(self->name); Py_CLEAR(self->bound); Py_CLEAR(self->default_value); Py_CLEAR(self->evaluate_default); @@ -1519,7 +1523,7 @@ typevartuple_dealloc(PyObject *self) _PyObject_GC_UNTRACK(self); typevartupleobject *tvt = typevartupleobject_CAST(self); - Py_DECREF(tvt->name); + Py_XDECREF(tvt->name); Py_XDECREF(tvt->default_value); Py_XDECREF(tvt->evaluate_default); PyObject_ClearManagedDict(self); @@ -1683,6 +1687,7 @@ typevartuple_traverse(PyObject *self, visitproc visit, void *arg) { Py_VISIT(Py_TYPE(self)); typevartupleobject *tvt = typevartupleobject_CAST(self); + Py_VISIT(tvt->name); Py_VISIT(tvt->default_value); Py_VISIT(tvt->evaluate_default); PyObject_VisitManagedDict(self, visit, arg); @@ -1693,6 +1698,7 @@ static int typevartuple_clear(PyObject *self) { typevartupleobject *tvt = typevartupleobject_CAST(self); + Py_CLEAR(tvt->name); Py_CLEAR(tvt->default_value); Py_CLEAR(tvt->evaluate_default); PyObject_ClearManagedDict(self); @@ -1851,7 +1857,7 @@ typealias_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); _PyObject_GC_UNTRACK(self); typealiasobject *ta = typealiasobject_CAST(self); - Py_DECREF(ta->name); + Py_XDECREF(ta->name); Py_XDECREF(ta->type_params); Py_XDECREF(ta->compute_value); Py_XDECREF(ta->value); @@ -2032,6 +2038,7 @@ static int typealias_traverse(PyObject *op, visitproc visit, void *arg) { typealiasobject *self = typealiasobject_CAST(op); + Py_VISIT(self->name); Py_VISIT(self->type_params); Py_VISIT(self->compute_value); Py_VISIT(self->value); @@ -2043,6 +2050,7 @@ static int typealias_clear(PyObject *op) { typealiasobject *self = typealiasobject_CAST(op); + Py_CLEAR(self->name); Py_CLEAR(self->type_params); Py_CLEAR(self->compute_value); Py_CLEAR(self->value); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb3e1c48fd4050..c71c5720ea090e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -56,7 +56,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_pyhash.h" // _Py_HashSecret_t #include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "pycore_template.h" // _PyTemplate_Concat() #include "pycore_tuple.h" // _PyTuple_FromArray() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI #include "pycore_unicodeobject.h" // struct _Py_unicode_state @@ -5498,7 +5497,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, if (maxchr <= 255) { memcpy(PyUnicode_1BYTE_DATA(u), s, pos); s += pos; - size -= pos; writer.pos = pos; } @@ -5546,7 +5544,6 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer, return 0; } s += decoded; - size -= decoded; } return unicode_decode_utf8_impl(writer, starts, s, end, @@ -6621,13 +6618,15 @@ _PyUnicode_GetNameCAPI(void) /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * -_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, +_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { const char *starts = s; + const char *initial_starts = starts; _PyUnicodeWriter writer; const char *end; PyObject *errorHandler = NULL; @@ -6635,7 +6634,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, _PyUnicode_Name_CAPI *ucnhash_capi; // so we can remember if we've seen an invalid escape char or not - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; if (size == 0) { if (consumed) { @@ -6723,9 +6723,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, } } if (ch > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = ch; + if (starts == initial_starts) { + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; + } } } WRITE_CHAR(ch); @@ -6820,9 +6823,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, goto error; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + if (starts == initial_starts) { + /* Back up one char, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 1; + } } WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); @@ -6867,19 +6873,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, const char *errors, Py_ssize_t *consumed) { - const char *first_invalid_escape; - PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, consumed, - &first_invalid_escape); + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "\"\\%.3s\" is an invalid octal escape sequence. " + "\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -6889,7 +6896,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. ", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -7704,10 +7711,6 @@ code_page_name(UINT code_page, PyObject **obj) *obj = NULL; if (code_page == CP_ACP) return "mbcs"; - if (code_page == CP_UTF7) - return "CP_UTF7"; - if (code_page == CP_UTF8) - return "CP_UTF8"; *obj = PyBytes_FromFormat("cp%u", code_page); if (*obj == NULL) @@ -11629,16 +11632,10 @@ PyUnicode_Concat(PyObject *left, PyObject *right) return NULL; if (!PyUnicode_Check(right)) { - if (_PyTemplate_CheckExact(right)) { - // str + tstring is implemented in the tstring type - return _PyTemplate_Concat(left, right); - } - else { - PyErr_Format(PyExc_TypeError, - "can only concatenate str (not \"%.200s\") to str", - Py_TYPE(right)->tp_name); - return NULL; - } + PyErr_Format(PyExc_TypeError, + "can only concatenate str (not \"%.200s\") to str", + Py_TYPE(right)->tp_name); + return NULL; } /* Shortcuts */ @@ -13944,7 +13941,12 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { - if (Py_TYPE(obj) == &PyLong_Type) { + PyTypeObject *type = Py_TYPE(obj); + if (type == &PyUnicode_Type) { + return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj); + } + + if (type == &PyLong_Type) { return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); } @@ -14093,6 +14095,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, return 0; } + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, diff --git a/Objects/unionobject.c b/Objects/unionobject.c index 66435924b6c6c3..a47d6193d70889 100644 --- a/Objects/unionobject.c +++ b/Objects/unionobject.c @@ -4,6 +4,7 @@ #include "pycore_typevarobject.h" // _PyTypeAlias_Type, _Py_typing_type_repr #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString #include "pycore_unionobject.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() typedef struct { @@ -21,9 +22,7 @@ unionobject_dealloc(PyObject *self) unionobject *alias = (unionobject *)self; _PyObject_GC_UNTRACK(self); - if (alias->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *)alias); - } + FT_CLEAR_WEAKREFS(self, alias->weakreflist); Py_XDECREF(alias->args); Py_XDECREF(alias->hashable_args); @@ -290,7 +289,7 @@ union_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { - if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) { + if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) { goto error; } PyObject *p = PyTuple_GET_ITEM(alias->args, i); @@ -300,12 +299,12 @@ union_repr(PyObject *self) } #if 0 - PyUnicodeWriter_WriteUTF8(writer, "|args=", 6); + PyUnicodeWriter_WriteASCII(writer, "|args=", 6); PyUnicodeWriter_WriteRepr(writer, alias->args); - PyUnicodeWriter_WriteUTF8(writer, "|h=", 3); + PyUnicodeWriter_WriteASCII(writer, "|h=", 3); PyUnicodeWriter_WriteRepr(writer, alias->hashable_args); if (alias->unhashable_args) { - PyUnicodeWriter_WriteUTF8(writer, "|u=", 3); + PyUnicodeWriter_WriteASCII(writer, "|u=", 3); PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args); } #endif @@ -394,8 +393,23 @@ static PyGetSetDef union_properties[] = { {0} }; +static PyObject * +union_nb_or(PyObject *a, PyObject *b) +{ + unionbuilder ub; + if (!unionbuilder_init(&ub, true)) { + return NULL; + } + if (!unionbuilder_add_single(&ub, a) || + !unionbuilder_add_single(&ub, b)) { + unionbuilder_finalize(&ub); + return NULL; + } + return make_union(&ub); +} + static PyNumberMethods union_as_number = { - .nb_or = _Py_union_type_or, // Add __or__ function + .nb_or = union_nb_or, // Add __or__ function }; static const char* const cls_attrs[] = { @@ -475,11 +489,13 @@ _Py_union_from_tuple(PyObject *args) } if (PyTuple_CheckExact(args)) { if (!unionbuilder_add_tuple(&ub, args)) { + unionbuilder_finalize(&ub); return NULL; } } else { if (!unionbuilder_add_single(&ub, args)) { + unionbuilder_finalize(&ub); return NULL; } } diff --git a/PC/_wmimodule.cpp b/PC/_wmimodule.cpp index b6efb3e4a207b4..30d61c86587fbe 100644 --- a/PC/_wmimodule.cpp +++ b/PC/_wmimodule.cpp @@ -57,11 +57,11 @@ _query_thread(LPVOID param) IEnumWbemClassObject* enumerator = NULL; HRESULT hr = S_OK; BSTR bstrQuery = NULL; - struct _query_data *data = (struct _query_data*)param; + _query_data data = *(struct _query_data*)param; // gh-125315: Copy the query string first, so that if the main thread gives // up on waiting we aren't left with a dangling pointer (and a likely crash) - bstrQuery = SysAllocString(data->query); + bstrQuery = SysAllocString(data.query); if (!bstrQuery) { hr = HRESULT_FROM_WIN32(ERROR_NOT_ENOUGH_MEMORY); } @@ -71,7 +71,7 @@ _query_thread(LPVOID param) } if (FAILED(hr)) { - CloseHandle(data->writePipe); + CloseHandle(data.writePipe); if (bstrQuery) { SysFreeString(bstrQuery); } @@ -96,7 +96,7 @@ _query_thread(LPVOID param) IID_IWbemLocator, (LPVOID *)&locator ); } - if (SUCCEEDED(hr) && !SetEvent(data->initEvent)) { + if (SUCCEEDED(hr) && !SetEvent(data.initEvent)) { hr = HRESULT_FROM_WIN32(GetLastError()); } if (SUCCEEDED(hr)) { @@ -105,7 +105,7 @@ _query_thread(LPVOID param) NULL, NULL, 0, NULL, 0, 0, &services ); } - if (SUCCEEDED(hr) && !SetEvent(data->connectEvent)) { + if (SUCCEEDED(hr) && !SetEvent(data.connectEvent)) { hr = HRESULT_FROM_WIN32(GetLastError()); } if (SUCCEEDED(hr)) { @@ -143,7 +143,7 @@ _query_thread(LPVOID param) if (FAILED(hr) || got != 1 || !value) { continue; } - if (!startOfEnum && !WriteFile(data->writePipe, (LPVOID)L"\0", 2, &written, NULL)) { + if (!startOfEnum && !WriteFile(data.writePipe, (LPVOID)L"\0", 2, &written, NULL)) { hr = HRESULT_FROM_WIN32(GetLastError()); break; } @@ -171,10 +171,10 @@ _query_thread(LPVOID param) DWORD cbStr1, cbStr2; cbStr1 = (DWORD)(wcslen(propName) * sizeof(propName[0])); cbStr2 = (DWORD)(wcslen(propStr) * sizeof(propStr[0])); - if (!WriteFile(data->writePipe, propName, cbStr1, &written, NULL) || - !WriteFile(data->writePipe, (LPVOID)L"=", 2, &written, NULL) || - !WriteFile(data->writePipe, propStr, cbStr2, &written, NULL) || - !WriteFile(data->writePipe, (LPVOID)L"\0", 2, &written, NULL) + if (!WriteFile(data.writePipe, propName, cbStr1, &written, NULL) || + !WriteFile(data.writePipe, (LPVOID)L"=", 2, &written, NULL) || + !WriteFile(data.writePipe, propStr, cbStr2, &written, NULL) || + !WriteFile(data.writePipe, (LPVOID)L"\0", 2, &written, NULL) ) { hr = HRESULT_FROM_WIN32(GetLastError()); } @@ -200,7 +200,7 @@ _query_thread(LPVOID param) locator->Release(); } CoUninitialize(); - CloseHandle(data->writePipe); + CloseHandle(data.writePipe); return (DWORD)hr; } diff --git a/PC/icons/idlex150.png b/PC/icons/idlex150.png index 806cb0c8aa219b..dbdca5b00f8812 100644 Binary files a/PC/icons/idlex150.png and b/PC/icons/idlex150.png differ diff --git a/PC/layout/main.py b/PC/layout/main.py index 7324a135133b66..8543e7c56e1c41 100644 --- a/PC/layout/main.py +++ b/PC/layout/main.py @@ -247,9 +247,15 @@ def in_build(f, dest="", new_name=None, no_lib=False): if ns.include_freethreaded: yield from in_build("venvlaunchert.exe", "Lib/venv/scripts/nt/") yield from in_build("venvwlaunchert.exe", "Lib/venv/scripts/nt/") - else: + elif (VER_MAJOR, VER_MINOR) > (3, 12): yield from in_build("venvlauncher.exe", "Lib/venv/scripts/nt/") yield from in_build("venvwlauncher.exe", "Lib/venv/scripts/nt/") + else: + # Older versions of venv expected the scripts to be named 'python' + # and they were renamed at this stage. We need to replicate that + # when packaging older versions. + yield from in_build("venvlauncher.exe", "Lib/venv/scripts/nt/", "python") + yield from in_build("venvwlauncher.exe", "Lib/venv/scripts/nt/", "pythonw") if ns.include_tools: @@ -652,15 +658,6 @@ def main(): ns.doc_build = (Path.cwd() / ns.doc_build).resolve() if ns.include_cat and not ns.include_cat.is_absolute(): ns.include_cat = (Path.cwd() / ns.include_cat).resolve() - if not ns.arch: - # TODO: Calculate arch from files in ns.build instead - if sys.winver.endswith("-arm64"): - ns.arch = "arm64" - elif sys.winver.endswith("-32"): - ns.arch = "win32" - else: - ns.arch = "amd64" - if ns.zip and not ns.zip.is_absolute(): ns.zip = (Path.cwd() / ns.zip).resolve() if ns.catalog and not ns.catalog.is_absolute(): @@ -668,6 +665,17 @@ def main(): configure_logger(ns) + if not ns.arch: + from .support.arch import calculate_from_build_dir + ns.arch = calculate_from_build_dir(ns.build) + + expect = f"{VER_MAJOR}.{VER_MINOR}.{VER_MICRO}{VER_SUFFIX}" + actual = check_patchlevel_version(ns.source) + if actual and actual != expect: + log_error(f"Inferred version {expect} does not match {actual} from patchlevel.h. " + "You should set %PYTHONINCLUDE% or %PYTHON_HEXVERSION% before launching.") + return 5 + log_info( """OPTIONS Source: {ns.source} diff --git a/PC/layout/support/arch.py b/PC/layout/support/arch.py new file mode 100644 index 00000000000000..daf4efbc7ab674 --- /dev/null +++ b/PC/layout/support/arch.py @@ -0,0 +1,34 @@ +from struct import unpack +from .constants import * +from .logging import * + +def calculate_from_build_dir(root): + candidates = [ + root / PYTHON_DLL_NAME, + root / FREETHREADED_PYTHON_DLL_NAME, + *root.glob("*.dll"), + *root.glob("*.pyd"), + # Check EXE last because it's easier to have cross-platform EXE + *root.glob("*.exe"), + ] + + ARCHS = { + b"PE\0\0\x4c\x01": "win32", + b"PE\0\0\x64\x86": "amd64", + b"PE\0\0\x64\xAA": "arm64" + } + + first_exc = None + for pe in candidates: + try: + # Read the PE header to grab the machine type + with open(pe, "rb") as f: + f.seek(0x3C) + offset = int.from_bytes(f.read(4), "little") + f.seek(offset) + arch = ARCHS[f.read(6)] + except (FileNotFoundError, PermissionError, LookupError) as ex: + log_debug("Failed to open {}: {}", pe, ex) + continue + log_info("Inferred architecture {} from {}", arch, pe) + return arch diff --git a/PC/layout/support/constants.py b/PC/layout/support/constants.py index ae22aa16ebfa5d..6b8c915e519743 100644 --- a/PC/layout/support/constants.py +++ b/PC/layout/support/constants.py @@ -6,6 +6,8 @@ __version__ = "3.8" import os +import pathlib +import re import struct import sys @@ -13,9 +15,15 @@ def _unpack_hexversion(): try: hexversion = int(os.getenv("PYTHON_HEXVERSION"), 16) + return struct.pack(">i", hexversion) except (TypeError, ValueError): - hexversion = sys.hexversion - return struct.pack(">i", hexversion) + pass + if os.getenv("PYTHONINCLUDE"): + try: + return _read_patchlevel_version(pathlib.Path(os.getenv("PYTHONINCLUDE"))) + except OSError: + pass + return struct.pack(">i", sys.hexversion) def _get_suffix(field4): @@ -26,6 +34,39 @@ def _get_suffix(field4): return "" +def _read_patchlevel_version(sources): + if not sources.match("Include"): + sources /= "Include" + values = {} + with open(sources / "patchlevel.h", "r", encoding="utf-8") as f: + for line in f: + m = re.match(r'#\s*define\s+(PY_\S+?)\s+(\S+)', line.strip(), re.I) + if m and m.group(2): + v = m.group(2) + if v.startswith('"'): + v = v[1:-1] + else: + v = values.get(v, v) + if isinstance(v, str): + try: + v = int(v, 16 if v.startswith("0x") else 10) + except ValueError: + pass + values[m.group(1)] = v + return ( + values["PY_MAJOR_VERSION"], + values["PY_MINOR_VERSION"], + values["PY_MICRO_VERSION"], + values["PY_RELEASE_LEVEL"] << 4 | values["PY_RELEASE_SERIAL"], + ) + + +def check_patchlevel_version(sources): + got = _read_patchlevel_version(sources) + if got != (VER_MAJOR, VER_MINOR, VER_MICRO, VER_FIELD4): + return f"{got[0]}.{got[1]}.{got[2]}{_get_suffix(got[3])}" + + VER_MAJOR, VER_MINOR, VER_MICRO, VER_FIELD4 = _unpack_hexversion() VER_SUFFIX = _get_suffix(VER_FIELD4) VER_FIELD3 = VER_MICRO << 8 | VER_FIELD4 diff --git a/PC/layout/support/pymanager.py b/PC/layout/support/pymanager.py index 667c89cdd2cc7a..831d49ea3f9b46 100644 --- a/PC/layout/support/pymanager.py +++ b/PC/layout/support/pymanager.py @@ -80,7 +80,9 @@ def calculate_install_json(ns, *, for_embed=False, for_test=False): # Tag used in runtime ID (for side-by-side install/updates) ID_TAG = XY_ARCH_TAG - # Tag shown in 'py list' output + # Tag shown in 'py list' output. + # gh-139810: We only include '-dev' here for prereleases, even though it + # works for final releases too. DISPLAY_TAG = f"{XY_TAG}-dev{TAG_ARCH}" if VER_SUFFIX else XY_ARCH_TAG # Tag used for PEP 514 registration SYS_WINVER = XY_TAG + (TAG_ARCH if TAG_ARCH != '-64' else '') @@ -102,9 +104,10 @@ def calculate_install_json(ns, *, for_embed=False, for_test=False): FULL_ARCH_TAG, XY_ARCH_TAG, X_ARCH_TAG, - # X_TAG and XY_TAG doesn't include VER_SUFFIX, so create -dev versions - f"{XY_TAG}-dev{TAG_ARCH}" if XY_TAG and VER_SUFFIX else "", - f"{X_TAG}-dev{TAG_ARCH}" if X_TAG and VER_SUFFIX else "", + # gh-139810: The -dev tags are always included so that the latest + # release is chosen, no matter whether it's prerelease or final. + f"{XY_TAG}-dev{TAG_ARCH}" if XY_TAG else "", + f"{X_TAG}-dev{TAG_ARCH}" if X_TAG else "", ] # Generate run-for entries for each target. @@ -115,16 +118,15 @@ def calculate_install_json(ns, *, for_embed=False, for_test=False): ]: if not base["target"]: continue - STD_RUN_FOR.append({**base, "tag": FULL_ARCH_TAG}) + STD_RUN_FOR.extend([ + {**base, "tag": FULL_ARCH_TAG}, + {**base, "tag": f"{XY_TAG}-dev{TAG_ARCH}" if XY_TAG else ""}, + {**base, "tag": f"{X_TAG}-dev{TAG_ARCH}" if X_TAG else ""}, + ]) if XY_TAG: STD_RUN_FOR.append({**base, "tag": XY_ARCH_TAG}) if X_TAG: STD_RUN_FOR.append({**base, "tag": X_ARCH_TAG}) - if VER_SUFFIX: - STD_RUN_FOR.extend([ - {**base, "tag": f"{XY_TAG}-dev{TAG_ARCH}" if XY_TAG else ""}, - {**base, "tag": f"{X_TAG}-dev{TAG_ARCH}" if X_TAG else ""}, - ]) # Generate alias entries for each target. We need both arch and non-arch # versions as well as windowed/non-windowed versions to make sure that all diff --git a/PC/pyconfig.h.in b/PC/pyconfig.h similarity index 97% rename from PC/pyconfig.h.in rename to PC/pyconfig.h index 9e70303868e5de..710a737ebcc081 100644 --- a/PC/pyconfig.h.in +++ b/PC/pyconfig.h @@ -94,12 +94,17 @@ WIN32 is still required for the locale module. #endif #endif /* Py_BUILD_CORE || Py_BUILD_CORE_BUILTIN || Py_BUILD_CORE_MODULE */ -/* Define to 1 if you want to disable the GIL */ -/* Uncomment the definition for free-threaded builds, or define it manually - * when compiling extension modules. Note that we test with #ifdef, so - * defining as 0 will still disable the GIL. */ -#ifndef Py_GIL_DISABLED -/* #define Py_GIL_DISABLED 1 */ +/* Define to 1 when compiling for experimental free-threaded builds */ +#ifdef Py_GIL_DISABLED +/* We undefine if it was set to zero because all later checks are #ifdef. + * Note that non-Windows builds do not do this, and so every effort should + * be made to avoid defining the variable at all when not desired. However, + * sysconfig.get_config_var always returns a 1 or a 0, and so it seems likely + * that a build backend will define it with the value. + */ +#if Py_GIL_DISABLED == 0 +#undef Py_GIL_DISABLED +#endif #endif /* Compiler specific defines */ diff --git a/PC/winreg.c b/PC/winreg.c index c0de5c1353a349..159df46bc639d6 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -51,6 +51,7 @@ PyDoc_STRVAR(module_doc, "CreateKey() - Creates the specified key, or opens it if it already exists.\n" "DeleteKey() - Deletes the specified key.\n" "DeleteValue() - Removes a named value from the specified registry key.\n" +"DeleteTree() - Deletes the specified key and all its subkeys and values recursively.\n" "EnumKey() - Enumerates subkeys of the specified open registry key.\n" "EnumValue() - Enumerates values of the specified open registry key.\n" "ExpandEnvironmentStrings() - Expand the env strings in a REG_EXPAND_SZ\n" @@ -102,7 +103,8 @@ PyDoc_STRVAR(PyHKEY_doc, "Operations:\n" "__bool__ - Handles with an open object return true, otherwise false.\n" "__int__ - Converting a handle to an integer returns the Win32 handle.\n" -"rich comparison - Handle objects are compared using the handle value."); +"__enter__, __exit__ - Context manager support for 'with' statement,\n" +"automatically closes handle."); @@ -426,7 +428,9 @@ PyHKEY_Close(winreg_state *st, PyObject *ob_handle) if (PyHKEY_Check(st, ob_handle)) { ((PyHKEYObject*)ob_handle)->hkey = 0; } + Py_BEGIN_ALLOW_THREADS rc = key ? RegCloseKey(key) : ERROR_SUCCESS; + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); return rc == ERROR_SUCCESS; @@ -499,14 +503,21 @@ PyWinObject_CloseHKEY(winreg_state *st, PyObject *obHandle) } #if SIZEOF_LONG >= SIZEOF_HKEY else if (PyLong_Check(obHandle)) { - long rc = RegCloseKey((HKEY)PyLong_AsLong(obHandle)); + long rc; + Py_BEGIN_ALLOW_THREADS + rc = RegCloseKey((HKEY)PyLong_AsLong(obHandle)); + Py_END_ALLOW_THREADS ok = (rc == ERROR_SUCCESS); if (!ok) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); } #else else if (PyLong_Check(obHandle)) { - long rc = RegCloseKey((HKEY)PyLong_AsVoidPtr(obHandle)); + long rc; + HKEY hkey = (HKEY)PyLong_AsVoidPtr(obHandle); + Py_BEGIN_ALLOW_THREADS + rc = RegCloseKey(hkey); + Py_END_ALLOW_THREADS ok = (rc == ERROR_SUCCESS); if (!ok) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); @@ -924,7 +935,9 @@ winreg_CreateKey_impl(PyObject *module, HKEY key, const wchar_t *sub_key) (Py_ssize_t)KEY_WRITE) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegCreateKeyW(key, sub_key, &retKey); + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) { PyErr_SetFromWindowsErrWithFunction(rc, "CreateKey"); return NULL; @@ -973,8 +986,10 @@ winreg_CreateKeyEx_impl(PyObject *module, HKEY key, const wchar_t *sub_key, (Py_ssize_t)access) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegCreateKeyExW(key, sub_key, reserved, NULL, 0, access, NULL, &retKey, NULL); + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) { PyErr_SetFromWindowsErrWithFunction(rc, "CreateKeyEx"); return NULL; @@ -1187,10 +1202,12 @@ winreg_EnumValue_impl(PyObject *module, HKEY key, int index) (Py_ssize_t)key, index) < 0) { return NULL; } - if ((rc = RegQueryInfoKeyW(key, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, - &retValueSize, &retDataSize, NULL, NULL)) - != ERROR_SUCCESS) + + Py_BEGIN_ALLOW_THREADS + rc = RegQueryInfoKeyW(key, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &retValueSize, &retDataSize, NULL, NULL); + Py_END_ALLOW_THREADS + if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegQueryInfoKey"); ++retValueSize; /* include null terminators */ @@ -1477,9 +1494,11 @@ winreg_QueryInfoKey_impl(PyObject *module, HKEY key) if (PySys_Audit("winreg.QueryInfoKey", "n", (Py_ssize_t)key) < 0) { return NULL; } - if ((rc = RegQueryInfoKeyW(key, NULL, NULL, 0, &nSubKeys, NULL, NULL, - &nValues, NULL, NULL, NULL, &ft)) - != ERROR_SUCCESS) { + Py_BEGIN_ALLOW_THREADS + rc = RegQueryInfoKeyW(key, NULL, NULL, 0, &nSubKeys, NULL, NULL, + &nValues, NULL, NULL, NULL, &ft); + Py_END_ALLOW_THREADS + if (rc != ERROR_SUCCESS) { return PyErr_SetFromWindowsErrWithFunction(rc, "RegQueryInfoKey"); } li.LowPart = ft.dwLowDateTime; @@ -1587,7 +1606,9 @@ winreg_QueryValue_impl(PyObject *module, HKEY key, const wchar_t *sub_key) PyMem_Free(pbuf); } if (childKey != key) { + Py_BEGIN_ALLOW_THREADS RegCloseKey(childKey); + Py_END_ALLOW_THREADS } return result; } @@ -1625,7 +1646,9 @@ winreg_QueryValueEx_impl(PyObject *module, HKEY key, const wchar_t *name) (Py_ssize_t)key, NULL, name) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegQueryValueExW(key, name, NULL, NULL, NULL, &bufSize); + Py_END_ALLOW_THREADS if (rc == ERROR_MORE_DATA) bufSize = 256; else if (rc != ERROR_SUCCESS) @@ -1637,8 +1660,10 @@ winreg_QueryValueEx_impl(PyObject *module, HKEY key, const wchar_t *name) while (1) { retSize = bufSize; + Py_BEGIN_ALLOW_THREADS rc = RegQueryValueExW(key, name, NULL, &typ, (BYTE *)retBuf, &retSize); + Py_END_ALLOW_THREADS if (rc != ERROR_MORE_DATA) break; @@ -1656,7 +1681,7 @@ winreg_QueryValueEx_impl(PyObject *module, HKEY key, const wchar_t *name) return PyErr_SetFromWindowsErrWithFunction(rc, "RegQueryValueEx"); } - obData = Reg2Py(retBuf, bufSize, typ); + obData = Reg2Py(retBuf, retSize, typ); PyMem_Free(retBuf); if (obData == NULL) return NULL; diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 8c161835af9a8c..5ceddf759b8f3b 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -106,6 +106,7 @@ </ItemGroup> <ItemGroup> <ClCompile Include="..\Modules\atexitmodule.c" /> + <ClCompile Include="..\Modules\_datetimemodule.c" /> <ClCompile Include="..\Modules\faulthandler.c" /> <ClCompile Include="..\Modules\gcmodule.c" /> <ClCompile Include="..\Modules\getbuildinfo.c" /> @@ -276,7 +277,7 @@ <ClCompile Include="..\Python\uniqueid.c" /> </ItemGroup> <ItemGroup> - <ClInclude Include="..\PC\pyconfig.h.in" /> + <ClInclude Include="..\PC\pyconfig.h" /> </ItemGroup> <ItemGroup> <!-- BEGIN frozen modules --> @@ -436,31 +437,6 @@ <ImportGroup Label="ExtensionTargets"> </ImportGroup> - <!-- Direct copy from pythoncore.vcxproj, but this one is only used for our - own build. All other extension modules will use the copy that pythoncore - generates. --> - <Target Name="_UpdatePyconfig" BeforeTargets="PrepareForBuild"> - <MakeDir Directories="$(IntDir)" Condition="!Exists($(IntDir))" /> - <ItemGroup> - <PyConfigH Remove="@(PyConfigH)" /> - <PyConfigH Include="@(ClInclude)" Condition="'%(Filename)%(Extension)' == 'pyconfig.h.in'" /> - </ItemGroup> - <Error Text="Did not find pyconfig.h" Condition="@(ClInclude) == ''" /> - <PropertyGroup> - <PyConfigH>@(PyConfigH->'%(FullPath)', ';')</PyConfigH> - <PyConfigHText>$([System.IO.File]::ReadAllText($(PyConfigH)))</PyConfigHText> - <OldPyConfigH Condition="Exists('$(IntDir)pyconfig.h')">$([System.IO.File]::ReadAllText('$(IntDir)pyconfig.h'))</OldPyConfigH> - </PropertyGroup> - <PropertyGroup Condition="$(DisableGil) == 'true'"> - <PyConfigHText>$(PyConfigHText.Replace('/* #define Py_GIL_DISABLED 1 */', '#define Py_GIL_DISABLED 1'))</PyConfigHText> - </PropertyGroup> - <Message Text="Updating pyconfig.h" Condition="$(PyConfigHText.TrimEnd()) != $(OldPyConfigH.TrimEnd())" /> - <WriteLinesToFile File="$(IntDir)pyconfig.h" - Lines="$(PyConfigHText)" - Overwrite="true" - Condition="$(PyConfigHText.TrimEnd()) != $(OldPyConfigH.TrimEnd())" /> - </Target> - <Target Name="_RebuildGetPath" AfterTargets="_RebuildFrozen" Condition="$(Configuration) != 'PGUpdate'"> <Exec Command='"$(TargetPath)" "%(GetPath.ModName)" "%(GetPath.FullPath)" "%(GetPath.IntFile)"' /> diff --git a/PCbuild/_testclinic_limited.vcxproj b/PCbuild/_testclinic_limited.vcxproj index 183a55080e8693..95c205309b1f30 100644 --- a/PCbuild/_testclinic_limited.vcxproj +++ b/PCbuild/_testclinic_limited.vcxproj @@ -70,6 +70,7 @@ <ProjectGuid>{01FDF29A-40A1-46DF-84F5-85EBBD2A2410}</ProjectGuid> <RootNamespace>_testclinic_limited</RootNamespace> <Keyword>Win32Proj</Keyword> + <SupportPGO>false</SupportPGO> </PropertyGroup> <Import Project="python.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> diff --git a/PCbuild/_zstd.vcxproj b/PCbuild/_zstd.vcxproj index b53f93a6af8d11..6f91b8d05cca20 100644 --- a/PCbuild/_zstd.vcxproj +++ b/PCbuild/_zstd.vcxproj @@ -137,6 +137,7 @@ <ItemGroup> <ClInclude Include="..\Modules\_zstd\_zstdmodule.h" /> <ClInclude Include="..\Modules\_zstd\buffer.h" /> + <ClInclude Include="..\Modules\_zstd\zstddict.h" /> <ClInclude Include="$(zstdDir)lib\common\bitstream.h" /> <ClInclude Include="$(zstdDir)lib\common\error_private.h" /> <ClInclude Include="$(zstdDir)lib\common\fse.h" /> diff --git a/PCbuild/_zstd.vcxproj.filters b/PCbuild/_zstd.vcxproj.filters index d4d36063c85d09..eec666e5eaf439 100644 --- a/PCbuild/_zstd.vcxproj.filters +++ b/PCbuild/_zstd.vcxproj.filters @@ -128,6 +128,9 @@ <ClInclude Include="..\Modules\_zstd\buffer.h"> <Filter>Header Files</Filter> </ClInclude> + <ClInclude Include="..\Modules\_zstd\zstddict.h"> + <Filter>Header Files</Filter> + </ClInclude> <ClInclude Include="$(zstdDir)lib\zstd.h"> <Filter>Header Files\zstd</Filter> </ClInclude> diff --git a/PCbuild/build.bat b/PCbuild/build.bat index 2f358991e484ce..602357048867d6 100644 --- a/PCbuild/build.bat +++ b/PCbuild/build.bat @@ -33,7 +33,7 @@ echo. -k Attempt to kill any running Pythons before building (usually done echo. automatically by the pythoncore project) echo. --pgo Build with Profile-Guided Optimization. This flag echo. overrides -c and -d -echo. --disable-gil Enable experimental support for running without the GIL. +echo. --disable-gil Enable support for running without the GIL. echo. --test-marker Enable the test marker within the build. echo. --regen Regenerate all opcodes, grammar and tokens. echo. --experimental-jit Enable the experimental just-in-time compiler. diff --git a/PCbuild/find_python.bat b/PCbuild/find_python.bat index d65d080ca71a90..841d83968c60be 100644 --- a/PCbuild/find_python.bat +++ b/PCbuild/find_python.bat @@ -47,7 +47,7 @@ @rem If py.exe finds a recent enough version, use that one @rem It is fine to add new versions to this list when they have released, @rem but we do not use prerelease builds here. -@for %%p in (3.13 3.12 3.11 3.10) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found +@for %%p in (3.14 3.13 3.12 3.11 3.10) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found @if NOT exist "%_Py_EXTERNALS_DIR%" mkdir "%_Py_EXTERNALS_DIR%" @set _Py_NUGET=%NUGET% diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py index 4ecc8925349c93..a78aa6a23041ad 100755 --- a/PCbuild/get_external.py +++ b/PCbuild/get_external.py @@ -5,8 +5,28 @@ import pathlib import sys import time +import urllib.error +import urllib.request import zipfile -from urllib.request import urlretrieve + + +def retrieve_with_retries(download_location, output_path, reporthook, + max_retries=7): + """Download a file with exponential backoff retry and save to disk.""" + for attempt in range(max_retries + 1): + try: + resp = urllib.request.urlretrieve( + download_location, + output_path, + reporthook=reporthook, + ) + except (urllib.error.URLError, ConnectionError) as ex: + if attempt == max_retries: + msg = f"Download from {download_location} failed." + raise OSError(msg) from ex + time.sleep(2.25**attempt) + else: + return resp def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): @@ -16,10 +36,10 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): if verbose: reporthook = print zip_dir.mkdir(parents=True, exist_ok=True) - filename, headers = urlretrieve( + filename, _headers = retrieve_with_retries( url, zip_dir / f'{commit_hash}.zip', - reporthook=reporthook, + reporthook ) return filename diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index e29054f5734d49..50a227b563a7c0 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -54,9 +54,9 @@ echo.Fetching external libraries... set libraries= set libraries=%libraries% bzip2-1.0.8 if NOT "%IncludeLibffiSrc%"=="false" set libraries=%libraries% libffi-3.4.4 -if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-3.0.16 +if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-3.0.18 set libraries=%libraries% mpdecimal-4.0.0 -set libraries=%libraries% sqlite-3.49.1.0 +set libraries=%libraries% sqlite-3.50.4.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tcl-core-8.6.15.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tk-8.6.15.0 set libraries=%libraries% xz-5.2.5 @@ -79,7 +79,7 @@ echo.Fetching external binaries... set binaries= if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4 -if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.16.2 +if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18 if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0 if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06 if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0 diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 4e414dc913b9d5..53bfe5e3ea95cc 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -10,10 +10,10 @@ <Py_IntDir Condition="'$(Py_IntDir)' == ''">$(MSBuildThisFileDirectory)obj\</Py_IntDir> <IntDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\$(ProjectName)\</IntDir> <IntDir>$(IntDir.Replace(`\\`, `\`))</IntDir> - <!-- pyconfig.h is updated by pythoncore.vcxproj, so it's always in pythoncore's IntDir --> - <GeneratedPyConfigDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\pythoncore\</GeneratedPyConfigDir> <GeneratedFrozenModulesDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_frozen\</GeneratedFrozenModulesDir> <GeneratedZlibNgDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\zlib-ng\</GeneratedZlibNgDir> + <GeneratedJitStencilsDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration)</GeneratedJitStencilsDir> + <GeneratedJitStencilsDir Condition="$(Configuration) == 'PGUpdate'">$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_PGInstrument</GeneratedJitStencilsDir> <TargetName Condition="'$(TargetName)' == ''">$(ProjectName)</TargetName> <TargetName>$(TargetName)$(PyDebugExt)</TargetName> <GenerateManifest>false</GenerateManifest> @@ -49,11 +49,12 @@ <_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64'">_WIN64;</_PlatformPreprocessorDefinition> <_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64' and $(PlatformToolset) != 'ClangCL'">_M_X64;$(_PlatformPreprocessorDefinition)</_PlatformPreprocessorDefinition> <_Py3NamePreprocessorDefinition>PY3_DLLNAME=L"$(Py3DllName)$(PyDebugExt)";</_Py3NamePreprocessorDefinition> + <_FreeThreadedPreprocessorDefinition Condition="$(DisableGil) == 'true'">Py_GIL_DISABLED=1;</_FreeThreadedPreprocessorDefinition> </PropertyGroup> <ItemDefinitionGroup> <ClCompile> - <AdditionalIncludeDirectories>$(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(GeneratedPyConfigDir);$(PySourcePath)PC;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> - <PreprocessorDefinitions>WIN32;$(_Py3NamePreprocessorDefinition);$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)%(PreprocessorDefinitions)</PreprocessorDefinitions> + <AdditionalIncludeDirectories>$(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(PySourcePath)PC;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <PreprocessorDefinitions>WIN32;$(_Py3NamePreprocessorDefinition)$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)$(_FreeThreadedPreprocessorDefinition)%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions Condition="'$(SupportPGO)' and ($(Configuration) == 'PGInstrument' or $(Configuration) == 'PGUpdate')">_Py_USING_PGO=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <Optimization>MaxSpeed</Optimization> @@ -96,19 +97,16 @@ <TargetMachine Condition="'$(Platform)' == 'x64'">MachineX64</TargetMachine> <TargetMachine Condition="'$(Platform)'=='ARM'">MachineARM</TargetMachine> <TargetMachine Condition="'$(Platform)'=='ARM64'">MachineARM64</TargetMachine> - <ProfileGuidedDatabase Condition="$(SupportPGO)">$(OutDir)$(TargetName).pgd</ProfileGuidedDatabase> - <LinkTimeCodeGeneration Condition="$(Configuration) == 'Release'">UseLinkTimeCodeGeneration</LinkTimeCodeGeneration> - <LinkTimeCodeGeneration Condition="$(SupportPGO) and $(Configuration) == 'PGInstrument'">PGInstrument</LinkTimeCodeGeneration> - <LinkTimeCodeGeneration Condition="$(SupportPGO) and $(Configuration) == 'PGUpdate'">PGUpdate</LinkTimeCodeGeneration> + <LinkTimeCodeGeneration Condition="$(Configuration) != 'Debug'">UseLinkTimeCodeGeneration</LinkTimeCodeGeneration> <AdditionalDependencies>advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;%(AdditionalDependencies)</AdditionalDependencies> <AdditionalOptions Condition="$(Configuration) != 'Debug'">/OPT:REF,NOICF %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="$(MSVCHasBrokenARM64Clamping) == 'true' and $(Platform) == 'ARM64'">-d2:-pattern-opt-disable:-932189325 %(AdditionalOptions)</AdditionalOptions> + <AdditionalOptions Condition="$(SupportPGO) and $(Configuration) == 'PGInstrument' and $(PlatformToolset) != 'ClangCL'">/GENPROFILE %(AdditionalOptions)</AdditionalOptions> + <AdditionalOptions Condition="$(SupportPGO) and $(Configuration) == 'PGUpdate' and $(PlatformToolset) != 'ClangCL'">/USEPROFILE %(AdditionalOptions)</AdditionalOptions> </Link> <Lib> <LinkTimeCodeGeneration>false</LinkTimeCodeGeneration> - <LinkTimeCodeGeneration Condition="$(Configuration) == 'Release'">true</LinkTimeCodeGeneration> - <LinkTimeCodeGeneration Condition="$(SupportPGO) and $(Configuration) == 'PGInstrument'">true</LinkTimeCodeGeneration> - <LinkTimeCodeGeneration Condition="$(SupportPGO) and $(Configuration) == 'PGUpdate'">true</LinkTimeCodeGeneration> + <LinkTimeCodeGeneration Condition="$(Configuration) != 'Debug'">true</LinkTimeCodeGeneration> </Lib> <ResourceCompile> <AdditionalIncludeDirectories>$(PySourcePath)PC;$(PySourcePath)Include;$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> diff --git a/PCbuild/python.props b/PCbuild/python.props index ddc7696d2762fe..cc1572526559ce 100644 --- a/PCbuild/python.props +++ b/PCbuild/python.props @@ -11,6 +11,7 @@ We set BasePlatformToolset for ICC's benefit, it's otherwise ignored. --> + <BasePlatformToolset Condition="'$(BasePlatformToolset)' == '' and '$(VisualStudioVersion)' == '18.0'">v143</BasePlatformToolset> <BasePlatformToolset Condition="'$(BasePlatformToolset)' == '' and '$(VisualStudioVersion)' == '17.0'">v143</BasePlatformToolset> <BasePlatformToolset Condition="'$(BasePlatformToolset)' == '' and '$(VisualStudioVersion)' == '16.0'">v142</BasePlatformToolset> <BasePlatformToolset Condition="'$(BasePlatformToolset)' == '' and ('$(MSBuildToolsVersion)' == '15.0' or '$(VisualStudioVersion)' == '15.0')">v141</BasePlatformToolset> @@ -74,15 +75,15 @@ <Import Project="$(ExternalProps)" Condition="$(ExternalProps) != '' and Exists('$(ExternalProps)')" /> <PropertyGroup> - <sqlite3Dir Condition="$(sqlite3Dir) == ''">$(ExternalsDir)sqlite-3.49.1.0\</sqlite3Dir> + <sqlite3Dir Condition="$(sqlite3Dir) == ''">$(ExternalsDir)sqlite-3.50.4.0\</sqlite3Dir> <bz2Dir Condition="$(bz2Dir) == ''">$(ExternalsDir)bzip2-1.0.8\</bz2Dir> <lzmaDir Condition="$(lzmaDir) == ''">$(ExternalsDir)xz-5.2.5\</lzmaDir> <libffiDir Condition="$(libffiDir) == ''">$(ExternalsDir)libffi-3.4.4\</libffiDir> <libffiOutDir Condition="$(libffiOutDir) == ''">$(libffiDir)$(ArchName)\</libffiOutDir> <libffiIncludeDir Condition="$(libffiIncludeDir) == ''">$(libffiOutDir)include</libffiIncludeDir> <mpdecimalDir Condition="$(mpdecimalDir) == ''">$(ExternalsDir)\mpdecimal-4.0.0\</mpdecimalDir> - <opensslDir Condition="$(opensslDir) == ''">$(ExternalsDir)openssl-3.0.16\</opensslDir> - <opensslOutDir Condition="$(opensslOutDir) == ''">$(ExternalsDir)openssl-bin-3.0.16.2\$(ArchName)\</opensslOutDir> + <opensslDir Condition="$(opensslDir) == ''">$(ExternalsDir)openssl-3.0.18\</opensslDir> + <opensslOutDir Condition="$(opensslOutDir) == ''">$(ExternalsDir)openssl-bin-3.0.18\$(ArchName)\</opensslOutDir> <opensslIncludeDir Condition="$(opensslIncludeDir) == ''">$(opensslOutDir)include</opensslIncludeDir> <nasmDir Condition="$(nasmDir) == ''">$(ExternalsDir)\nasm-2.11.06\</nasmDir> <zlibDir Condition="$(zlibDir) == ''">$(ExternalsDir)\zlib-1.3.1\</zlibDir> diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 549d6284972afc..b911c9385634d7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -102,6 +102,7 @@ <AdditionalOptions>/Zm200 %(AdditionalOptions)</AdditionalOptions> <AdditionalIncludeDirectories>$(PySourcePath)Modules\_hacl;$(PySourcePath)Modules\_hacl\include;$(PySourcePath)Python;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories Condition="$(IncludeExternals)">$(zlibNgDir);$(GeneratedZlibNgDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <AdditionalIncludeDirectories Condition="'$(UseJIT)' == 'true'">$(GeneratedJitStencilsDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <PreprocessorDefinitions>_USRDLL;Py_BUILD_CORE;Py_BUILD_CORE_BUILTIN;Py_ENABLE_SHARED;MS_DLL_ID="$(SysWinVer)";ZLIB_COMPAT;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions Condition="$(IncludeExternals)">_Py_HAVE_ZLIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions Condition="'$(UseJIT)' == 'true'">_Py_JIT;%(PreprocessorDefinitions)</PreprocessorDefinitions> @@ -409,7 +410,7 @@ <ClInclude Include="..\Parser\string_parser.h" /> <ClInclude Include="..\Parser\pegen.h" /> <ClInclude Include="..\PC\errmap.h" /> - <ClInclude Include="..\PC\pyconfig.h.in" /> + <ClInclude Include="..\PC\pyconfig.h" /> <ClInclude Include="..\Python\condvar.h" /> <ClInclude Include="..\Python\stdlib_module_names.h" /> <ClInclude Include="..\Python\thread_nt.h" /> @@ -418,8 +419,12 @@ <ClCompile Include="..\Modules\_abc.c" /> <ClCompile Include="..\Modules\_bisectmodule.c" /> <ClCompile Include="..\Modules\blake2module.c"> - <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions)</PreprocessorDefinitions> - <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'">HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'"> + _Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions) + </PreprocessorDefinitions> + <PreprocessorDefinitions Condition="'$(Platform)' == 'x64'"> + _Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions) + </PreprocessorDefinitions> </ClCompile> <ClCompile Include="..\Modules\_codecsmodule.c" /> <ClCompile Include="..\Modules\_collectionsmodule.c" /> @@ -688,34 +693,6 @@ </ImportGroup> <Target Name="_TriggerRegen" BeforeTargets="PrepareForBuild" DependsOnTargets="Regen" /> - <Target Name="_UpdatePyconfig" BeforeTargets="PrepareForBuild"> - <MakeDir Directories="$(IntDir)" Condition="!Exists($(IntDir))" /> - <ItemGroup> - <PyConfigH Remove="@(PyConfigH)" /> - <PyConfigH Include="@(ClInclude)" Condition="'%(Filename)%(Extension)' == 'pyconfig.h.in'" /> - </ItemGroup> - <Error Text="Did not find pyconfig.h" Condition="@(ClInclude) == ''" /> - <PropertyGroup> - <PyConfigH>@(PyConfigH->'%(FullPath)', ';')</PyConfigH> - <PyConfigHText>$([System.IO.File]::ReadAllText($(PyConfigH)))</PyConfigHText> - <OldPyConfigH Condition="Exists('$(IntDir)pyconfig.h')">$([System.IO.File]::ReadAllText('$(IntDir)pyconfig.h'))</OldPyConfigH> - </PropertyGroup> - <PropertyGroup Condition="$(DisableGil) == 'true'"> - <PyConfigHText>$(PyConfigHText.Replace('/* #define Py_GIL_DISABLED 1 */', '#define Py_GIL_DISABLED 1'))</PyConfigHText> - </PropertyGroup> - <Message Text="Updating pyconfig.h" Condition="$(PyConfigHText.TrimEnd()) != $(OldPyConfigH.TrimEnd())" /> - <WriteLinesToFile File="$(IntDir)pyconfig.h" - Lines="$(PyConfigHText)" - Overwrite="true" - Condition="$(PyConfigHText.TrimEnd()) != $(OldPyConfigH.TrimEnd())" /> - </Target> - <Target Name="_CopyPyconfig" Inputs="$(IntDir)pyconfig.h" Outputs="$(OutDir)pyconfig.h" AfterTargets="Build" DependsOnTargets="_UpdatePyconfig"> - <Copy SourceFiles="$(IntDir)pyconfig.h" DestinationFolder="$(OutDir)" /> - </Target> - <Target Name="_CleanPyconfig" AfterTargets="Clean"> - <Delete Files="$(IntDir)pyconfig.h;$(OutDir)pyconfig.h" /> - </Target> - <Target Name="_GetBuildInfo" BeforeTargets="PrepareForBuild"> <PropertyGroup> <GIT Condition="$(GIT) == ''">git</GIT> diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt index 01e19aabdecdc7..abb345019bf6b9 100644 --- a/PCbuild/readme.txt +++ b/PCbuild/readme.txt @@ -230,7 +230,7 @@ _ssl again when building. _sqlite3 - Wraps SQLite 3.49.1, which is itself built by sqlite3.vcxproj + Wraps SQLite 3.50.4, which is itself built by sqlite3.vcxproj Homepage: https://www.sqlite.org/ _tkinter diff --git a/PCbuild/regen.targets b/PCbuild/regen.targets index 3ad17737807235..742597f5cb5ebd 100644 --- a/PCbuild/regen.targets +++ b/PCbuild/regen.targets @@ -29,12 +29,12 @@ <_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" /> <_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" /> <!-- Taken from _Target._compute_digest in Tools\jit\_targets.py: --> - <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(GeneratedPyConfigDir)pyconfig.h;$(PySourcePath)Tools\jit\**"/> + <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(PySourcePath)PC\pyconfig.h;$(PySourcePath)Tools\jit\**"/> <!-- Need to explicitly enumerate these, since globbing doesn't work for missing outputs: --> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils.h"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils.h"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/> <_CasesSources Include="$(PySourcePath)Python\bytecodes.c;$(PySourcePath)Python\optimizer_bytecodes.c;"/> <_CasesOutputs Include="$(PySourcePath)Python\generated_cases.c.h;$(PySourcePath)Include\opcode_ids.h;$(PySourcePath)Include\internal\pycore_uop_ids.h;$(PySourcePath)Python\opcode_targets.h;$(PySourcePath)Include\internal\pycore_opcode_metadata.h;$(PySourcePath)Include\internal\pycore_uop_metadata.h;$(PySourcePath)Python\optimizer_cases.c.h;$(PySourcePath)Lib\_opcode_metadata.py"/> <_SbomSources Include="$(PySourcePath)PCbuild\get_externals.bat" /> @@ -116,7 +116,7 @@ <Target Name="_RegenJIT" Condition="'$(UseJIT)' == 'true'" - DependsOnTargets="_UpdatePyconfig;FindPythonForBuild" + DependsOnTargets="FindPythonForBuild" Inputs="@(_JITSources)" Outputs="@(_JITOutputs)"> <PropertyGroup> @@ -125,8 +125,7 @@ <JITArgs Condition="$(Platform) == 'x64'">x86_64-pc-windows-msvc</JITArgs> <JITArgs Condition="$(Configuration) == 'Debug'">$(JITArgs) --debug</JITArgs> </PropertyGroup> - <Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs)' - WorkingDirectory="$(GeneratedPyConfigDir)"/> + <Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs) --output-dir "$(GeneratedJitStencilsDir)" --pyconfig-dir "$(PySourcePath)PC"'/> </Target> <Target Name="_CleanJIT" AfterTargets="Clean"> <Delete Files="@(_JITOutputs)"/> diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 3bcc0870882a29..57e46b4399c66d 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -965,7 +965,7 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv) if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) { return RAISE_SYNTAX_ERROR_KNOWN_RANGE( conv_token, conv, - "%c-string: conversion type must come right after the exclamanation mark", + "%c-string: conversion type must come right after the exclamation mark", TOK_GET_STRING_PREFIX(p->tok) ); } @@ -1404,7 +1404,15 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) { if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) { return NULL; } - PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok); + + // Check if we're inside a raw f-string for format spec decoding + int is_raw = 0; + if (INSIDE_FSTRING(p->tok)) { + tokenizer_mode *mode = TOK_GET_MODE(p->tok); + is_raw = mode->raw; + } + + PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok); if (str == NULL) { return NULL; } @@ -1834,8 +1842,8 @@ _build_concatenated_joined_str(Parser *p, asdl_expr_seq *strings, return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena); } -static expr_ty -_build_concatenated_template_str(Parser *p, asdl_expr_seq *strings, +expr_ty +_PyPegen_concatenate_tstrings(Parser *p, asdl_expr_seq *strings, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena) { @@ -1853,7 +1861,6 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, Py_ssize_t len = asdl_seq_LEN(strings); assert(len > 0); - int t_string_found = 0; int f_string_found = 0; int unicode_string_found = 0; int bytes_found = 0; @@ -1873,7 +1880,8 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, f_string_found = 1; break; case TemplateStr_kind: - t_string_found = 1; + // python.gram handles this; we should never get here + assert(0); break; default: f_string_found = 1; @@ -1882,13 +1890,13 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, } // Cannot mix unicode and bytes - if ((unicode_string_found || f_string_found || t_string_found) && bytes_found) { + if ((unicode_string_found || f_string_found) && bytes_found) { RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals"); return NULL; } // If it's only bytes or only unicode string, do a simple concat - if (!f_string_found && !t_string_found) { + if (!f_string_found) { if (len == 1) { return asdl_seq_GET(strings, 0); } @@ -1902,11 +1910,6 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, } } - if (t_string_found) { - return _build_concatenated_template_str(p, strings, lineno, - col_offset, end_lineno, end_col_offset, arena); - } - return _build_concatenated_joined_str(p, strings, lineno, col_offset, end_lineno, end_col_offset, arena); } @@ -1936,6 +1939,9 @@ _PyPegen_register_stmts(Parser *p, asdl_stmt_seq* stmts) { return stmts; } stmt_ty last_stmt = asdl_seq_GET(stmts, len - 1); + if (p->last_stmt_location.lineno > last_stmt->lineno) { + return stmts; + } p->last_stmt_location.lineno = last_stmt->lineno; p->last_stmt_location.col_offset = last_stmt->col_offset; p->last_stmt_location.end_lineno = last_stmt->end_lineno; diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 09e014534fbabc..3e252cbc4883d1 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -1009,7 +1009,7 @@ def visitModule(self, mod): else { if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "Field '%U' is missing from %.400s._field_types. " + "Field %R is missing from %.400s._field_types. " "This will become an error in Python 3.15.", name, Py_TYPE(self)->tp_name ) < 0) { @@ -1044,7 +1044,7 @@ def visitModule(self, mod): // simple field (e.g., identifier) if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "%.400s.__init__ missing 1 required positional argument: '%U'. " + "%.400s.__init__ missing 1 required positional argument: %R. " "This will become an error in Python 3.15.", Py_TYPE(self)->tp_name, name ) < 0) { @@ -1244,6 +1244,32 @@ def visitModule(self, mod): Py_DECREF(unused); } } + + // Discard fields from 'expecting' that default to None + PyObject *field_types = NULL; + if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), + &_Py_ID(_field_types), + &field_types) < 0) + { + Py_DECREF(expecting); + return -1; + } + if (field_types != NULL) { + Py_ssize_t pos = 0; + PyObject *field_name, *field_type; + while (PyDict_Next(field_types, &pos, &field_name, &field_type)) { + if (_PyUnion_Check(field_type)) { + // optional field + if (PySet_Discard(expecting, field_name) < 0) { + Py_DECREF(expecting); + Py_DECREF(field_types); + return -1; + } + } + } + Py_DECREF(field_types); + } + // Now 'expecting' contains the fields or attributes // that would not be filled inside ast_type_replace(). Py_ssize_t m = PySet_GET_SIZE(expecting); @@ -1486,7 +1512,7 @@ def visitModule(self, mod): for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -1510,7 +1536,7 @@ def visitModule(self, mod): } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -1614,7 +1640,7 @@ def visitModule(self, mod): } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 4d10bccf0a53f2..7f25afec302c22 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { } PyObject *res = NULL; - // Check if there is a # character in the expression + // Look for a # character outside of string literals int hash_detected = 0; + int in_string = 0; + char quote_char = 0; + for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { + char ch = tok_mode->last_expr_buffer[i]; + + // Skip escaped characters + if (ch == '\\') { + i++; + continue; + } + + // Handle quotes + if (ch == '"' || ch == '\'') { + // The following if/else block works becase there is an off number + // of quotes in STRING tokens and the lexer only ever reaches this + // function with valid STRING tokens. + // For example: """hello""" + // First quote: in_string = 1 + // Second quote: in_string = 0 + // Third quote: in_string = 1 + if (!in_string) { + in_string = 1; + quote_char = ch; + } + else if (ch == quote_char) { + in_string = 0; + } + continue; + } + + // Check for # outside strings + if (ch == '#' && !in_string) { hash_detected = 1; break; } } - + // If we found a # character in the expression, we need to handle comments if (hash_detected) { - Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char)); + // Allocate buffer for processed result + char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char)); if (!result) { return -1; } - Py_ssize_t i = 0; - Py_ssize_t j = 0; + Py_ssize_t i = 0; // Input position + Py_ssize_t j = 0; // Output position + in_string = 0; // Whether we're in a string + quote_char = 0; // Current string quote char - for (i = 0, j = 0; i < input_length; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { - // Skip characters until newline or end of string - while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') { - if (tok_mode->last_expr_buffer[i] == '\n') { - result[j++] = tok_mode->last_expr_buffer[i]; - break; - } + // Process each character + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + char ch = tok_mode->last_expr_buffer[i]; + + // Handle string quotes + if (ch == '"' || ch == '\'') { + // See comment above to understand this part + if (!in_string) { + in_string = 1; + quote_char = ch; + } else if (ch == quote_char) { + in_string = 0; + } + result[j++] = ch; + } + // Skip comments + else if (ch == '#' && !in_string) { + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end && + tok_mode->last_expr_buffer[i] != '\n') { i++; } - } else { - result[j++] = tok_mode->last_expr_buffer[i]; + if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + result[j++] = '\n'; + } } + // Copy other chars + else { + result[j++] = ch; + } + i++; } result[j] = '\0'; // Null-terminate the result string @@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { tok_mode->last_expr_size - tok_mode->last_expr_end, NULL ); - } - - if (!res) { + if (!res) { return -1; } token->metadata = res; @@ -491,6 +539,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t return MAKE_TOKEN(ERRORTOKEN); } } + else if (c == EOF && PyErr_Occurred()) { + return MAKE_TOKEN(ERRORTOKEN); + } else { break; } @@ -1328,7 +1379,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c)); } - if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) { + if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) { current_tok->in_debug = 1; } @@ -1421,7 +1472,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct return MAKE_TOKEN( _PyTokenizer_syntaxerror( tok, - "f-string: newlines are not allowed in format specifiers for single quoted f-strings" + "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings", + TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok) ) ); } diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h index 5e8cac7249b21c..877127125a7652 100644 --- a/Parser/lexer/state.h +++ b/Parser/lexer/state.h @@ -9,6 +9,8 @@ #define INSIDE_FSTRING(tok) (tok->tok_mode_stack_index > 0) #define INSIDE_FSTRING_EXPR(tok) (tok->curly_bracket_expr_start_depth >= 0) +#define INSIDE_FSTRING_EXPR_AT_TOP(tok) \ + (tok->curly_bracket_depth - tok->curly_bracket_expr_start_depth == 1) enum decoding_state { STATE_INIT, diff --git a/Parser/parser.c b/Parser/parser.c index 509fac7df6e371..1c507937077deb 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -14,7 +14,7 @@ # define MAXSTACK 4000 # endif #else -# define MAXSTACK 4000 +# define MAXSTACK 6000 #endif static const int n_keyword_lists = 9; static KeywordToken *reserved_keywords[] = { @@ -348,185 +348,187 @@ static char *soft_keywords[] = { #define invalid_fstring_conversion_character_type 1261 #define invalid_tstring_replacement_field_type 1262 #define invalid_tstring_conversion_character_type 1263 -#define invalid_arithmetic_type 1264 -#define invalid_factor_type 1265 -#define invalid_type_params_type 1266 -#define _loop0_1_type 1267 -#define _loop1_2_type 1268 -#define _loop0_3_type 1269 -#define _gather_4_type 1270 -#define _tmp_5_type 1271 -#define _tmp_6_type 1272 -#define _tmp_7_type 1273 -#define _tmp_8_type 1274 -#define _tmp_9_type 1275 -#define _tmp_10_type 1276 -#define _tmp_11_type 1277 -#define _loop1_12_type 1278 -#define _tmp_13_type 1279 -#define _loop0_14_type 1280 -#define _gather_15_type 1281 -#define _tmp_16_type 1282 -#define _tmp_17_type 1283 -#define _loop0_18_type 1284 -#define _loop1_19_type 1285 -#define _loop0_20_type 1286 -#define _gather_21_type 1287 -#define _tmp_22_type 1288 -#define _loop0_23_type 1289 -#define _gather_24_type 1290 -#define _loop1_25_type 1291 -#define _tmp_26_type 1292 -#define _tmp_27_type 1293 -#define _loop0_28_type 1294 -#define _loop0_29_type 1295 -#define _loop1_30_type 1296 -#define _loop1_31_type 1297 -#define _loop0_32_type 1298 -#define _loop1_33_type 1299 -#define _loop0_34_type 1300 -#define _gather_35_type 1301 -#define _tmp_36_type 1302 -#define _loop1_37_type 1303 -#define _loop1_38_type 1304 -#define _loop1_39_type 1305 -#define _loop0_40_type 1306 -#define _gather_41_type 1307 -#define _tmp_42_type 1308 -#define _tmp_43_type 1309 -#define _tmp_44_type 1310 -#define _loop0_45_type 1311 -#define _gather_46_type 1312 -#define _loop0_47_type 1313 -#define _gather_48_type 1314 -#define _tmp_49_type 1315 -#define _loop0_50_type 1316 -#define _gather_51_type 1317 -#define _loop0_52_type 1318 -#define _gather_53_type 1319 -#define _loop0_54_type 1320 -#define _gather_55_type 1321 -#define _loop1_56_type 1322 -#define _loop1_57_type 1323 -#define _loop0_58_type 1324 -#define _gather_59_type 1325 -#define _loop1_60_type 1326 -#define _loop1_61_type 1327 -#define _loop1_62_type 1328 -#define _tmp_63_type 1329 -#define _loop0_64_type 1330 -#define _gather_65_type 1331 -#define _tmp_66_type 1332 -#define _tmp_67_type 1333 -#define _tmp_68_type 1334 -#define _tmp_69_type 1335 -#define _tmp_70_type 1336 -#define _loop0_71_type 1337 -#define _loop0_72_type 1338 -#define _loop1_73_type 1339 -#define _loop1_74_type 1340 -#define _loop0_75_type 1341 -#define _loop1_76_type 1342 -#define _loop0_77_type 1343 -#define _loop0_78_type 1344 -#define _loop0_79_type 1345 -#define _loop0_80_type 1346 -#define _loop1_81_type 1347 -#define _tmp_82_type 1348 -#define _loop0_83_type 1349 -#define _gather_84_type 1350 -#define _loop1_85_type 1351 -#define _loop0_86_type 1352 -#define _tmp_87_type 1353 -#define _loop0_88_type 1354 -#define _gather_89_type 1355 -#define _tmp_90_type 1356 -#define _loop0_91_type 1357 -#define _gather_92_type 1358 -#define _loop0_93_type 1359 -#define _gather_94_type 1360 -#define _loop0_95_type 1361 -#define _loop0_96_type 1362 -#define _gather_97_type 1363 -#define _loop1_98_type 1364 -#define _tmp_99_type 1365 -#define _loop0_100_type 1366 -#define _gather_101_type 1367 -#define _loop0_102_type 1368 -#define _gather_103_type 1369 -#define _tmp_104_type 1370 -#define _tmp_105_type 1371 -#define _loop0_106_type 1372 -#define _gather_107_type 1373 -#define _tmp_108_type 1374 -#define _tmp_109_type 1375 -#define _tmp_110_type 1376 -#define _tmp_111_type 1377 -#define _tmp_112_type 1378 -#define _loop1_113_type 1379 -#define _tmp_114_type 1380 -#define _tmp_115_type 1381 -#define _tmp_116_type 1382 -#define _tmp_117_type 1383 -#define _tmp_118_type 1384 -#define _loop0_119_type 1385 -#define _loop0_120_type 1386 -#define _tmp_121_type 1387 -#define _tmp_122_type 1388 -#define _tmp_123_type 1389 -#define _tmp_124_type 1390 -#define _tmp_125_type 1391 -#define _tmp_126_type 1392 -#define _tmp_127_type 1393 -#define _tmp_128_type 1394 -#define _tmp_129_type 1395 -#define _loop0_130_type 1396 -#define _gather_131_type 1397 -#define _tmp_132_type 1398 -#define _tmp_133_type 1399 -#define _tmp_134_type 1400 -#define _tmp_135_type 1401 -#define _loop0_136_type 1402 -#define _gather_137_type 1403 -#define _tmp_138_type 1404 -#define _loop0_139_type 1405 -#define _gather_140_type 1406 -#define _loop0_141_type 1407 -#define _gather_142_type 1408 -#define _tmp_143_type 1409 -#define _loop0_144_type 1410 -#define _tmp_145_type 1411 -#define _tmp_146_type 1412 -#define _tmp_147_type 1413 -#define _tmp_148_type 1414 -#define _tmp_149_type 1415 -#define _tmp_150_type 1416 -#define _tmp_151_type 1417 -#define _tmp_152_type 1418 -#define _tmp_153_type 1419 -#define _tmp_154_type 1420 -#define _tmp_155_type 1421 -#define _tmp_156_type 1422 -#define _tmp_157_type 1423 -#define _tmp_158_type 1424 -#define _tmp_159_type 1425 -#define _tmp_160_type 1426 -#define _tmp_161_type 1427 -#define _tmp_162_type 1428 -#define _tmp_163_type 1429 -#define _tmp_164_type 1430 -#define _tmp_165_type 1431 -#define _tmp_166_type 1432 -#define _tmp_167_type 1433 -#define _tmp_168_type 1434 -#define _tmp_169_type 1435 -#define _tmp_170_type 1436 -#define _loop0_171_type 1437 -#define _tmp_172_type 1438 -#define _tmp_173_type 1439 -#define _tmp_174_type 1440 -#define _tmp_175_type 1441 -#define _tmp_176_type 1442 +#define invalid_string_tstring_concat_type 1264 +#define invalid_arithmetic_type 1265 +#define invalid_factor_type 1266 +#define invalid_type_params_type 1267 +#define _loop0_1_type 1268 +#define _loop1_2_type 1269 +#define _loop0_3_type 1270 +#define _gather_4_type 1271 +#define _tmp_5_type 1272 +#define _tmp_6_type 1273 +#define _tmp_7_type 1274 +#define _tmp_8_type 1275 +#define _tmp_9_type 1276 +#define _tmp_10_type 1277 +#define _tmp_11_type 1278 +#define _loop1_12_type 1279 +#define _tmp_13_type 1280 +#define _loop0_14_type 1281 +#define _gather_15_type 1282 +#define _tmp_16_type 1283 +#define _tmp_17_type 1284 +#define _loop0_18_type 1285 +#define _loop1_19_type 1286 +#define _loop0_20_type 1287 +#define _gather_21_type 1288 +#define _tmp_22_type 1289 +#define _loop0_23_type 1290 +#define _gather_24_type 1291 +#define _loop1_25_type 1292 +#define _tmp_26_type 1293 +#define _tmp_27_type 1294 +#define _loop0_28_type 1295 +#define _loop0_29_type 1296 +#define _loop1_30_type 1297 +#define _loop1_31_type 1298 +#define _loop0_32_type 1299 +#define _loop1_33_type 1300 +#define _loop0_34_type 1301 +#define _gather_35_type 1302 +#define _tmp_36_type 1303 +#define _loop1_37_type 1304 +#define _loop1_38_type 1305 +#define _loop1_39_type 1306 +#define _loop0_40_type 1307 +#define _gather_41_type 1308 +#define _tmp_42_type 1309 +#define _tmp_43_type 1310 +#define _tmp_44_type 1311 +#define _loop0_45_type 1312 +#define _gather_46_type 1313 +#define _loop0_47_type 1314 +#define _gather_48_type 1315 +#define _tmp_49_type 1316 +#define _loop0_50_type 1317 +#define _gather_51_type 1318 +#define _loop0_52_type 1319 +#define _gather_53_type 1320 +#define _loop0_54_type 1321 +#define _gather_55_type 1322 +#define _loop1_56_type 1323 +#define _loop1_57_type 1324 +#define _loop0_58_type 1325 +#define _gather_59_type 1326 +#define _loop1_60_type 1327 +#define _loop1_61_type 1328 +#define _loop1_62_type 1329 +#define _tmp_63_type 1330 +#define _loop0_64_type 1331 +#define _gather_65_type 1332 +#define _tmp_66_type 1333 +#define _tmp_67_type 1334 +#define _tmp_68_type 1335 +#define _tmp_69_type 1336 +#define _tmp_70_type 1337 +#define _loop0_71_type 1338 +#define _loop0_72_type 1339 +#define _loop1_73_type 1340 +#define _loop1_74_type 1341 +#define _loop0_75_type 1342 +#define _loop1_76_type 1343 +#define _loop0_77_type 1344 +#define _loop0_78_type 1345 +#define _loop0_79_type 1346 +#define _loop0_80_type 1347 +#define _loop1_81_type 1348 +#define _loop1_82_type 1349 +#define _tmp_83_type 1350 +#define _loop0_84_type 1351 +#define _gather_85_type 1352 +#define _loop1_86_type 1353 +#define _loop0_87_type 1354 +#define _tmp_88_type 1355 +#define _loop0_89_type 1356 +#define _gather_90_type 1357 +#define _tmp_91_type 1358 +#define _loop0_92_type 1359 +#define _gather_93_type 1360 +#define _loop0_94_type 1361 +#define _gather_95_type 1362 +#define _loop0_96_type 1363 +#define _loop0_97_type 1364 +#define _gather_98_type 1365 +#define _loop1_99_type 1366 +#define _tmp_100_type 1367 +#define _loop0_101_type 1368 +#define _gather_102_type 1369 +#define _loop0_103_type 1370 +#define _gather_104_type 1371 +#define _tmp_105_type 1372 +#define _tmp_106_type 1373 +#define _loop0_107_type 1374 +#define _gather_108_type 1375 +#define _tmp_109_type 1376 +#define _tmp_110_type 1377 +#define _tmp_111_type 1378 +#define _tmp_112_type 1379 +#define _tmp_113_type 1380 +#define _loop1_114_type 1381 +#define _tmp_115_type 1382 +#define _tmp_116_type 1383 +#define _tmp_117_type 1384 +#define _tmp_118_type 1385 +#define _tmp_119_type 1386 +#define _loop0_120_type 1387 +#define _loop0_121_type 1388 +#define _tmp_122_type 1389 +#define _tmp_123_type 1390 +#define _tmp_124_type 1391 +#define _tmp_125_type 1392 +#define _tmp_126_type 1393 +#define _tmp_127_type 1394 +#define _tmp_128_type 1395 +#define _tmp_129_type 1396 +#define _tmp_130_type 1397 +#define _loop0_131_type 1398 +#define _gather_132_type 1399 +#define _tmp_133_type 1400 +#define _tmp_134_type 1401 +#define _tmp_135_type 1402 +#define _tmp_136_type 1403 +#define _loop0_137_type 1404 +#define _gather_138_type 1405 +#define _tmp_139_type 1406 +#define _loop0_140_type 1407 +#define _gather_141_type 1408 +#define _loop0_142_type 1409 +#define _gather_143_type 1410 +#define _tmp_144_type 1411 +#define _loop0_145_type 1412 +#define _tmp_146_type 1413 +#define _tmp_147_type 1414 +#define _tmp_148_type 1415 +#define _tmp_149_type 1416 +#define _tmp_150_type 1417 +#define _tmp_151_type 1418 +#define _tmp_152_type 1419 +#define _tmp_153_type 1420 +#define _tmp_154_type 1421 +#define _tmp_155_type 1422 +#define _tmp_156_type 1423 +#define _tmp_157_type 1424 +#define _tmp_158_type 1425 +#define _tmp_159_type 1426 +#define _tmp_160_type 1427 +#define _tmp_161_type 1428 +#define _tmp_162_type 1429 +#define _tmp_163_type 1430 +#define _tmp_164_type 1431 +#define _tmp_165_type 1432 +#define _tmp_166_type 1433 +#define _tmp_167_type 1434 +#define _tmp_168_type 1435 +#define _tmp_169_type 1436 +#define _tmp_170_type 1437 +#define _tmp_171_type 1438 +#define _loop0_172_type 1439 +#define _tmp_173_type 1440 +#define _tmp_174_type 1441 +#define _tmp_175_type 1442 +#define _tmp_176_type 1443 +#define _tmp_177_type 1444 static mod_ty file_rule(Parser *p); static mod_ty interactive_rule(Parser *p); @@ -792,6 +794,7 @@ static void *invalid_fstring_replacement_field_rule(Parser *p); static void *invalid_fstring_conversion_character_rule(Parser *p); static void *invalid_tstring_replacement_field_rule(Parser *p); static void *invalid_tstring_conversion_character_rule(Parser *p); +static void *invalid_string_tstring_concat_rule(Parser *p); static void *invalid_arithmetic_rule(Parser *p); static void *invalid_factor_rule(Parser *p); static void *invalid_type_params_rule(Parser *p); @@ -876,46 +879,46 @@ static asdl_seq *_loop0_78_rule(Parser *p); static asdl_seq *_loop0_79_rule(Parser *p); static asdl_seq *_loop0_80_rule(Parser *p); static asdl_seq *_loop1_81_rule(Parser *p); -static void *_tmp_82_rule(Parser *p); -static asdl_seq *_loop0_83_rule(Parser *p); -static asdl_seq *_gather_84_rule(Parser *p); -static asdl_seq *_loop1_85_rule(Parser *p); -static asdl_seq *_loop0_86_rule(Parser *p); -static void *_tmp_87_rule(Parser *p); -static asdl_seq *_loop0_88_rule(Parser *p); -static asdl_seq *_gather_89_rule(Parser *p); -static void *_tmp_90_rule(Parser *p); -static asdl_seq *_loop0_91_rule(Parser *p); -static asdl_seq *_gather_92_rule(Parser *p); -static asdl_seq *_loop0_93_rule(Parser *p); -static asdl_seq *_gather_94_rule(Parser *p); -static asdl_seq *_loop0_95_rule(Parser *p); +static asdl_seq *_loop1_82_rule(Parser *p); +static void *_tmp_83_rule(Parser *p); +static asdl_seq *_loop0_84_rule(Parser *p); +static asdl_seq *_gather_85_rule(Parser *p); +static asdl_seq *_loop1_86_rule(Parser *p); +static asdl_seq *_loop0_87_rule(Parser *p); +static void *_tmp_88_rule(Parser *p); +static asdl_seq *_loop0_89_rule(Parser *p); +static asdl_seq *_gather_90_rule(Parser *p); +static void *_tmp_91_rule(Parser *p); +static asdl_seq *_loop0_92_rule(Parser *p); +static asdl_seq *_gather_93_rule(Parser *p); +static asdl_seq *_loop0_94_rule(Parser *p); +static asdl_seq *_gather_95_rule(Parser *p); static asdl_seq *_loop0_96_rule(Parser *p); -static asdl_seq *_gather_97_rule(Parser *p); -static asdl_seq *_loop1_98_rule(Parser *p); -static void *_tmp_99_rule(Parser *p); -static asdl_seq *_loop0_100_rule(Parser *p); -static asdl_seq *_gather_101_rule(Parser *p); -static asdl_seq *_loop0_102_rule(Parser *p); -static asdl_seq *_gather_103_rule(Parser *p); -static void *_tmp_104_rule(Parser *p); +static asdl_seq *_loop0_97_rule(Parser *p); +static asdl_seq *_gather_98_rule(Parser *p); +static asdl_seq *_loop1_99_rule(Parser *p); +static void *_tmp_100_rule(Parser *p); +static asdl_seq *_loop0_101_rule(Parser *p); +static asdl_seq *_gather_102_rule(Parser *p); +static asdl_seq *_loop0_103_rule(Parser *p); +static asdl_seq *_gather_104_rule(Parser *p); static void *_tmp_105_rule(Parser *p); -static asdl_seq *_loop0_106_rule(Parser *p); -static asdl_seq *_gather_107_rule(Parser *p); -static void *_tmp_108_rule(Parser *p); +static void *_tmp_106_rule(Parser *p); +static asdl_seq *_loop0_107_rule(Parser *p); +static asdl_seq *_gather_108_rule(Parser *p); static void *_tmp_109_rule(Parser *p); static void *_tmp_110_rule(Parser *p); static void *_tmp_111_rule(Parser *p); static void *_tmp_112_rule(Parser *p); -static asdl_seq *_loop1_113_rule(Parser *p); -static void *_tmp_114_rule(Parser *p); +static void *_tmp_113_rule(Parser *p); +static asdl_seq *_loop1_114_rule(Parser *p); static void *_tmp_115_rule(Parser *p); static void *_tmp_116_rule(Parser *p); static void *_tmp_117_rule(Parser *p); static void *_tmp_118_rule(Parser *p); -static asdl_seq *_loop0_119_rule(Parser *p); +static void *_tmp_119_rule(Parser *p); static asdl_seq *_loop0_120_rule(Parser *p); -static void *_tmp_121_rule(Parser *p); +static asdl_seq *_loop0_121_rule(Parser *p); static void *_tmp_122_rule(Parser *p); static void *_tmp_123_rule(Parser *p); static void *_tmp_124_rule(Parser *p); @@ -924,22 +927,22 @@ static void *_tmp_126_rule(Parser *p); static void *_tmp_127_rule(Parser *p); static void *_tmp_128_rule(Parser *p); static void *_tmp_129_rule(Parser *p); -static asdl_seq *_loop0_130_rule(Parser *p); -static asdl_seq *_gather_131_rule(Parser *p); -static void *_tmp_132_rule(Parser *p); +static void *_tmp_130_rule(Parser *p); +static asdl_seq *_loop0_131_rule(Parser *p); +static asdl_seq *_gather_132_rule(Parser *p); static void *_tmp_133_rule(Parser *p); static void *_tmp_134_rule(Parser *p); static void *_tmp_135_rule(Parser *p); -static asdl_seq *_loop0_136_rule(Parser *p); -static asdl_seq *_gather_137_rule(Parser *p); -static void *_tmp_138_rule(Parser *p); -static asdl_seq *_loop0_139_rule(Parser *p); -static asdl_seq *_gather_140_rule(Parser *p); -static asdl_seq *_loop0_141_rule(Parser *p); -static asdl_seq *_gather_142_rule(Parser *p); -static void *_tmp_143_rule(Parser *p); -static asdl_seq *_loop0_144_rule(Parser *p); -static void *_tmp_145_rule(Parser *p); +static void *_tmp_136_rule(Parser *p); +static asdl_seq *_loop0_137_rule(Parser *p); +static asdl_seq *_gather_138_rule(Parser *p); +static void *_tmp_139_rule(Parser *p); +static asdl_seq *_loop0_140_rule(Parser *p); +static asdl_seq *_gather_141_rule(Parser *p); +static asdl_seq *_loop0_142_rule(Parser *p); +static asdl_seq *_gather_143_rule(Parser *p); +static void *_tmp_144_rule(Parser *p); +static asdl_seq *_loop0_145_rule(Parser *p); static void *_tmp_146_rule(Parser *p); static void *_tmp_147_rule(Parser *p); static void *_tmp_148_rule(Parser *p); @@ -965,12 +968,13 @@ static void *_tmp_167_rule(Parser *p); static void *_tmp_168_rule(Parser *p); static void *_tmp_169_rule(Parser *p); static void *_tmp_170_rule(Parser *p); -static asdl_seq *_loop0_171_rule(Parser *p); -static void *_tmp_172_rule(Parser *p); +static void *_tmp_171_rule(Parser *p); +static asdl_seq *_loop0_172_rule(Parser *p); static void *_tmp_173_rule(Parser *p); static void *_tmp_174_rule(Parser *p); static void *_tmp_175_rule(Parser *p); static void *_tmp_176_rule(Parser *p); +static void *_tmp_177_rule(Parser *p); // file: statements? $ @@ -1197,7 +1201,7 @@ statements_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ statements[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "statement+")); - _res = _PyPegen_register_stmts ( p , ( asdl_stmt_seq* ) _PyPegen_seq_flatten ( p , a ) ); + _res = ( asdl_stmt_seq* ) _PyPegen_seq_flatten ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; @@ -1240,7 +1244,7 @@ statement_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ statement[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt")); - _res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a ); + _res = _PyPegen_register_stmts ( p , ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; @@ -1677,7 +1681,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('import' | 'from') import_stmt")); stmt_ty import_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_5_rule, p) + _PyPegen_lookahead(1, _tmp_5_rule, p) && (import_stmt_var = import_stmt_rule(p)) // import_stmt ) @@ -1915,7 +1919,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('def' | '@' | 'async') function_def")); stmt_ty function_def_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_6_rule, p) + _PyPegen_lookahead(1, _tmp_6_rule, p) && (function_def_var = function_def_rule(p)) // function_def ) @@ -1957,7 +1961,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('class' | '@') class_def")); stmt_ty class_def_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_7_rule, p) + _PyPegen_lookahead(1, _tmp_7_rule, p) && (class_def_var = class_def_rule(p)) // class_def ) @@ -1978,7 +1982,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('with' | 'async') with_stmt")); stmt_ty with_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_8_rule, p) + _PyPegen_lookahead(1, _tmp_8_rule, p) && (with_stmt_var = with_stmt_rule(p)) // with_stmt ) @@ -1999,7 +2003,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('for' | 'async') for_stmt")); stmt_ty for_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_9_rule, p) + _PyPegen_lookahead(1, _tmp_9_rule, p) && (for_stmt_var = for_stmt_rule(p)) // for_stmt ) @@ -3213,7 +3217,7 @@ del_stmt_rule(Parser *p) && (a = del_targets_rule(p)) // del_targets && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_16_rule, p) + _PyPegen_lookahead(1, _tmp_16_rule, p) ) { D(fprintf(stderr, "%*c+ del_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'del' del_targets &(';' | NEWLINE)")); @@ -6856,7 +6860,7 @@ with_item_rule(Parser *p) && (t = star_target_rule(p)) // star_target && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_36_rule, p) + _PyPegen_lookahead(1, _tmp_36_rule, p) ) { D(fprintf(stderr, "%*c+ with_item[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression 'as' star_target &(',' | ')' | ':')")); @@ -8445,7 +8449,7 @@ literal_pattern_rule(Parser *p) if ( (value = signed_number_rule(p)) // signed_number && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_42_rule, p) + _PyPegen_lookahead(0, _tmp_42_rule, p) ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "signed_number !('+' | '-')")); @@ -8679,7 +8683,7 @@ literal_expr_rule(Parser *p) if ( (signed_number_var = signed_number_rule(p)) // signed_number && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_42_rule, p) + _PyPegen_lookahead(0, _tmp_42_rule, p) ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "signed_number !('+' | '-')")); @@ -8717,7 +8721,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&(STRING | FSTRING_START | TSTRING_START) strings")); expr_ty strings_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_43_rule, p) + _PyPegen_lookahead(1, _tmp_43_rule, p) && (strings_var = strings_rule(p)) // strings ) @@ -9281,7 +9285,7 @@ pattern_capture_target_rule(Parser *p) && (name = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_44_rule, p) + _PyPegen_lookahead(0, _tmp_44_rule, p) ) { D(fprintf(stderr, "%*c+ pattern_capture_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!\"_\" NAME !('.' | '(' | '=')")); @@ -9396,7 +9400,7 @@ value_pattern_rule(Parser *p) if ( (attr = attr_rule(p)) // attr && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_44_rule, p) + _PyPegen_lookahead(0, _tmp_44_rule, p) ) { D(fprintf(stderr, "%*c+ value_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "attr !('.' | '(' | '=')")); @@ -15049,7 +15053,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&(STRING | FSTRING_START | TSTRING_START) strings")); expr_ty strings_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_43_rule, p) + _PyPegen_lookahead(1, _tmp_43_rule, p) && (strings_var = strings_rule(p)) // strings ) @@ -17057,7 +17061,7 @@ string_rule(Parser *p) return _res; } -// strings: ((fstring | string | tstring))+ +// strings: invalid_string_tstring_concat | ((fstring | string))+ | tstring+ static expr_ty strings_rule(Parser *p) { @@ -17083,18 +17087,37 @@ strings_rule(Parser *p) UNUSED(_start_lineno); // Only used by EXTRA macro int _start_col_offset = p->tokens[_mark]->col_offset; UNUSED(_start_col_offset); // Only used by EXTRA macro - { // ((fstring | string | tstring))+ + if (p->call_invalid_rules) { // invalid_string_tstring_concat + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> strings[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_string_tstring_concat")); + void *invalid_string_tstring_concat_var; + if ( + (invalid_string_tstring_concat_var = invalid_string_tstring_concat_rule(p)) // invalid_string_tstring_concat + ) + { + D(fprintf(stderr, "%*c+ strings[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_string_tstring_concat")); + _res = invalid_string_tstring_concat_var; + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s strings[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "invalid_string_tstring_concat")); + } + { // ((fstring | string))+ if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> strings[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "((fstring | string | tstring))+")); + D(fprintf(stderr, "%*c> strings[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "((fstring | string))+")); asdl_expr_seq* a; if ( - (a = (asdl_expr_seq*)_loop1_81_rule(p)) // ((fstring | string | tstring))+ + (a = (asdl_expr_seq*)_loop1_81_rule(p)) // ((fstring | string))+ ) { - D(fprintf(stderr, "%*c+ strings[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "((fstring | string | tstring))+")); + D(fprintf(stderr, "%*c+ strings[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "((fstring | string))+")); Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; @@ -17114,7 +17137,40 @@ strings_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s strings[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "((fstring | string | tstring))+")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "((fstring | string))+")); + } + { // tstring+ + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> strings[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "tstring+")); + asdl_expr_seq* a; + if ( + (a = (asdl_expr_seq*)_loop1_82_rule(p)) // tstring+ + ) + { + D(fprintf(stderr, "%*c+ strings[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "tstring+")); + Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); + if (_token == NULL) { + p->level--; + return NULL; + } + int _end_lineno = _token->end_lineno; + UNUSED(_end_lineno); // Only used by EXTRA macro + int _end_col_offset = _token->end_col_offset; + UNUSED(_end_col_offset); // Only used by EXTRA macro + _res = _PyPegen_concatenate_tstrings ( p , a , EXTRA ); + if (_res == NULL && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s strings[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "tstring+")); } _res = NULL; done: @@ -17224,7 +17280,7 @@ tuple_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 7)) // token='(' && - (a = _tmp_82_rule(p), !p->error_indicator) // [star_named_expression ',' star_named_expressions?] + (a = _tmp_83_rule(p), !p->error_indicator) // [star_named_expression ',' star_named_expressions?] && (_literal_1 = _PyPegen_expect_token(p, 8)) // token=')' ) @@ -17439,7 +17495,7 @@ double_starred_kvpairs_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings asdl_seq * a; if ( - (a = _gather_84_rule(p)) // ','.double_starred_kvpair+ + (a = _gather_85_rule(p)) // ','.double_starred_kvpair+ && (_opt_var = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? ) @@ -17598,7 +17654,7 @@ for_if_clauses_rule(Parser *p) D(fprintf(stderr, "%*c> for_if_clauses[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "for_if_clause+")); asdl_comprehension_seq* a; if ( - (a = (asdl_comprehension_seq*)_loop1_85_rule(p)) // for_if_clause+ + (a = (asdl_comprehension_seq*)_loop1_86_rule(p)) // for_if_clause+ ) { D(fprintf(stderr, "%*c+ for_if_clauses[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "for_if_clause+")); @@ -17663,7 +17719,7 @@ for_if_clause_rule(Parser *p) && (b = disjunction_rule(p)) // disjunction && - (c = (asdl_expr_seq*)_loop0_86_rule(p)) // (('if' disjunction))* + (c = (asdl_expr_seq*)_loop0_87_rule(p)) // (('if' disjunction))* ) { D(fprintf(stderr, "%*c+ for_if_clause[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async' 'for' star_targets 'in' ~ disjunction (('if' disjunction))*")); @@ -17706,7 +17762,7 @@ for_if_clause_rule(Parser *p) && (b = disjunction_rule(p)) // disjunction && - (c = (asdl_expr_seq*)_loop0_86_rule(p)) // (('if' disjunction))* + (c = (asdl_expr_seq*)_loop0_87_rule(p)) // (('if' disjunction))* ) { D(fprintf(stderr, "%*c+ for_if_clause[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'for' star_targets 'in' ~ disjunction (('if' disjunction))*")); @@ -17985,7 +18041,7 @@ genexp_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 7)) // token='(' && - (a = _tmp_87_rule(p)) // assignment_expression | expression !':=' + (a = _tmp_88_rule(p)) // assignment_expression | expression !':=' && (b = for_if_clauses_rule(p)) // for_if_clauses && @@ -18234,9 +18290,9 @@ args_rule(Parser *p) asdl_expr_seq* a; void *b; if ( - (a = (asdl_expr_seq*)_gather_89_rule(p)) // ','.(starred_expression | (assignment_expression | expression !':=') !'=')+ + (a = (asdl_expr_seq*)_gather_90_rule(p)) // ','.(starred_expression | (assignment_expression | expression !':=') !'=')+ && - (b = _tmp_90_rule(p), !p->error_indicator) // [',' kwargs] + (b = _tmp_91_rule(p), !p->error_indicator) // [',' kwargs] ) { D(fprintf(stderr, "%*c+ args[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.(starred_expression | (assignment_expression | expression !':=') !'=')+ [',' kwargs]")); @@ -18326,11 +18382,11 @@ kwargs_rule(Parser *p) asdl_seq * a; asdl_seq * b; if ( - (a = _gather_92_rule(p)) // ','.kwarg_or_starred+ + (a = _gather_93_rule(p)) // ','.kwarg_or_starred+ && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (b = _gather_94_rule(p)) // ','.kwarg_or_double_starred+ + (b = _gather_95_rule(p)) // ','.kwarg_or_double_starred+ ) { D(fprintf(stderr, "%*c+ kwargs[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.kwarg_or_starred+ ',' ','.kwarg_or_double_starred+")); @@ -18352,13 +18408,13 @@ kwargs_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> kwargs[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','.kwarg_or_starred+")); - asdl_seq * _gather_92_var; + asdl_seq * _gather_93_var; if ( - (_gather_92_var = _gather_92_rule(p)) // ','.kwarg_or_starred+ + (_gather_93_var = _gather_93_rule(p)) // ','.kwarg_or_starred+ ) { D(fprintf(stderr, "%*c+ kwargs[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.kwarg_or_starred+")); - _res = _gather_92_var; + _res = _gather_93_var; goto done; } p->mark = _mark; @@ -18371,13 +18427,13 @@ kwargs_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> kwargs[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','.kwarg_or_double_starred+")); - asdl_seq * _gather_94_var; + asdl_seq * _gather_95_var; if ( - (_gather_94_var = _gather_94_rule(p)) // ','.kwarg_or_double_starred+ + (_gather_95_var = _gather_95_rule(p)) // ','.kwarg_or_double_starred+ ) { D(fprintf(stderr, "%*c+ kwargs[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.kwarg_or_double_starred+")); - _res = _gather_94_var; + _res = _gather_95_var; goto done; } p->mark = _mark; @@ -18788,7 +18844,7 @@ star_targets_rule(Parser *p) if ( (a = star_target_rule(p)) // star_target && - (b = _loop0_95_rule(p)) // ((',' star_target))* + (b = _loop0_96_rule(p)) // ((',' star_target))* && (_opt_var = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? ) @@ -18844,7 +18900,7 @@ star_targets_list_seq_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings asdl_expr_seq* a; if ( - (a = (asdl_expr_seq*)_gather_97_rule(p)) // ','.star_target+ + (a = (asdl_expr_seq*)_gather_98_rule(p)) // ','.star_target+ && (_opt_var = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? ) @@ -18894,7 +18950,7 @@ star_targets_tuple_seq_rule(Parser *p) if ( (a = star_target_rule(p)) // star_target && - (b = _loop1_98_rule(p)) // ((',' star_target))+ + (b = _loop1_99_rule(p)) // ((',' star_target))+ && (_opt_var = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? ) @@ -18982,7 +19038,7 @@ star_target_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 16)) // token='*' && - (a = _tmp_99_rule(p)) // !'*' star_target + (a = _tmp_100_rule(p)) // !'*' star_target ) { D(fprintf(stderr, "%*c+ star_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' (!'*' star_target)")); @@ -19078,7 +19134,7 @@ target_with_star_atom_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ target_with_star_atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -19122,7 +19178,7 @@ target_with_star_atom_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ target_with_star_atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -19469,7 +19525,7 @@ single_subscript_attribute_target_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ single_subscript_attribute_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -19513,7 +19569,7 @@ single_subscript_attribute_target_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ single_subscript_attribute_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -19623,7 +19679,7 @@ t_primary_raw(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME &t_lookahead")); @@ -19667,7 +19723,7 @@ t_primary_raw(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' &t_lookahead")); @@ -19705,7 +19761,7 @@ t_primary_raw(Parser *p) && (b = genexp_rule(p)) // genexp && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary genexp &t_lookahead")); @@ -19749,7 +19805,7 @@ t_primary_raw(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 8)) // token=')' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '(' arguments? ')' &t_lookahead")); @@ -19784,7 +19840,7 @@ t_primary_raw(Parser *p) if ( (a = atom_rule(p)) // atom && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "atom &t_lookahead")); @@ -19905,7 +19961,7 @@ del_targets_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings asdl_expr_seq* a; if ( - (a = (asdl_expr_seq*)_gather_101_rule(p)) // ','.del_target+ + (a = (asdl_expr_seq*)_gather_102_rule(p)) // ','.del_target+ && (_opt_var = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? ) @@ -19974,7 +20030,7 @@ del_target_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ del_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -20018,7 +20074,7 @@ del_target_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ del_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -20263,7 +20319,7 @@ type_expressions_rule(Parser *p) expr_ty b; expr_ty c; if ( - (a = _gather_103_rule(p)) // ','.expression+ + (a = _gather_104_rule(p)) // ','.expression+ && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && @@ -20302,7 +20358,7 @@ type_expressions_rule(Parser *p) asdl_seq * a; expr_ty b; if ( - (a = _gather_103_rule(p)) // ','.expression+ + (a = _gather_104_rule(p)) // ','.expression+ && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && @@ -20335,7 +20391,7 @@ type_expressions_rule(Parser *p) asdl_seq * a; expr_ty b; if ( - (a = _gather_103_rule(p)) // ','.expression+ + (a = _gather_104_rule(p)) // ','.expression+ && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && @@ -20455,7 +20511,7 @@ type_expressions_rule(Parser *p) D(fprintf(stderr, "%*c> type_expressions[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','.expression+")); asdl_expr_seq* a; if ( - (a = (asdl_expr_seq*)_gather_103_rule(p)) // ','.expression+ + (a = (asdl_expr_seq*)_gather_104_rule(p)) // ','.expression+ ) { D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+")); @@ -20506,7 +20562,7 @@ func_type_comment_rule(Parser *p) && (t = _PyPegen_expect_token(p, TYPE_COMMENT)) // token='TYPE_COMMENT' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_104_rule, p) + _PyPegen_lookahead(1, _tmp_105_rule, p) ) { D(fprintf(stderr, "%*c+ func_type_comment[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE TYPE_COMMENT &(NEWLINE INDENT)")); @@ -20592,15 +20648,15 @@ invalid_arguments_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "((','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs) | kwargs) ',' ','.(starred_expression !'=')+")); - asdl_seq * _gather_107_var; - void *_tmp_105_var; + asdl_seq * _gather_108_var; + void *_tmp_106_var; Token * a; if ( - (_tmp_105_var = _tmp_105_rule(p)) // (','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs) | kwargs + (_tmp_106_var = _tmp_106_rule(p)) // (','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs) | kwargs && (a = _PyPegen_expect_token(p, 12)) // token=',' && - (_gather_107_var = _gather_107_rule(p)) // ','.(starred_expression !'=')+ + (_gather_108_var = _gather_108_rule(p)) // ','.(starred_expression !'=')+ ) { D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "((','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs) | kwargs) ',' ','.(starred_expression !'=')+")); @@ -20634,7 +20690,7 @@ invalid_arguments_rule(Parser *p) && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (_opt_var = _tmp_108_rule(p), !p->error_indicator) // [args | expression for_if_clauses] + (_opt_var = _tmp_109_rule(p), !p->error_indicator) // [args | expression for_if_clauses] ) { D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]")); @@ -20694,13 +20750,13 @@ invalid_arguments_rule(Parser *p) expr_ty a; Token * b; if ( - (_opt_var = _tmp_109_rule(p), !p->error_indicator) // [(args ',')] + (_opt_var = _tmp_110_rule(p), !p->error_indicator) // [(args ',')] && (a = _PyPegen_name_token(p)) // NAME && (b = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_110_rule, p) + _PyPegen_lookahead(1, _tmp_111_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "[(args ',')] NAME '=' &(',' | ')')")); @@ -20838,7 +20894,7 @@ invalid_kwarg_rule(Parser *p) Token* a; Token * b; if ( - (a = (Token*)_tmp_111_rule(p)) // 'True' | 'False' | 'None' + (a = (Token*)_tmp_112_rule(p)) // 'True' | 'False' | 'None' && (b = _PyPegen_expect_token(p, 22)) // token='=' ) @@ -20898,7 +20954,7 @@ invalid_kwarg_rule(Parser *p) expr_ty a; Token * b; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_112_rule, p) + _PyPegen_lookahead(0, _tmp_113_rule, p) && (a = expression_rule(p)) // expression && @@ -21246,7 +21302,7 @@ invalid_expression_rule(Parser *p) if ( (string_var = _PyPegen_string_token(p)) // STRING && - (a = _loop1_113_rule(p)) // ((!STRING expression_without_invalid))+ + (a = _loop1_114_rule(p)) // ((!STRING expression_without_invalid))+ && (string_var_1 = _PyPegen_string_token(p)) // STRING ) @@ -21273,7 +21329,7 @@ invalid_expression_rule(Parser *p) expr_ty a; expr_ty b; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_114_rule, p) + _PyPegen_lookahead(0, _tmp_115_rule, p) && (a = disjunction_rule(p)) // disjunction && @@ -21309,7 +21365,7 @@ invalid_expression_rule(Parser *p) && (b = disjunction_rule(p)) // disjunction && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_115_rule, p) + _PyPegen_lookahead(0, _tmp_116_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "disjunction 'if' disjunction !('else' | ':')")); @@ -21344,7 +21400,7 @@ invalid_expression_rule(Parser *p) && (_keyword_1 = _PyPegen_expect_token(p, 686)) // token='else' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) expression_rule, p) + _PyPegen_lookahead_for_expr(0, expression_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "disjunction 'if' disjunction 'else' !expression")); @@ -21372,7 +21428,7 @@ invalid_expression_rule(Parser *p) expr_ty b; stmt_ty c; if ( - (a = (stmt_ty)_tmp_116_rule(p)) // pass_stmt | break_stmt | continue_stmt + (a = (stmt_ty)_tmp_117_rule(p)) // pass_stmt | break_stmt | continue_stmt && (_keyword = _PyPegen_expect_token(p, 682)) // token='if' && @@ -21534,7 +21590,7 @@ invalid_named_expression_rule(Parser *p) && (b = bitwise_or_rule(p)) // bitwise_or && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_117_rule, p) + _PyPegen_lookahead(0, _tmp_118_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_named_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME '=' bitwise_or !('=' | ':=')")); @@ -21560,7 +21616,7 @@ invalid_named_expression_rule(Parser *p) Token * b; expr_ty bitwise_or_var; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_118_rule, p) + _PyPegen_lookahead(0, _tmp_119_rule, p) && (a = bitwise_or_rule(p)) // bitwise_or && @@ -21568,7 +21624,7 @@ invalid_named_expression_rule(Parser *p) && (bitwise_or_var = bitwise_or_rule(p)) // bitwise_or && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_117_rule, p) + _PyPegen_lookahead(0, _tmp_118_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_named_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!(list | tuple | genexp | 'True' | 'None' | 'False') bitwise_or '=' bitwise_or !('=' | ':=')")); @@ -21648,7 +21704,7 @@ invalid_assignment_rule(Parser *p) D(fprintf(stderr, "%*c> invalid_assignment[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_named_expression ',' star_named_expressions* ':' expression")); Token * _literal; Token * _literal_1; - asdl_seq * _loop0_119_var; + asdl_seq * _loop0_120_var; expr_ty a; expr_ty expression_var; if ( @@ -21656,7 +21712,7 @@ invalid_assignment_rule(Parser *p) && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (_loop0_119_var = _loop0_119_rule(p)) // star_named_expressions* + (_loop0_120_var = _loop0_120_rule(p)) // star_named_expressions* && (_literal_1 = _PyPegen_expect_token(p, 11)) // token=':' && @@ -21713,10 +21769,10 @@ invalid_assignment_rule(Parser *p) } D(fprintf(stderr, "%*c> invalid_assignment[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "((star_targets '='))* star_expressions '='")); Token * _literal; - asdl_seq * _loop0_120_var; + asdl_seq * _loop0_121_var; expr_ty a; if ( - (_loop0_120_var = _loop0_120_rule(p)) // ((star_targets '='))* + (_loop0_121_var = _loop0_121_rule(p)) // ((star_targets '='))* && (a = star_expressions_rule(p)) // star_expressions && @@ -21743,10 +21799,10 @@ invalid_assignment_rule(Parser *p) } D(fprintf(stderr, "%*c> invalid_assignment[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "((star_targets '='))* yield_expr '='")); Token * _literal; - asdl_seq * _loop0_120_var; + asdl_seq * _loop0_121_var; expr_ty a; if ( - (_loop0_120_var = _loop0_120_rule(p)) // ((star_targets '='))* + (_loop0_121_var = _loop0_121_rule(p)) // ((star_targets '='))* && (a = yield_expr_rule(p)) // yield_expr && @@ -22002,11 +22058,11 @@ invalid_comprehension_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_comprehension[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('[' | '(' | '{') starred_expression for_if_clauses")); - void *_tmp_121_var; + void *_tmp_122_var; expr_ty a; asdl_comprehension_seq* for_if_clauses_var; if ( - (_tmp_121_var = _tmp_121_rule(p)) // '[' | '(' | '{' + (_tmp_122_var = _tmp_122_rule(p)) // '[' | '(' | '{' && (a = starred_expression_rule(p)) // starred_expression && @@ -22033,12 +22089,12 @@ invalid_comprehension_rule(Parser *p) } D(fprintf(stderr, "%*c> invalid_comprehension[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('[' | '{') star_named_expression ',' star_named_expressions for_if_clauses")); Token * _literal; - void *_tmp_122_var; + void *_tmp_123_var; expr_ty a; asdl_expr_seq* b; asdl_comprehension_seq* for_if_clauses_var; if ( - (_tmp_122_var = _tmp_122_rule(p)) // '[' | '{' + (_tmp_123_var = _tmp_123_rule(p)) // '[' | '{' && (a = star_named_expression_rule(p)) // star_named_expression && @@ -22068,12 +22124,12 @@ invalid_comprehension_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_comprehension[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('[' | '{') star_named_expression ',' for_if_clauses")); - void *_tmp_122_var; + void *_tmp_123_var; expr_ty a; Token * b; asdl_comprehension_seq* for_if_clauses_var; if ( - (_tmp_122_var = _tmp_122_rule(p)) // '[' | '{' + (_tmp_123_var = _tmp_123_rule(p)) // '[' | '{' && (a = star_named_expression_rule(p)) // star_named_expression && @@ -22209,10 +22265,10 @@ invalid_parameters_rule(Parser *p) } D(fprintf(stderr, "%*c> invalid_parameters[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(slash_no_default | slash_with_default) param_maybe_default* '/'")); asdl_seq * _loop0_32_var; - void *_tmp_123_var; + void *_tmp_124_var; Token * a; if ( - (_tmp_123_var = _tmp_123_rule(p)) // slash_no_default | slash_with_default + (_tmp_124_var = _tmp_124_rule(p)) // slash_no_default | slash_with_default && (_loop0_32_var = _loop0_32_rule(p)) // param_maybe_default* && @@ -22314,16 +22370,16 @@ invalid_parameters_rule(Parser *p) asdl_seq * _loop0_32_var_1; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings - void *_tmp_124_var; + void *_tmp_125_var; Token * a; if ( - (_opt_var = _tmp_123_rule(p), !p->error_indicator) // [(slash_no_default | slash_with_default)] + (_opt_var = _tmp_124_rule(p), !p->error_indicator) // [(slash_no_default | slash_with_default)] && (_loop0_32_var = _loop0_32_rule(p)) // param_maybe_default* && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_124_var = _tmp_124_rule(p)) // ',' | param_no_default + (_tmp_125_var = _tmp_125_rule(p)) // ',' | param_no_default && (_loop0_32_var_1 = _loop0_32_rule(p)) // param_maybe_default* && @@ -22402,7 +22458,7 @@ invalid_default_rule(Parser *p) if ( (a = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_125_rule, p) + _PyPegen_lookahead(1, _tmp_126_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'=' &(')' | ',')")); @@ -22447,12 +22503,12 @@ invalid_star_etc_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_star_etc[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*' (')' | ',' (')' | '**'))")); - void *_tmp_126_var; + void *_tmp_127_var; Token * a; if ( (a = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_126_var = _tmp_126_rule(p)) // ')' | ',' (')' | '**') + (_tmp_127_var = _tmp_127_rule(p)) // ')' | ',' (')' | '**') ) { D(fprintf(stderr, "%*c+ invalid_star_etc[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' (')' | ',' (')' | '**'))")); @@ -22536,19 +22592,19 @@ invalid_star_etc_rule(Parser *p) D(fprintf(stderr, "%*c> invalid_star_etc[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*' (param_no_default | ',') param_maybe_default* '*' (param_no_default | ',')")); Token * _literal; asdl_seq * _loop0_32_var; - void *_tmp_127_var; - void *_tmp_127_var_1; + void *_tmp_128_var; + void *_tmp_128_var_1; Token * a; if ( (_literal = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_127_var = _tmp_127_rule(p)) // param_no_default | ',' + (_tmp_128_var = _tmp_128_rule(p)) // param_no_default | ',' && (_loop0_32_var = _loop0_32_rule(p)) // param_maybe_default* && (a = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_127_var_1 = _tmp_127_rule(p)) // param_no_default | ',' + (_tmp_128_var_1 = _tmp_128_rule(p)) // param_no_default | ',' ) { D(fprintf(stderr, "%*c+ invalid_star_etc[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' (param_no_default | ',') param_maybe_default* '*' (param_no_default | ',')")); @@ -22663,7 +22719,7 @@ invalid_kwds_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 12)) // token=',' && - (a = (Token*)_tmp_128_rule(p)) // '*' | '**' | '/' + (a = (Token*)_tmp_129_rule(p)) // '*' | '**' | '/' ) { D(fprintf(stderr, "%*c+ invalid_kwds[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' param ',' ('*' | '**' | '/')")); @@ -22800,10 +22856,10 @@ invalid_lambda_parameters_rule(Parser *p) } D(fprintf(stderr, "%*c> invalid_lambda_parameters[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(lambda_slash_no_default | lambda_slash_with_default) lambda_param_maybe_default* '/'")); asdl_seq * _loop0_75_var; - void *_tmp_129_var; + void *_tmp_130_var; Token * a; if ( - (_tmp_129_var = _tmp_129_rule(p)) // lambda_slash_no_default | lambda_slash_with_default + (_tmp_130_var = _tmp_130_rule(p)) // lambda_slash_no_default | lambda_slash_with_default && (_loop0_75_var = _loop0_75_rule(p)) // lambda_param_maybe_default* && @@ -22863,7 +22919,7 @@ invalid_lambda_parameters_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_lambda_parameters[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* '(' ','.lambda_param+ ','? ')'")); - asdl_seq * _gather_131_var; + asdl_seq * _gather_132_var; asdl_seq * _loop0_71_var; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings @@ -22874,7 +22930,7 @@ invalid_lambda_parameters_rule(Parser *p) && (a = _PyPegen_expect_token(p, 7)) // token='(' && - (_gather_131_var = _gather_131_rule(p)) // ','.lambda_param+ + (_gather_132_var = _gather_132_rule(p)) // ','.lambda_param+ && (_opt_var = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? && @@ -22905,16 +22961,16 @@ invalid_lambda_parameters_rule(Parser *p) asdl_seq * _loop0_75_var_1; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings - void *_tmp_132_var; + void *_tmp_133_var; Token * a; if ( - (_opt_var = _tmp_129_rule(p), !p->error_indicator) // [(lambda_slash_no_default | lambda_slash_with_default)] + (_opt_var = _tmp_130_rule(p), !p->error_indicator) // [(lambda_slash_no_default | lambda_slash_with_default)] && (_loop0_75_var = _loop0_75_rule(p)) // lambda_param_maybe_default* && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_132_var = _tmp_132_rule(p)) // ',' | lambda_param_no_default + (_tmp_133_var = _tmp_133_rule(p)) // ',' | lambda_param_no_default && (_loop0_75_var_1 = _loop0_75_rule(p)) // lambda_param_maybe_default* && @@ -23057,11 +23113,11 @@ invalid_lambda_star_etc_rule(Parser *p) } D(fprintf(stderr, "%*c> invalid_lambda_star_etc[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*' (':' | ',' (':' | '**'))")); Token * _literal; - void *_tmp_133_var; + void *_tmp_134_var; if ( (_literal = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_133_var = _tmp_133_rule(p)) // ':' | ',' (':' | '**') + (_tmp_134_var = _tmp_134_rule(p)) // ':' | ',' (':' | '**') ) { D(fprintf(stderr, "%*c+ invalid_lambda_star_etc[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' (':' | ',' (':' | '**'))")); @@ -23115,19 +23171,19 @@ invalid_lambda_star_etc_rule(Parser *p) D(fprintf(stderr, "%*c> invalid_lambda_star_etc[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*' (lambda_param_no_default | ',') lambda_param_maybe_default* '*' (lambda_param_no_default | ',')")); Token * _literal; asdl_seq * _loop0_75_var; - void *_tmp_134_var; - void *_tmp_134_var_1; + void *_tmp_135_var; + void *_tmp_135_var_1; Token * a; if ( (_literal = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_134_var = _tmp_134_rule(p)) // lambda_param_no_default | ',' + (_tmp_135_var = _tmp_135_rule(p)) // lambda_param_no_default | ',' && (_loop0_75_var = _loop0_75_rule(p)) // lambda_param_maybe_default* && (a = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_134_var_1 = _tmp_134_rule(p)) // lambda_param_no_default | ',' + (_tmp_135_var_1 = _tmp_135_rule(p)) // lambda_param_no_default | ',' ) { D(fprintf(stderr, "%*c+ invalid_lambda_star_etc[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' (lambda_param_no_default | ',') lambda_param_maybe_default* '*' (lambda_param_no_default | ',')")); @@ -23245,7 +23301,7 @@ invalid_lambda_kwds_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 12)) // token=',' && - (a = (Token*)_tmp_128_rule(p)) // '*' | '**' | '/' + (a = (Token*)_tmp_129_rule(p)) // '*' | '**' | '/' ) { D(fprintf(stderr, "%*c+ invalid_lambda_kwds[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' lambda_param ',' ('*' | '**' | '/')")); @@ -23351,7 +23407,7 @@ invalid_with_item_rule(Parser *p) && (a = expression_rule(p)) // expression && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_36_rule, p) + _PyPegen_lookahead(1, _tmp_36_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_with_item[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression 'as' expression &(',' | ')' | ':')")); @@ -23395,13 +23451,13 @@ invalid_for_if_clause_rule(Parser *p) Token * _keyword; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings - void *_tmp_135_var; + void *_tmp_136_var; if ( (_opt_var = _PyPegen_expect_token(p, 698), !p->error_indicator) // 'async'? && (_keyword = _PyPegen_expect_token(p, 694)) // token='for' && - (_tmp_135_var = _tmp_135_rule(p)) // bitwise_or ((',' bitwise_or))* ','? + (_tmp_136_var = _tmp_136_rule(p)) // bitwise_or ((',' bitwise_or))* ','? && _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 695) // token='in' ) @@ -23576,14 +23632,14 @@ invalid_import_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_import[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'import' ','.dotted_name+ 'from' dotted_name")); - asdl_seq * _gather_137_var; + asdl_seq * _gather_138_var; Token * _keyword; Token * a; expr_ty dotted_name_var; if ( (a = _PyPegen_expect_token(p, 634)) // token='import' && - (_gather_137_var = _gather_137_rule(p)) // ','.dotted_name+ + (_gather_138_var = _gather_138_rule(p)) // ','.dotted_name+ && (_keyword = _PyPegen_expect_token(p, 633)) // token='from' && @@ -23636,7 +23692,7 @@ invalid_import_rule(Parser *p) return _res; } -// invalid_dotted_as_name: dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) expression +// invalid_dotted_as_name: dotted_name 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression static void * invalid_dotted_as_name_rule(Parser *p) { @@ -23649,12 +23705,12 @@ invalid_dotted_as_name_rule(Parser *p) } void * _res = NULL; int _mark = p->mark; - { // dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) expression + { // dotted_name 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_dotted_as_name[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) expression")); + D(fprintf(stderr, "%*c> invalid_dotted_as_name[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "dotted_name 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression")); Token * _keyword; expr_ty a; expr_ty dotted_name_var; @@ -23663,12 +23719,12 @@ invalid_dotted_as_name_rule(Parser *p) && (_keyword = _PyPegen_expect_token(p, 680)) // token='as' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) + _PyPegen_lookahead(0, _tmp_139_rule, p) && (a = expression_rule(p)) // expression ) { - D(fprintf(stderr, "%*c+ invalid_dotted_as_name[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) expression")); + D(fprintf(stderr, "%*c+ invalid_dotted_as_name[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "dotted_name 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use %s as import target" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -23679,7 +23735,7 @@ invalid_dotted_as_name_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_dotted_as_name[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "dotted_name 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression")); } _res = NULL; done: @@ -23687,7 +23743,7 @@ invalid_dotted_as_name_rule(Parser *p) return _res; } -// invalid_import_from_as_name: NAME 'as' !(NAME (',' | ')' | NEWLINE)) expression +// invalid_import_from_as_name: NAME 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression static void * invalid_import_from_as_name_rule(Parser *p) { @@ -23700,12 +23756,12 @@ invalid_import_from_as_name_rule(Parser *p) } void * _res = NULL; int _mark = p->mark; - { // NAME 'as' !(NAME (',' | ')' | NEWLINE)) expression + { // NAME 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_import_from_as_name[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME 'as' !(NAME (',' | ')' | NEWLINE)) expression")); + D(fprintf(stderr, "%*c> invalid_import_from_as_name[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression")); Token * _keyword; expr_ty a; expr_ty name_var; @@ -23714,12 +23770,12 @@ invalid_import_from_as_name_rule(Parser *p) && (_keyword = _PyPegen_expect_token(p, 680)) // token='as' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) + _PyPegen_lookahead(0, _tmp_139_rule, p) && (a = expression_rule(p)) // expression ) { - D(fprintf(stderr, "%*c+ invalid_import_from_as_name[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME 'as' !(NAME (',' | ')' | NEWLINE)) expression")); + D(fprintf(stderr, "%*c+ invalid_import_from_as_name[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use %s as import target" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -23730,7 +23786,7 @@ invalid_import_from_as_name_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_import_from_as_name[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NAME 'as' !(NAME (',' | ')' | NEWLINE)) expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NAME 'as' !(NAME (',' | ')' | ';' | NEWLINE)) expression")); } _res = NULL; done: @@ -23832,7 +23888,7 @@ invalid_with_stmt_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_with_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'? 'with' ','.(expression ['as' star_target])+ NEWLINE")); - asdl_seq * _gather_140_var; + asdl_seq * _gather_141_var; Token * _keyword; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings @@ -23842,7 +23898,7 @@ invalid_with_stmt_rule(Parser *p) && (_keyword = _PyPegen_expect_token(p, 647)) // token='with' && - (_gather_140_var = _gather_140_rule(p)) // ','.(expression ['as' star_target])+ + (_gather_141_var = _gather_141_rule(p)) // ','.(expression ['as' star_target])+ && (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) @@ -23866,7 +23922,7 @@ invalid_with_stmt_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_with_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'? 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' NEWLINE")); - asdl_seq * _gather_142_var; + asdl_seq * _gather_143_var; Token * _keyword; Token * _literal; Token * _literal_1; @@ -23882,7 +23938,7 @@ invalid_with_stmt_rule(Parser *p) && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && - (_gather_142_var = _gather_142_rule(p)) // ','.(expressions ['as' star_target])+ + (_gather_143_var = _gather_143_rule(p)) // ','.(expressions ['as' star_target])+ && (_opt_var_1 = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? && @@ -23931,7 +23987,7 @@ invalid_with_stmt_indent_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_with_stmt_indent[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'? 'with' ','.(expression ['as' star_target])+ ':' NEWLINE !INDENT")); - asdl_seq * _gather_140_var; + asdl_seq * _gather_141_var; Token * _literal; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings @@ -23942,7 +23998,7 @@ invalid_with_stmt_indent_rule(Parser *p) && (a = _PyPegen_expect_token(p, 647)) // token='with' && - (_gather_140_var = _gather_140_rule(p)) // ','.(expression ['as' star_target])+ + (_gather_141_var = _gather_141_rule(p)) // ','.(expression ['as' star_target])+ && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23970,7 +24026,7 @@ invalid_with_stmt_indent_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_with_stmt_indent[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'? 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' ':' NEWLINE !INDENT")); - asdl_seq * _gather_142_var; + asdl_seq * _gather_143_var; Token * _literal; Token * _literal_1; Token * _literal_2; @@ -23987,7 +24043,7 @@ invalid_with_stmt_indent_rule(Parser *p) && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && - (_gather_142_var = _gather_142_rule(p)) // ','.(expressions ['as' star_target])+ + (_gather_143_var = _gather_143_rule(p)) // ','.(expressions ['as' star_target])+ && (_opt_var_1 = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? && @@ -24084,7 +24140,7 @@ invalid_try_stmt_rule(Parser *p) && (block_var = block_rule(p)) // block && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_143_rule, p) + _PyPegen_lookahead(0, _tmp_144_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_try_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'try' ':' block !('except' | 'finally')")); @@ -24109,7 +24165,7 @@ invalid_try_stmt_rule(Parser *p) Token * _keyword; Token * _literal; Token * _literal_1; - asdl_seq * _loop0_144_var; + asdl_seq * _loop0_145_var; asdl_seq * _loop1_37_var; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings @@ -24121,7 +24177,7 @@ invalid_try_stmt_rule(Parser *p) && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && - (_loop0_144_var = _loop0_144_rule(p)) // block* + (_loop0_145_var = _loop0_145_rule(p)) // block* && (_loop1_37_var = _loop1_37_rule(p)) // except_block+ && @@ -24158,7 +24214,7 @@ invalid_try_stmt_rule(Parser *p) Token * _keyword; Token * _literal; Token * _literal_1; - asdl_seq * _loop0_144_var; + asdl_seq * _loop0_145_var; asdl_seq * _loop1_38_var; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings @@ -24168,13 +24224,13 @@ invalid_try_stmt_rule(Parser *p) && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && - (_loop0_144_var = _loop0_144_rule(p)) // block* + (_loop0_145_var = _loop0_145_rule(p)) // block* && (_loop1_38_var = _loop1_38_rule(p)) // except_star_block+ && (a = _PyPegen_expect_token(p, 677)) // token='except' && - (_opt_var = _tmp_145_rule(p), !p->error_indicator) // [expression ['as' NAME]] + (_opt_var = _tmp_146_rule(p), !p->error_indicator) // [expression ['as' NAME]] && (_literal_1 = _PyPegen_expect_token(p, 11)) // token=':' ) @@ -24202,7 +24258,7 @@ invalid_try_stmt_rule(Parser *p) // | 'except' expression ',' expressions 'as' NAME ':' // | 'except' expression ['as' NAME] NEWLINE // | 'except' NEWLINE -// | 'except' expression 'as' expression +// | 'except' expression 'as' expression ':' block static void * invalid_except_stmt_rule(Parser *p) { @@ -24318,15 +24374,17 @@ invalid_except_stmt_rule(Parser *p) D(fprintf(stderr, "%*c%s invalid_except_stmt[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' NEWLINE")); } - { // 'except' expression 'as' expression + { // 'except' expression 'as' expression ':' block if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_except_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression")); + D(fprintf(stderr, "%*c> invalid_except_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression ':' block")); Token * _keyword; Token * _keyword_1; + Token * _literal; expr_ty a; + asdl_stmt_seq* block_var; expr_ty expression_var; if ( (_keyword = _PyPegen_expect_token(p, 677)) // token='except' @@ -24336,9 +24394,13 @@ invalid_except_stmt_rule(Parser *p) (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' && (a = expression_rule(p)) // expression + && + (_literal = _PyPegen_expect_token(p, 11)) // token=':' + && + (block_var = block_rule(p)) // block ) { - D(fprintf(stderr, "%*c+ invalid_except_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression")); + D(fprintf(stderr, "%*c+ invalid_except_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression ':' block")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use except statement with %s" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -24349,7 +24411,7 @@ invalid_except_stmt_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_except_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' expression 'as' expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' expression 'as' expression ':' block")); } _res = NULL; done: @@ -24361,7 +24423,7 @@ invalid_except_stmt_rule(Parser *p) // | 'except' '*' expression ',' expressions 'as' NAME ':' // | 'except' '*' expression ['as' NAME] NEWLINE // | 'except' '*' (NEWLINE | ':') -// | 'except' '*' expression 'as' expression +// | 'except' '*' expression 'as' expression ':' block static void * invalid_except_star_stmt_rule(Parser *p) { @@ -24463,14 +24525,14 @@ invalid_except_star_stmt_rule(Parser *p) } D(fprintf(stderr, "%*c> invalid_except_star_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' '*' (NEWLINE | ':')")); Token * _literal; - void *_tmp_146_var; + void *_tmp_147_var; Token * a; if ( (a = _PyPegen_expect_token(p, 677)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && - (_tmp_146_var = _tmp_146_rule(p)) // NEWLINE | ':' + (_tmp_147_var = _tmp_147_rule(p)) // NEWLINE | ':' ) { D(fprintf(stderr, "%*c+ invalid_except_star_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' '*' (NEWLINE | ':')")); @@ -24486,16 +24548,18 @@ invalid_except_star_stmt_rule(Parser *p) D(fprintf(stderr, "%*c%s invalid_except_star_stmt[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' (NEWLINE | ':')")); } - { // 'except' '*' expression 'as' expression + { // 'except' '*' expression 'as' expression ':' block if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_except_star_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression")); + D(fprintf(stderr, "%*c> invalid_except_star_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); Token * _keyword; Token * _keyword_1; Token * _literal; + Token * _literal_1; expr_ty a; + asdl_stmt_seq* block_var; expr_ty expression_var; if ( (_keyword = _PyPegen_expect_token(p, 677)) // token='except' @@ -24507,9 +24571,13 @@ invalid_except_star_stmt_rule(Parser *p) (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' && (a = expression_rule(p)) // expression + && + (_literal_1 = _PyPegen_expect_token(p, 11)) // token=':' + && + (block_var = block_rule(p)) // block ) { - D(fprintf(stderr, "%*c+ invalid_except_star_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression")); + D(fprintf(stderr, "%*c+ invalid_except_star_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use except* statement with %s" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -24520,7 +24588,7 @@ invalid_except_star_stmt_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_except_star_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' expression 'as' expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); } _res = NULL; done: @@ -25067,7 +25135,7 @@ invalid_class_argument_pattern_rule(Parser *p) asdl_pattern_seq* a; asdl_seq* keyword_patterns_var; if ( - (_opt_var = _tmp_147_rule(p), !p->error_indicator) // [positional_patterns ','] + (_opt_var = _tmp_148_rule(p), !p->error_indicator) // [positional_patterns ','] && (keyword_patterns_var = keyword_patterns_rule(p)) // keyword_patterns && @@ -25799,11 +25867,11 @@ invalid_double_starred_kvpairs_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_double_starred_kvpairs[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','.double_starred_kvpair+ ',' invalid_kvpair")); - asdl_seq * _gather_84_var; + asdl_seq * _gather_85_var; Token * _literal; void *invalid_kvpair_var; if ( - (_gather_84_var = _gather_84_rule(p)) // ','.double_starred_kvpair+ + (_gather_85_var = _gather_85_rule(p)) // ','.double_starred_kvpair+ && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && @@ -25811,7 +25879,7 @@ invalid_double_starred_kvpairs_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ invalid_double_starred_kvpairs[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.double_starred_kvpair+ ',' invalid_kvpair")); - _res = _PyPegen_dummy_name(p, _gather_84_var, _literal, invalid_kvpair_var); + _res = _PyPegen_dummy_name(p, _gather_85_var, _literal, invalid_kvpair_var); goto done; } p->mark = _mark; @@ -25864,7 +25932,7 @@ invalid_double_starred_kvpairs_rule(Parser *p) && (a = _PyPegen_expect_token(p, 11)) // token=':' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_148_rule, p) + _PyPegen_lookahead(1, _tmp_149_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_double_starred_kvpairs[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ':' &('}' | ',')")); @@ -25974,7 +26042,7 @@ invalid_kvpair_rule(Parser *p) && (a = _PyPegen_expect_token(p, 11)) // token=':' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_148_rule, p) + _PyPegen_lookahead(1, _tmp_149_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_kvpair[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ':' &('}' | ',')")); @@ -26233,7 +26301,7 @@ invalid_fstring_replacement_field_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) annotated_rhs_rule, p) + _PyPegen_lookahead_for_expr(0, annotated_rhs_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' !annotated_rhs")); @@ -26262,7 +26330,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (annotated_rhs_var = annotated_rhs_rule(p)) // annotated_rhs && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_149_rule, p) + _PyPegen_lookahead(0, _tmp_150_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs !('=' | '!' | ':' | '}')")); @@ -26294,7 +26362,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_150_rule, p) + _PyPegen_lookahead(0, _tmp_151_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '=' !('!' | ':' | '}')")); @@ -26358,9 +26426,9 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_opt_var = _PyPegen_expect_token(p, 22), !p->error_indicator) // '='? && - (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] + (_opt_var_1 = _tmp_152_rule(p), !p->error_indicator) // ['!' NAME] && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(0, _tmp_153_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '='? ['!' NAME] !(':' | '}')")); @@ -26397,7 +26465,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_opt_var = _PyPegen_expect_token(p, 22), !p->error_indicator) // '='? && - (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] + (_opt_var_1 = _tmp_152_rule(p), !p->error_indicator) // ['!' NAME] && (_literal_1 = _PyPegen_expect_token(p, 11)) // token=':' && @@ -26438,7 +26506,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_opt_var = _PyPegen_expect_token(p, 22), !p->error_indicator) // '='? && - (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] + (_opt_var_1 = _tmp_152_rule(p), !p->error_indicator) // ['!' NAME] && _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 26) // token='}' ) @@ -26485,7 +26553,7 @@ invalid_fstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(1, _tmp_153_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' &(':' | '}')")); @@ -26511,7 +26579,7 @@ invalid_fstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) + _PyPegen_lookahead_for_expr(0, _PyPegen_name_token, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' !NAME")); @@ -26675,7 +26743,7 @@ invalid_tstring_replacement_field_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) annotated_rhs_rule, p) + _PyPegen_lookahead_for_expr(0, annotated_rhs_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' !annotated_rhs")); @@ -26704,7 +26772,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (annotated_rhs_var = annotated_rhs_rule(p)) // annotated_rhs && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_149_rule, p) + _PyPegen_lookahead(0, _tmp_150_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs !('=' | '!' | ':' | '}')")); @@ -26736,7 +26804,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_150_rule, p) + _PyPegen_lookahead(0, _tmp_151_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '=' !('!' | ':' | '}')")); @@ -26800,9 +26868,9 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_opt_var = _PyPegen_expect_token(p, 22), !p->error_indicator) // '='? && - (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] + (_opt_var_1 = _tmp_152_rule(p), !p->error_indicator) // ['!' NAME] && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(0, _tmp_153_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '='? ['!' NAME] !(':' | '}')")); @@ -26839,7 +26907,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_opt_var = _PyPegen_expect_token(p, 22), !p->error_indicator) // '='? && - (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] + (_opt_var_1 = _tmp_152_rule(p), !p->error_indicator) // ['!' NAME] && (_literal_1 = _PyPegen_expect_token(p, 11)) // token=':' && @@ -26880,7 +26948,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_opt_var = _PyPegen_expect_token(p, 22), !p->error_indicator) // '='? && - (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] + (_opt_var_1 = _tmp_152_rule(p), !p->error_indicator) // ['!' NAME] && _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 26) // token='}' ) @@ -26927,7 +26995,7 @@ invalid_tstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(1, _tmp_153_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' &(':' | '}')")); @@ -26953,7 +27021,7 @@ invalid_tstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) + _PyPegen_lookahead_for_expr(0, _PyPegen_name_token, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' !NAME")); @@ -26975,6 +27043,81 @@ invalid_tstring_conversion_character_rule(Parser *p) return _res; } +// invalid_string_tstring_concat: +// | ((fstring | string))+ tstring +// | tstring+ (fstring | string) +static void * +invalid_string_tstring_concat_rule(Parser *p) +{ + if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { + _Pypegen_stack_overflow(p); + } + if (p->error_indicator) { + p->level--; + return NULL; + } + void * _res = NULL; + int _mark = p->mark; + { // ((fstring | string))+ tstring + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> invalid_string_tstring_concat[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "((fstring | string))+ tstring")); + asdl_seq * a; + expr_ty b; + if ( + (a = _loop1_81_rule(p)) // ((fstring | string))+ + && + (b = (expr_ty)tstring_rule(p)) // tstring + ) + { + D(fprintf(stderr, "%*c+ invalid_string_tstring_concat[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "((fstring | string))+ tstring")); + _res = RAISE_SYNTAX_ERROR_KNOWN_RANGE ( PyPegen_last_item ( a , expr_ty ) , b , "cannot mix t-string literals with string or bytes literals" ); + if (_res == NULL && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s invalid_string_tstring_concat[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "((fstring | string))+ tstring")); + } + { // tstring+ (fstring | string) + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> invalid_string_tstring_concat[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "tstring+ (fstring | string)")); + asdl_seq * a; + expr_ty b; + if ( + (a = _loop1_82_rule(p)) // tstring+ + && + (b = (expr_ty)_tmp_154_rule(p)) // fstring | string + ) + { + D(fprintf(stderr, "%*c+ invalid_string_tstring_concat[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "tstring+ (fstring | string)")); + _res = RAISE_SYNTAX_ERROR_KNOWN_RANGE ( PyPegen_last_item ( a , expr_ty ) , b , "cannot mix t-string literals with string or bytes literals" ); + if (_res == NULL && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s invalid_string_tstring_concat[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "tstring+ (fstring | string)")); + } + _res = NULL; + done: + p->level--; + return _res; +} + // invalid_arithmetic: sum ('+' | '-' | '*' | '/' | '%' | '//' | '@') 'not' inversion static void * invalid_arithmetic_rule(Parser *p) @@ -26994,14 +27137,14 @@ invalid_arithmetic_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_arithmetic[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "sum ('+' | '-' | '*' | '/' | '%' | '//' | '@') 'not' inversion")); - void *_tmp_153_var; + void *_tmp_155_var; Token * a; expr_ty b; expr_ty sum_var; if ( (sum_var = sum_rule(p)) // sum && - (_tmp_153_var = _tmp_153_rule(p)) // '+' | '-' | '*' | '/' | '%' | '//' | '@' + (_tmp_155_var = _tmp_155_rule(p)) // '+' | '-' | '*' | '/' | '%' | '//' | '@' && (a = _PyPegen_expect_token(p, 703)) // token='not' && @@ -27046,11 +27189,11 @@ invalid_factor_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> invalid_factor[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('+' | '-' | '~') 'not' factor")); - void *_tmp_154_var; + void *_tmp_156_var; Token * a; expr_ty b; if ( - (_tmp_154_var = _tmp_154_rule(p)) // '+' | '-' | '~' + (_tmp_156_var = _tmp_156_rule(p)) // '+' | '-' | '~' && (a = _PyPegen_expect_token(p, 703)) // token='not' && @@ -27824,12 +27967,12 @@ _loop1_12_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> _loop1_12[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(star_targets '=')")); - void *_tmp_155_var; + void *_tmp_157_var; while ( - (_tmp_155_var = _tmp_155_rule(p)) // star_targets '=' + (_tmp_157_var = _tmp_157_rule(p)) // star_targets '=' ) { - _res = _tmp_155_var; + _res = _tmp_157_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -28162,12 +28305,12 @@ _loop0_18_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> _loop0_18[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('.' | '...')")); - void *_tmp_156_var; + void *_tmp_158_var; while ( - (_tmp_156_var = _tmp_156_rule(p)) // '.' | '...' + (_tmp_158_var = _tmp_158_rule(p)) // '.' | '...' ) { - _res = _tmp_156_var; + _res = _tmp_158_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -28229,12 +28372,12 @@ _loop1_19_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> _loop1_19[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('.' | '...')")); - void *_tmp_156_var; + void *_tmp_158_var; while ( - (_tmp_156_var = _tmp_156_rule(p)) // '.' | '...' + (_tmp_158_var = _tmp_158_rule(p)) // '.' | '...' ) { - _res = _tmp_156_var; + _res = _tmp_158_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -28581,12 +28724,12 @@ _loop1_25_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> _loop1_25[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('@' named_expression NEWLINE)")); - void *_tmp_157_var; + void *_tmp_159_var; while ( - (_tmp_157_var = _tmp_157_rule(p)) // '@' named_expression NEWLINE + (_tmp_159_var = _tmp_159_rule(p)) // '@' named_expression NEWLINE ) { - _res = _tmp_157_var; + _res = _tmp_159_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -30614,12 +30757,12 @@ _loop1_57_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> _loop1_57[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(',' star_expression)")); - void *_tmp_158_var; + void *_tmp_160_var; while ( - (_tmp_158_var = _tmp_158_rule(p)) // ',' star_expression + (_tmp_160_var = _tmp_160_rule(p)) // ',' star_expression ) { - _res = _tmp_158_var; + _res = _tmp_160_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -30803,12 +30946,12 @@ _loop1_60_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> _loop1_60[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('or' conjunction)")); - void *_tmp_159_var; + void *_tmp_161_var; while ( - (_tmp_159_var = _tmp_159_rule(p)) // 'or' conjunction + (_tmp_161_var = _tmp_161_rule(p)) // 'or' conjunction ) { - _res = _tmp_159_var; + _res = _tmp_161_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -30875,12 +31018,12 @@ _loop1_61_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> _loop1_61[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('and' inversion)")); - void *_tmp_160_var; + void *_tmp_162_var; while ( - (_tmp_160_var = _tmp_160_rule(p)) // 'and' inversion + (_tmp_162_var = _tmp_162_rule(p)) // 'and' inversion ) { - _res = _tmp_160_var; + _res = _tmp_162_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -31067,7 +31210,7 @@ _loop0_64_rule(Parser *p) while ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (elem = _tmp_161_rule(p)) // slice | starred_expression + (elem = _tmp_163_rule(p)) // slice | starred_expression ) { _res = elem; @@ -31132,7 +31275,7 @@ _gather_65_rule(Parser *p) void *elem; asdl_seq * seq; if ( - (elem = _tmp_161_rule(p)) // slice | starred_expression + (elem = _tmp_163_rule(p)) // slice | starred_expression && (seq = _loop0_64_rule(p)) // _loop0_64 ) @@ -32167,7 +32310,7 @@ _loop0_80_rule(Parser *p) return _seq; } -// _loop1_81: (fstring | string | tstring) +// _loop1_81: (fstring | string) static asdl_seq * _loop1_81_rule(Parser *p) { @@ -32189,18 +32332,18 @@ _loop1_81_rule(Parser *p) } Py_ssize_t _children_capacity = 1; Py_ssize_t _n = 0; - { // (fstring | string | tstring) + { // (fstring | string) if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop1_81[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(fstring | string | tstring)")); - void *_tmp_162_var; + D(fprintf(stderr, "%*c> _loop1_81[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(fstring | string)")); + void *_tmp_154_var; while ( - (_tmp_162_var = _tmp_162_rule(p)) // fstring | string | tstring + (_tmp_154_var = _tmp_154_rule(p)) // fstring | string ) { - _res = _tmp_162_var; + _res = _tmp_154_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -32218,7 +32361,7 @@ _loop1_81_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s _loop1_81[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(fstring | string | tstring)")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(fstring | string)")); } if (_n == 0 || p->error_indicator) { PyMem_Free(_children); @@ -32239,9 +32382,81 @@ _loop1_81_rule(Parser *p) return _seq; } -// _tmp_82: star_named_expression ',' star_named_expressions? +// _loop1_82: tstring +static asdl_seq * +_loop1_82_rule(Parser *p) +{ + if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { + _Pypegen_stack_overflow(p); + } + if (p->error_indicator) { + p->level--; + return NULL; + } + void *_res = NULL; + int _mark = p->mark; + void **_children = PyMem_Malloc(sizeof(void *)); + if (!_children) { + p->error_indicator = 1; + PyErr_NoMemory(); + p->level--; + return NULL; + } + Py_ssize_t _children_capacity = 1; + Py_ssize_t _n = 0; + { // tstring + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> _loop1_82[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "tstring")); + expr_ty tstring_var; + while ( + (tstring_var = tstring_rule(p)) // tstring + ) + { + _res = tstring_var; + if (_n == _children_capacity) { + _children_capacity *= 2; + void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); + if (!_new_children) { + PyMem_Free(_children); + p->error_indicator = 1; + PyErr_NoMemory(); + p->level--; + return NULL; + } + _children = _new_children; + } + _children[_n++] = _res; + _mark = p->mark; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s _loop1_82[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "tstring")); + } + if (_n == 0 || p->error_indicator) { + PyMem_Free(_children); + p->level--; + return NULL; + } + asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); + if (!_seq) { + PyMem_Free(_children); + p->error_indicator = 1; + PyErr_NoMemory(); + p->level--; + return NULL; + } + for (Py_ssize_t i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]); + PyMem_Free(_children); + p->level--; + return _seq; +} + +// _tmp_83: star_named_expression ',' star_named_expressions? static void * -_tmp_82_rule(Parser *p) +_tmp_83_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32257,7 +32472,7 @@ _tmp_82_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_82[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_named_expression ',' star_named_expressions?")); + D(fprintf(stderr, "%*c> _tmp_83[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_named_expression ',' star_named_expressions?")); Token * _literal; expr_ty y; void *z; @@ -32269,7 +32484,7 @@ _tmp_82_rule(Parser *p) (z = star_named_expressions_rule(p), !p->error_indicator) // star_named_expressions? ) { - D(fprintf(stderr, "%*c+ _tmp_82[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_named_expression ',' star_named_expressions?")); + D(fprintf(stderr, "%*c+ _tmp_83[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_named_expression ',' star_named_expressions?")); _res = _PyPegen_seq_insert_in_front ( p , y , z ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -32279,7 +32494,7 @@ _tmp_82_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_82[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_83[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "star_named_expression ',' star_named_expressions?")); } _res = NULL; @@ -32288,9 +32503,9 @@ _tmp_82_rule(Parser *p) return _res; } -// _loop0_83: ',' double_starred_kvpair +// _loop0_84: ',' double_starred_kvpair static asdl_seq * -_loop0_83_rule(Parser *p) +_loop0_84_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32315,7 +32530,7 @@ _loop0_83_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_83[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' double_starred_kvpair")); + D(fprintf(stderr, "%*c> _loop0_84[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' double_starred_kvpair")); Token * _literal; KeyValuePair* elem; while ( @@ -32347,7 +32562,7 @@ _loop0_83_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_83[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_84[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' double_starred_kvpair")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -32364,9 +32579,9 @@ _loop0_83_rule(Parser *p) return _seq; } -// _gather_84: double_starred_kvpair _loop0_83 +// _gather_85: double_starred_kvpair _loop0_84 static asdl_seq * -_gather_84_rule(Parser *p) +_gather_85_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32377,27 +32592,27 @@ _gather_84_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // double_starred_kvpair _loop0_83 + { // double_starred_kvpair _loop0_84 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_84[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "double_starred_kvpair _loop0_83")); + D(fprintf(stderr, "%*c> _gather_85[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "double_starred_kvpair _loop0_84")); KeyValuePair* elem; asdl_seq * seq; if ( (elem = double_starred_kvpair_rule(p)) // double_starred_kvpair && - (seq = _loop0_83_rule(p)) // _loop0_83 + (seq = _loop0_84_rule(p)) // _loop0_84 ) { - D(fprintf(stderr, "%*c+ _gather_84[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "double_starred_kvpair _loop0_83")); + D(fprintf(stderr, "%*c+ _gather_85[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "double_starred_kvpair _loop0_84")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_84[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "double_starred_kvpair _loop0_83")); + D(fprintf(stderr, "%*c%s _gather_85[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "double_starred_kvpair _loop0_84")); } _res = NULL; done: @@ -32405,9 +32620,9 @@ _gather_84_rule(Parser *p) return _res; } -// _loop1_85: for_if_clause +// _loop1_86: for_if_clause static asdl_seq * -_loop1_85_rule(Parser *p) +_loop1_86_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32432,7 +32647,7 @@ _loop1_85_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop1_85[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "for_if_clause")); + D(fprintf(stderr, "%*c> _loop1_86[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "for_if_clause")); comprehension_ty for_if_clause_var; while ( (for_if_clause_var = for_if_clause_rule(p)) // for_if_clause @@ -32455,7 +32670,7 @@ _loop1_85_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop1_85[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop1_86[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "for_if_clause")); } if (_n == 0 || p->error_indicator) { @@ -32477,9 +32692,9 @@ _loop1_85_rule(Parser *p) return _seq; } -// _loop0_86: ('if' disjunction) +// _loop0_87: ('if' disjunction) static asdl_seq * -_loop0_86_rule(Parser *p) +_loop0_87_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32504,13 +32719,13 @@ _loop0_86_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_86[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('if' disjunction)")); - void *_tmp_163_var; + D(fprintf(stderr, "%*c> _loop0_87[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "('if' disjunction)")); + void *_tmp_164_var; while ( - (_tmp_163_var = _tmp_163_rule(p)) // 'if' disjunction + (_tmp_164_var = _tmp_164_rule(p)) // 'if' disjunction ) { - _res = _tmp_163_var; + _res = _tmp_164_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -32527,7 +32742,7 @@ _loop0_86_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_86[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_87[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "('if' disjunction)")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -32544,9 +32759,9 @@ _loop0_86_rule(Parser *p) return _seq; } -// _tmp_87: assignment_expression | expression !':=' +// _tmp_88: assignment_expression | expression !':=' static void * -_tmp_87_rule(Parser *p) +_tmp_88_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32562,18 +32777,18 @@ _tmp_87_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_87[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "assignment_expression")); + D(fprintf(stderr, "%*c> _tmp_88[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "assignment_expression")); expr_ty assignment_expression_var; if ( (assignment_expression_var = assignment_expression_rule(p)) // assignment_expression ) { - D(fprintf(stderr, "%*c+ _tmp_87[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "assignment_expression")); + D(fprintf(stderr, "%*c+ _tmp_88[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "assignment_expression")); _res = assignment_expression_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_87[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_88[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "assignment_expression")); } { // expression !':=' @@ -32581,7 +32796,7 @@ _tmp_87_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_87[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression !':='")); + D(fprintf(stderr, "%*c> _tmp_88[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression !':='")); expr_ty expression_var; if ( (expression_var = expression_rule(p)) // expression @@ -32589,12 +32804,12 @@ _tmp_87_rule(Parser *p) _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 53) // token=':=' ) { - D(fprintf(stderr, "%*c+ _tmp_87[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression !':='")); + D(fprintf(stderr, "%*c+ _tmp_88[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression !':='")); _res = expression_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_87[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_88[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression !':='")); } _res = NULL; @@ -32603,9 +32818,9 @@ _tmp_87_rule(Parser *p) return _res; } -// _loop0_88: ',' (starred_expression | (assignment_expression | expression !':=') !'=') +// _loop0_89: ',' (starred_expression | (assignment_expression | expression !':=') !'=') static asdl_seq * -_loop0_88_rule(Parser *p) +_loop0_89_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32630,13 +32845,13 @@ _loop0_88_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_88[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (starred_expression | (assignment_expression | expression !':=') !'=')")); + D(fprintf(stderr, "%*c> _loop0_89[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (starred_expression | (assignment_expression | expression !':=') !'=')")); Token * _literal; void *elem; while ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (elem = _tmp_164_rule(p)) // starred_expression | (assignment_expression | expression !':=') !'=' + (elem = _tmp_165_rule(p)) // starred_expression | (assignment_expression | expression !':=') !'=' ) { _res = elem; @@ -32662,7 +32877,7 @@ _loop0_88_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_88[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_89[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' (starred_expression | (assignment_expression | expression !':=') !'=')")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -32679,10 +32894,10 @@ _loop0_88_rule(Parser *p) return _seq; } -// _gather_89: -// | (starred_expression | (assignment_expression | expression !':=') !'=') _loop0_88 +// _gather_90: +// | (starred_expression | (assignment_expression | expression !':=') !'=') _loop0_89 static asdl_seq * -_gather_89_rule(Parser *p) +_gather_90_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32693,27 +32908,27 @@ _gather_89_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // (starred_expression | (assignment_expression | expression !':=') !'=') _loop0_88 + { // (starred_expression | (assignment_expression | expression !':=') !'=') _loop0_89 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_89[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(starred_expression | (assignment_expression | expression !':=') !'=') _loop0_88")); + D(fprintf(stderr, "%*c> _gather_90[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(starred_expression | (assignment_expression | expression !':=') !'=') _loop0_89")); void *elem; asdl_seq * seq; if ( - (elem = _tmp_164_rule(p)) // starred_expression | (assignment_expression | expression !':=') !'=' + (elem = _tmp_165_rule(p)) // starred_expression | (assignment_expression | expression !':=') !'=' && - (seq = _loop0_88_rule(p)) // _loop0_88 + (seq = _loop0_89_rule(p)) // _loop0_89 ) { - D(fprintf(stderr, "%*c+ _gather_89[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(starred_expression | (assignment_expression | expression !':=') !'=') _loop0_88")); + D(fprintf(stderr, "%*c+ _gather_90[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(starred_expression | (assignment_expression | expression !':=') !'=') _loop0_89")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_89[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(starred_expression | (assignment_expression | expression !':=') !'=') _loop0_88")); + D(fprintf(stderr, "%*c%s _gather_90[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(starred_expression | (assignment_expression | expression !':=') !'=') _loop0_89")); } _res = NULL; done: @@ -32721,9 +32936,9 @@ _gather_89_rule(Parser *p) return _res; } -// _tmp_90: ',' kwargs +// _tmp_91: ',' kwargs static void * -_tmp_90_rule(Parser *p) +_tmp_91_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32739,7 +32954,7 @@ _tmp_90_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_90[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' kwargs")); + D(fprintf(stderr, "%*c> _tmp_91[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' kwargs")); Token * _literal; asdl_seq* k; if ( @@ -32748,7 +32963,7 @@ _tmp_90_rule(Parser *p) (k = kwargs_rule(p)) // kwargs ) { - D(fprintf(stderr, "%*c+ _tmp_90[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' kwargs")); + D(fprintf(stderr, "%*c+ _tmp_91[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' kwargs")); _res = k; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -32758,7 +32973,7 @@ _tmp_90_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_90[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_91[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' kwargs")); } _res = NULL; @@ -32767,9 +32982,9 @@ _tmp_90_rule(Parser *p) return _res; } -// _loop0_91: ',' kwarg_or_starred +// _loop0_92: ',' kwarg_or_starred static asdl_seq * -_loop0_91_rule(Parser *p) +_loop0_92_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32794,7 +33009,7 @@ _loop0_91_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_91[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' kwarg_or_starred")); + D(fprintf(stderr, "%*c> _loop0_92[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' kwarg_or_starred")); Token * _literal; KeywordOrStarred* elem; while ( @@ -32826,7 +33041,7 @@ _loop0_91_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_91[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_92[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' kwarg_or_starred")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -32843,9 +33058,9 @@ _loop0_91_rule(Parser *p) return _seq; } -// _gather_92: kwarg_or_starred _loop0_91 +// _gather_93: kwarg_or_starred _loop0_92 static asdl_seq * -_gather_92_rule(Parser *p) +_gather_93_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32856,27 +33071,27 @@ _gather_92_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // kwarg_or_starred _loop0_91 + { // kwarg_or_starred _loop0_92 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_92[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "kwarg_or_starred _loop0_91")); + D(fprintf(stderr, "%*c> _gather_93[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "kwarg_or_starred _loop0_92")); KeywordOrStarred* elem; asdl_seq * seq; if ( (elem = kwarg_or_starred_rule(p)) // kwarg_or_starred && - (seq = _loop0_91_rule(p)) // _loop0_91 + (seq = _loop0_92_rule(p)) // _loop0_92 ) { - D(fprintf(stderr, "%*c+ _gather_92[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "kwarg_or_starred _loop0_91")); + D(fprintf(stderr, "%*c+ _gather_93[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "kwarg_or_starred _loop0_92")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_92[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "kwarg_or_starred _loop0_91")); + D(fprintf(stderr, "%*c%s _gather_93[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "kwarg_or_starred _loop0_92")); } _res = NULL; done: @@ -32884,9 +33099,9 @@ _gather_92_rule(Parser *p) return _res; } -// _loop0_93: ',' kwarg_or_double_starred +// _loop0_94: ',' kwarg_or_double_starred static asdl_seq * -_loop0_93_rule(Parser *p) +_loop0_94_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32911,7 +33126,7 @@ _loop0_93_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_93[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' kwarg_or_double_starred")); + D(fprintf(stderr, "%*c> _loop0_94[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' kwarg_or_double_starred")); Token * _literal; KeywordOrStarred* elem; while ( @@ -32943,7 +33158,7 @@ _loop0_93_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_93[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_94[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' kwarg_or_double_starred")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -32960,9 +33175,9 @@ _loop0_93_rule(Parser *p) return _seq; } -// _gather_94: kwarg_or_double_starred _loop0_93 +// _gather_95: kwarg_or_double_starred _loop0_94 static asdl_seq * -_gather_94_rule(Parser *p) +_gather_95_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -32973,27 +33188,27 @@ _gather_94_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // kwarg_or_double_starred _loop0_93 + { // kwarg_or_double_starred _loop0_94 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_94[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "kwarg_or_double_starred _loop0_93")); + D(fprintf(stderr, "%*c> _gather_95[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "kwarg_or_double_starred _loop0_94")); KeywordOrStarred* elem; asdl_seq * seq; if ( (elem = kwarg_or_double_starred_rule(p)) // kwarg_or_double_starred && - (seq = _loop0_93_rule(p)) // _loop0_93 + (seq = _loop0_94_rule(p)) // _loop0_94 ) { - D(fprintf(stderr, "%*c+ _gather_94[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "kwarg_or_double_starred _loop0_93")); + D(fprintf(stderr, "%*c+ _gather_95[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "kwarg_or_double_starred _loop0_94")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_94[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "kwarg_or_double_starred _loop0_93")); + D(fprintf(stderr, "%*c%s _gather_95[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "kwarg_or_double_starred _loop0_94")); } _res = NULL; done: @@ -33001,9 +33216,9 @@ _gather_94_rule(Parser *p) return _res; } -// _loop0_95: (',' star_target) +// _loop0_96: (',' star_target) static asdl_seq * -_loop0_95_rule(Parser *p) +_loop0_96_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33028,13 +33243,13 @@ _loop0_95_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_95[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(',' star_target)")); - void *_tmp_165_var; + D(fprintf(stderr, "%*c> _loop0_96[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(',' star_target)")); + void *_tmp_166_var; while ( - (_tmp_165_var = _tmp_165_rule(p)) // ',' star_target + (_tmp_166_var = _tmp_166_rule(p)) // ',' star_target ) { - _res = _tmp_165_var; + _res = _tmp_166_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -33051,7 +33266,7 @@ _loop0_95_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_95[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_96[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(',' star_target)")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -33068,9 +33283,9 @@ _loop0_95_rule(Parser *p) return _seq; } -// _loop0_96: ',' star_target +// _loop0_97: ',' star_target static asdl_seq * -_loop0_96_rule(Parser *p) +_loop0_97_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33095,7 +33310,7 @@ _loop0_96_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_96[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' star_target")); + D(fprintf(stderr, "%*c> _loop0_97[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' star_target")); Token * _literal; expr_ty elem; while ( @@ -33127,7 +33342,7 @@ _loop0_96_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_96[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_97[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' star_target")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -33144,9 +33359,9 @@ _loop0_96_rule(Parser *p) return _seq; } -// _gather_97: star_target _loop0_96 +// _gather_98: star_target _loop0_97 static asdl_seq * -_gather_97_rule(Parser *p) +_gather_98_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33157,27 +33372,27 @@ _gather_97_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // star_target _loop0_96 + { // star_target _loop0_97 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_97[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_target _loop0_96")); + D(fprintf(stderr, "%*c> _gather_98[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_target _loop0_97")); expr_ty elem; asdl_seq * seq; if ( (elem = star_target_rule(p)) // star_target && - (seq = _loop0_96_rule(p)) // _loop0_96 + (seq = _loop0_97_rule(p)) // _loop0_97 ) { - D(fprintf(stderr, "%*c+ _gather_97[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target _loop0_96")); + D(fprintf(stderr, "%*c+ _gather_98[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target _loop0_97")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_97[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "star_target _loop0_96")); + D(fprintf(stderr, "%*c%s _gather_98[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "star_target _loop0_97")); } _res = NULL; done: @@ -33185,9 +33400,9 @@ _gather_97_rule(Parser *p) return _res; } -// _loop1_98: (',' star_target) +// _loop1_99: (',' star_target) static asdl_seq * -_loop1_98_rule(Parser *p) +_loop1_99_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33212,13 +33427,13 @@ _loop1_98_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop1_98[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(',' star_target)")); - void *_tmp_165_var; + D(fprintf(stderr, "%*c> _loop1_99[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(',' star_target)")); + void *_tmp_166_var; while ( - (_tmp_165_var = _tmp_165_rule(p)) // ',' star_target + (_tmp_166_var = _tmp_166_rule(p)) // ',' star_target ) { - _res = _tmp_165_var; + _res = _tmp_166_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -33235,7 +33450,7 @@ _loop1_98_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop1_98[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop1_99[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(',' star_target)")); } if (_n == 0 || p->error_indicator) { @@ -33257,9 +33472,9 @@ _loop1_98_rule(Parser *p) return _seq; } -// _tmp_99: !'*' star_target +// _tmp_100: !'*' star_target static void * -_tmp_99_rule(Parser *p) +_tmp_100_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33275,7 +33490,7 @@ _tmp_99_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_99[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "!'*' star_target")); + D(fprintf(stderr, "%*c> _tmp_100[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "!'*' star_target")); expr_ty star_target_var; if ( _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 16) // token='*' @@ -33283,12 +33498,12 @@ _tmp_99_rule(Parser *p) (star_target_var = star_target_rule(p)) // star_target ) { - D(fprintf(stderr, "%*c+ _tmp_99[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!'*' star_target")); + D(fprintf(stderr, "%*c+ _tmp_100[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!'*' star_target")); _res = star_target_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_99[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_100[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "!'*' star_target")); } _res = NULL; @@ -33297,9 +33512,9 @@ _tmp_99_rule(Parser *p) return _res; } -// _loop0_100: ',' del_target +// _loop0_101: ',' del_target static asdl_seq * -_loop0_100_rule(Parser *p) +_loop0_101_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33324,7 +33539,7 @@ _loop0_100_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_100[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' del_target")); + D(fprintf(stderr, "%*c> _loop0_101[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' del_target")); Token * _literal; expr_ty elem; while ( @@ -33356,7 +33571,7 @@ _loop0_100_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_100[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_101[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' del_target")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -33373,9 +33588,9 @@ _loop0_100_rule(Parser *p) return _seq; } -// _gather_101: del_target _loop0_100 +// _gather_102: del_target _loop0_101 static asdl_seq * -_gather_101_rule(Parser *p) +_gather_102_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33386,27 +33601,27 @@ _gather_101_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // del_target _loop0_100 + { // del_target _loop0_101 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_101[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "del_target _loop0_100")); + D(fprintf(stderr, "%*c> _gather_102[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "del_target _loop0_101")); expr_ty elem; asdl_seq * seq; if ( (elem = del_target_rule(p)) // del_target && - (seq = _loop0_100_rule(p)) // _loop0_100 + (seq = _loop0_101_rule(p)) // _loop0_101 ) { - D(fprintf(stderr, "%*c+ _gather_101[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "del_target _loop0_100")); + D(fprintf(stderr, "%*c+ _gather_102[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "del_target _loop0_101")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_101[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "del_target _loop0_100")); + D(fprintf(stderr, "%*c%s _gather_102[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "del_target _loop0_101")); } _res = NULL; done: @@ -33414,9 +33629,9 @@ _gather_101_rule(Parser *p) return _res; } -// _loop0_102: ',' expression +// _loop0_103: ',' expression static asdl_seq * -_loop0_102_rule(Parser *p) +_loop0_103_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33441,7 +33656,7 @@ _loop0_102_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_102[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' expression")); + D(fprintf(stderr, "%*c> _loop0_103[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' expression")); Token * _literal; expr_ty elem; while ( @@ -33473,7 +33688,7 @@ _loop0_102_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_102[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_103[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' expression")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -33490,9 +33705,9 @@ _loop0_102_rule(Parser *p) return _seq; } -// _gather_103: expression _loop0_102 +// _gather_104: expression _loop0_103 static asdl_seq * -_gather_103_rule(Parser *p) +_gather_104_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33503,27 +33718,27 @@ _gather_103_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // expression _loop0_102 + { // expression _loop0_103 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_103[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression _loop0_102")); + D(fprintf(stderr, "%*c> _gather_104[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression _loop0_103")); expr_ty elem; asdl_seq * seq; if ( (elem = expression_rule(p)) // expression && - (seq = _loop0_102_rule(p)) // _loop0_102 + (seq = _loop0_103_rule(p)) // _loop0_103 ) { - D(fprintf(stderr, "%*c+ _gather_103[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression _loop0_102")); + D(fprintf(stderr, "%*c+ _gather_104[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression _loop0_103")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_103[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression _loop0_102")); + D(fprintf(stderr, "%*c%s _gather_104[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression _loop0_103")); } _res = NULL; done: @@ -33531,9 +33746,9 @@ _gather_103_rule(Parser *p) return _res; } -// _tmp_104: NEWLINE INDENT +// _tmp_105: NEWLINE INDENT static void * -_tmp_104_rule(Parser *p) +_tmp_105_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33549,7 +33764,7 @@ _tmp_104_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_104[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NEWLINE INDENT")); + D(fprintf(stderr, "%*c> _tmp_105[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NEWLINE INDENT")); Token * indent_var; Token * newline_var; if ( @@ -33558,12 +33773,12 @@ _tmp_104_rule(Parser *p) (indent_var = _PyPegen_expect_token(p, INDENT)) // token='INDENT' ) { - D(fprintf(stderr, "%*c+ _tmp_104[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE INDENT")); + D(fprintf(stderr, "%*c+ _tmp_105[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE INDENT")); _res = _PyPegen_dummy_name(p, newline_var, indent_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_104[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_105[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NEWLINE INDENT")); } _res = NULL; @@ -33572,11 +33787,11 @@ _tmp_104_rule(Parser *p) return _res; } -// _tmp_105: +// _tmp_106: // | (','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs) // | kwargs static void * -_tmp_105_rule(Parser *p) +_tmp_106_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33592,18 +33807,18 @@ _tmp_105_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_105[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs)")); - void *_tmp_166_var; + D(fprintf(stderr, "%*c> _tmp_106[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs)")); + void *_tmp_167_var; if ( - (_tmp_166_var = _tmp_166_rule(p)) // ','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs + (_tmp_167_var = _tmp_167_rule(p)) // ','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs ) { - D(fprintf(stderr, "%*c+ _tmp_105[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs)")); - _res = _tmp_166_var; + D(fprintf(stderr, "%*c+ _tmp_106[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs)")); + _res = _tmp_167_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_105[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_106[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs)")); } { // kwargs @@ -33611,18 +33826,18 @@ _tmp_105_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_105[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "kwargs")); + D(fprintf(stderr, "%*c> _tmp_106[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "kwargs")); asdl_seq* kwargs_var; if ( (kwargs_var = kwargs_rule(p)) // kwargs ) { - D(fprintf(stderr, "%*c+ _tmp_105[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "kwargs")); + D(fprintf(stderr, "%*c+ _tmp_106[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "kwargs")); _res = kwargs_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_105[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_106[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "kwargs")); } _res = NULL; @@ -33631,9 +33846,9 @@ _tmp_105_rule(Parser *p) return _res; } -// _loop0_106: ',' (starred_expression !'=') +// _loop0_107: ',' (starred_expression !'=') static asdl_seq * -_loop0_106_rule(Parser *p) +_loop0_107_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33658,13 +33873,13 @@ _loop0_106_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_106[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (starred_expression !'=')")); + D(fprintf(stderr, "%*c> _loop0_107[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (starred_expression !'=')")); Token * _literal; void *elem; while ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (elem = _tmp_167_rule(p)) // starred_expression !'=' + (elem = _tmp_168_rule(p)) // starred_expression !'=' ) { _res = elem; @@ -33690,7 +33905,7 @@ _loop0_106_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_106[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_107[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' (starred_expression !'=')")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -33707,9 +33922,9 @@ _loop0_106_rule(Parser *p) return _seq; } -// _gather_107: (starred_expression !'=') _loop0_106 +// _gather_108: (starred_expression !'=') _loop0_107 static asdl_seq * -_gather_107_rule(Parser *p) +_gather_108_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33720,27 +33935,27 @@ _gather_107_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // (starred_expression !'=') _loop0_106 + { // (starred_expression !'=') _loop0_107 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_107[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(starred_expression !'=') _loop0_106")); + D(fprintf(stderr, "%*c> _gather_108[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(starred_expression !'=') _loop0_107")); void *elem; asdl_seq * seq; if ( - (elem = _tmp_167_rule(p)) // starred_expression !'=' + (elem = _tmp_168_rule(p)) // starred_expression !'=' && - (seq = _loop0_106_rule(p)) // _loop0_106 + (seq = _loop0_107_rule(p)) // _loop0_107 ) { - D(fprintf(stderr, "%*c+ _gather_107[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(starred_expression !'=') _loop0_106")); + D(fprintf(stderr, "%*c+ _gather_108[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(starred_expression !'=') _loop0_107")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_107[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(starred_expression !'=') _loop0_106")); + D(fprintf(stderr, "%*c%s _gather_108[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(starred_expression !'=') _loop0_107")); } _res = NULL; done: @@ -33748,9 +33963,9 @@ _gather_107_rule(Parser *p) return _res; } -// _tmp_108: args | expression for_if_clauses +// _tmp_109: args | expression for_if_clauses static void * -_tmp_108_rule(Parser *p) +_tmp_109_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33766,18 +33981,18 @@ _tmp_108_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_108[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args")); + D(fprintf(stderr, "%*c> _tmp_109[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args")); expr_ty args_var; if ( (args_var = args_rule(p)) // args ) { - D(fprintf(stderr, "%*c+ _tmp_108[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args")); + D(fprintf(stderr, "%*c+ _tmp_109[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args")); _res = args_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_108[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_109[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args")); } { // expression for_if_clauses @@ -33785,7 +34000,7 @@ _tmp_108_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_108[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses")); + D(fprintf(stderr, "%*c> _tmp_109[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses")); expr_ty expression_var; asdl_comprehension_seq* for_if_clauses_var; if ( @@ -33794,12 +34009,12 @@ _tmp_108_rule(Parser *p) (for_if_clauses_var = for_if_clauses_rule(p)) // for_if_clauses ) { - D(fprintf(stderr, "%*c+ _tmp_108[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses")); + D(fprintf(stderr, "%*c+ _tmp_109[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses")); _res = _PyPegen_dummy_name(p, expression_var, for_if_clauses_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_108[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_109[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression for_if_clauses")); } _res = NULL; @@ -33808,9 +34023,9 @@ _tmp_108_rule(Parser *p) return _res; } -// _tmp_109: args ',' +// _tmp_110: args ',' static void * -_tmp_109_rule(Parser *p) +_tmp_110_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33826,7 +34041,7 @@ _tmp_109_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_109[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ','")); + D(fprintf(stderr, "%*c> _tmp_110[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ','")); Token * _literal; expr_ty args_var; if ( @@ -33835,12 +34050,12 @@ _tmp_109_rule(Parser *p) (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_109[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ','")); + D(fprintf(stderr, "%*c+ _tmp_110[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ','")); _res = _PyPegen_dummy_name(p, args_var, _literal); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_109[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_110[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ','")); } _res = NULL; @@ -33849,9 +34064,9 @@ _tmp_109_rule(Parser *p) return _res; } -// _tmp_110: ',' | ')' +// _tmp_111: ',' | ')' static void * -_tmp_110_rule(Parser *p) +_tmp_111_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33867,18 +34082,18 @@ _tmp_110_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_110[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_111[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_110[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_111[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_110[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_111[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } { // ')' @@ -33886,18 +34101,18 @@ _tmp_110_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_110[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c> _tmp_111[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 8)) // token=')' ) { - D(fprintf(stderr, "%*c+ _tmp_110[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c+ _tmp_111[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_110[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_111[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "')'")); } _res = NULL; @@ -33906,9 +34121,9 @@ _tmp_110_rule(Parser *p) return _res; } -// _tmp_111: 'True' | 'False' | 'None' +// _tmp_112: 'True' | 'False' | 'None' static void * -_tmp_111_rule(Parser *p) +_tmp_112_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -33924,18 +34139,18 @@ _tmp_111_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_111[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); + D(fprintf(stderr, "%*c> _tmp_112[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 622)) // token='True' ) { - D(fprintf(stderr, "%*c+ _tmp_111[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); + D(fprintf(stderr, "%*c+ _tmp_112[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_111[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_112[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'True'")); } { // 'False' @@ -33943,18 +34158,18 @@ _tmp_111_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_111[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); + D(fprintf(stderr, "%*c> _tmp_112[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 624)) // token='False' ) { - D(fprintf(stderr, "%*c+ _tmp_111[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); + D(fprintf(stderr, "%*c+ _tmp_112[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_111[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_112[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'False'")); } { // 'None' @@ -33962,18 +34177,18 @@ _tmp_111_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_111[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); + D(fprintf(stderr, "%*c> _tmp_112[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 623)) // token='None' ) { - D(fprintf(stderr, "%*c+ _tmp_111[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); + D(fprintf(stderr, "%*c+ _tmp_112[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_111[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_112[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'None'")); } _res = NULL; @@ -33982,9 +34197,9 @@ _tmp_111_rule(Parser *p) return _res; } -// _tmp_112: NAME '=' +// _tmp_113: NAME '=' static void * -_tmp_112_rule(Parser *p) +_tmp_113_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34000,7 +34215,7 @@ _tmp_112_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_112[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME '='")); + D(fprintf(stderr, "%*c> _tmp_113[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME '='")); Token * _literal; expr_ty name_var; if ( @@ -34009,12 +34224,12 @@ _tmp_112_rule(Parser *p) (_literal = _PyPegen_expect_token(p, 22)) // token='=' ) { - D(fprintf(stderr, "%*c+ _tmp_112[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME '='")); + D(fprintf(stderr, "%*c+ _tmp_113[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME '='")); _res = _PyPegen_dummy_name(p, name_var, _literal); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_112[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_113[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NAME '='")); } _res = NULL; @@ -34023,9 +34238,9 @@ _tmp_112_rule(Parser *p) return _res; } -// _loop1_113: (!STRING expression_without_invalid) +// _loop1_114: (!STRING expression_without_invalid) static asdl_seq * -_loop1_113_rule(Parser *p) +_loop1_114_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34050,13 +34265,13 @@ _loop1_113_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop1_113[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(!STRING expression_without_invalid)")); - void *_tmp_168_var; + D(fprintf(stderr, "%*c> _loop1_114[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(!STRING expression_without_invalid)")); + void *_tmp_169_var; while ( - (_tmp_168_var = _tmp_168_rule(p)) // !STRING expression_without_invalid + (_tmp_169_var = _tmp_169_rule(p)) // !STRING expression_without_invalid ) { - _res = _tmp_168_var; + _res = _tmp_169_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -34073,7 +34288,7 @@ _loop1_113_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop1_113[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop1_114[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(!STRING expression_without_invalid)")); } if (_n == 0 || p->error_indicator) { @@ -34095,9 +34310,9 @@ _loop1_113_rule(Parser *p) return _seq; } -// _tmp_114: NAME STRING | SOFT_KEYWORD +// _tmp_115: NAME STRING | SOFT_KEYWORD static void * -_tmp_114_rule(Parser *p) +_tmp_115_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34113,7 +34328,7 @@ _tmp_114_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_114[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME STRING")); + D(fprintf(stderr, "%*c> _tmp_115[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME STRING")); expr_ty name_var; expr_ty string_var; if ( @@ -34122,12 +34337,12 @@ _tmp_114_rule(Parser *p) (string_var = _PyPegen_string_token(p)) // STRING ) { - D(fprintf(stderr, "%*c+ _tmp_114[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME STRING")); + D(fprintf(stderr, "%*c+ _tmp_115[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME STRING")); _res = _PyPegen_dummy_name(p, name_var, string_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_114[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_115[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NAME STRING")); } { // SOFT_KEYWORD @@ -34135,18 +34350,18 @@ _tmp_114_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_114[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "SOFT_KEYWORD")); + D(fprintf(stderr, "%*c> _tmp_115[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "SOFT_KEYWORD")); expr_ty soft_keyword_var; if ( (soft_keyword_var = _PyPegen_soft_keyword_token(p)) // SOFT_KEYWORD ) { - D(fprintf(stderr, "%*c+ _tmp_114[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "SOFT_KEYWORD")); + D(fprintf(stderr, "%*c+ _tmp_115[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "SOFT_KEYWORD")); _res = soft_keyword_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_114[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_115[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "SOFT_KEYWORD")); } _res = NULL; @@ -34155,9 +34370,9 @@ _tmp_114_rule(Parser *p) return _res; } -// _tmp_115: 'else' | ':' +// _tmp_116: 'else' | ':' static void * -_tmp_115_rule(Parser *p) +_tmp_116_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34173,18 +34388,18 @@ _tmp_115_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_115[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else'")); + D(fprintf(stderr, "%*c> _tmp_116[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 686)) // token='else' ) { - D(fprintf(stderr, "%*c+ _tmp_115[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else'")); + D(fprintf(stderr, "%*c+ _tmp_116[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_115[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_116[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'else'")); } { // ':' @@ -34192,18 +34407,18 @@ _tmp_115_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_115[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c> _tmp_116[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 11)) // token=':' ) { - D(fprintf(stderr, "%*c+ _tmp_115[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c+ _tmp_116[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_115[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_116[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':'")); } _res = NULL; @@ -34212,9 +34427,9 @@ _tmp_115_rule(Parser *p) return _res; } -// _tmp_116: pass_stmt | break_stmt | continue_stmt +// _tmp_117: pass_stmt | break_stmt | continue_stmt static void * -_tmp_116_rule(Parser *p) +_tmp_117_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34230,18 +34445,18 @@ _tmp_116_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_116[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "pass_stmt")); + D(fprintf(stderr, "%*c> _tmp_117[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "pass_stmt")); stmt_ty pass_stmt_var; if ( (pass_stmt_var = pass_stmt_rule(p)) // pass_stmt ) { - D(fprintf(stderr, "%*c+ _tmp_116[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "pass_stmt")); + D(fprintf(stderr, "%*c+ _tmp_117[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "pass_stmt")); _res = pass_stmt_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_116[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_117[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "pass_stmt")); } { // break_stmt @@ -34249,18 +34464,18 @@ _tmp_116_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_116[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "break_stmt")); + D(fprintf(stderr, "%*c> _tmp_117[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "break_stmt")); stmt_ty break_stmt_var; if ( (break_stmt_var = break_stmt_rule(p)) // break_stmt ) { - D(fprintf(stderr, "%*c+ _tmp_116[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "break_stmt")); + D(fprintf(stderr, "%*c+ _tmp_117[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "break_stmt")); _res = break_stmt_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_116[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_117[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "break_stmt")); } { // continue_stmt @@ -34268,18 +34483,18 @@ _tmp_116_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_116[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "continue_stmt")); + D(fprintf(stderr, "%*c> _tmp_117[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "continue_stmt")); stmt_ty continue_stmt_var; if ( (continue_stmt_var = continue_stmt_rule(p)) // continue_stmt ) { - D(fprintf(stderr, "%*c+ _tmp_116[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "continue_stmt")); + D(fprintf(stderr, "%*c+ _tmp_117[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "continue_stmt")); _res = continue_stmt_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_116[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_117[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "continue_stmt")); } _res = NULL; @@ -34288,9 +34503,9 @@ _tmp_116_rule(Parser *p) return _res; } -// _tmp_117: '=' | ':=' +// _tmp_118: '=' | ':=' static void * -_tmp_117_rule(Parser *p) +_tmp_118_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34306,18 +34521,18 @@ _tmp_117_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_117[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'='")); + D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'='")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 22)) // token='=' ) { - D(fprintf(stderr, "%*c+ _tmp_117[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'='")); + D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'='")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_117[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'='")); } { // ':=' @@ -34325,18 +34540,18 @@ _tmp_117_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_117[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':='")); + D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':='")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 53)) // token=':=' ) { - D(fprintf(stderr, "%*c+ _tmp_117[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':='")); + D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':='")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_117[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':='")); } _res = NULL; @@ -34345,9 +34560,9 @@ _tmp_117_rule(Parser *p) return _res; } -// _tmp_118: list | tuple | genexp | 'True' | 'None' | 'False' +// _tmp_119: list | tuple | genexp | 'True' | 'None' | 'False' static void * -_tmp_118_rule(Parser *p) +_tmp_119_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34363,18 +34578,18 @@ _tmp_118_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "list")); + D(fprintf(stderr, "%*c> _tmp_119[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "list")); expr_ty list_var; if ( (list_var = list_rule(p)) // list ) { - D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "list")); + D(fprintf(stderr, "%*c+ _tmp_119[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "list")); _res = list_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_119[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "list")); } { // tuple @@ -34382,18 +34597,18 @@ _tmp_118_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "tuple")); + D(fprintf(stderr, "%*c> _tmp_119[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "tuple")); expr_ty tuple_var; if ( (tuple_var = tuple_rule(p)) // tuple ) { - D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "tuple")); + D(fprintf(stderr, "%*c+ _tmp_119[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "tuple")); _res = tuple_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_119[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "tuple")); } { // genexp @@ -34401,18 +34616,18 @@ _tmp_118_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "genexp")); + D(fprintf(stderr, "%*c> _tmp_119[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "genexp")); expr_ty genexp_var; if ( (genexp_var = genexp_rule(p)) // genexp ) { - D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "genexp")); + D(fprintf(stderr, "%*c+ _tmp_119[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "genexp")); _res = genexp_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_119[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "genexp")); } { // 'True' @@ -34420,18 +34635,18 @@ _tmp_118_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); + D(fprintf(stderr, "%*c> _tmp_119[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 622)) // token='True' ) { - D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); + D(fprintf(stderr, "%*c+ _tmp_119[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_119[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'True'")); } { // 'None' @@ -34439,18 +34654,18 @@ _tmp_118_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); + D(fprintf(stderr, "%*c> _tmp_119[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 623)) // token='None' ) { - D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); + D(fprintf(stderr, "%*c+ _tmp_119[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_119[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'None'")); } { // 'False' @@ -34458,18 +34673,18 @@ _tmp_118_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_118[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); + D(fprintf(stderr, "%*c> _tmp_119[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 624)) // token='False' ) { - D(fprintf(stderr, "%*c+ _tmp_118[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); + D(fprintf(stderr, "%*c+ _tmp_119[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_118[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_119[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'False'")); } _res = NULL; @@ -34478,9 +34693,9 @@ _tmp_118_rule(Parser *p) return _res; } -// _loop0_119: star_named_expressions +// _loop0_120: star_named_expressions static asdl_seq * -_loop0_119_rule(Parser *p) +_loop0_120_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34505,7 +34720,7 @@ _loop0_119_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_119[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_named_expressions")); + D(fprintf(stderr, "%*c> _loop0_120[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_named_expressions")); asdl_expr_seq* star_named_expressions_var; while ( (star_named_expressions_var = star_named_expressions_rule(p)) // star_named_expressions @@ -34528,7 +34743,7 @@ _loop0_119_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_119[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_120[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "star_named_expressions")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -34545,9 +34760,9 @@ _loop0_119_rule(Parser *p) return _seq; } -// _loop0_120: (star_targets '=') +// _loop0_121: (star_targets '=') static asdl_seq * -_loop0_120_rule(Parser *p) +_loop0_121_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34572,13 +34787,13 @@ _loop0_120_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_120[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(star_targets '=')")); - void *_tmp_155_var; + D(fprintf(stderr, "%*c> _loop0_121[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(star_targets '=')")); + void *_tmp_157_var; while ( - (_tmp_155_var = _tmp_155_rule(p)) // star_targets '=' + (_tmp_157_var = _tmp_157_rule(p)) // star_targets '=' ) { - _res = _tmp_155_var; + _res = _tmp_157_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -34595,7 +34810,7 @@ _loop0_120_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_120[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_121[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(star_targets '=')")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -34612,9 +34827,9 @@ _loop0_120_rule(Parser *p) return _seq; } -// _tmp_121: '[' | '(' | '{' +// _tmp_122: '[' | '(' | '{' static void * -_tmp_121_rule(Parser *p) +_tmp_122_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34630,18 +34845,18 @@ _tmp_121_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_121[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'['")); + D(fprintf(stderr, "%*c> _tmp_122[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'['")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 9)) // token='[' ) { - D(fprintf(stderr, "%*c+ _tmp_121[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'['")); + D(fprintf(stderr, "%*c+ _tmp_122[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'['")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_121[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_122[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'['")); } { // '(' @@ -34649,18 +34864,18 @@ _tmp_121_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_121[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'('")); + D(fprintf(stderr, "%*c> _tmp_122[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'('")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 7)) // token='(' ) { - D(fprintf(stderr, "%*c+ _tmp_121[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'('")); + D(fprintf(stderr, "%*c+ _tmp_122[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'('")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_121[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_122[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'('")); } { // '{' @@ -34668,18 +34883,18 @@ _tmp_121_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_121[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'{'")); + D(fprintf(stderr, "%*c> _tmp_122[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'{'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' ) { - D(fprintf(stderr, "%*c+ _tmp_121[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{'")); + D(fprintf(stderr, "%*c+ _tmp_122[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_121[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_122[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'{'")); } _res = NULL; @@ -34688,9 +34903,9 @@ _tmp_121_rule(Parser *p) return _res; } -// _tmp_122: '[' | '{' +// _tmp_123: '[' | '{' static void * -_tmp_122_rule(Parser *p) +_tmp_123_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34706,18 +34921,18 @@ _tmp_122_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_122[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'['")); + D(fprintf(stderr, "%*c> _tmp_123[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'['")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 9)) // token='[' ) { - D(fprintf(stderr, "%*c+ _tmp_122[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'['")); + D(fprintf(stderr, "%*c+ _tmp_123[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'['")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_122[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_123[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'['")); } { // '{' @@ -34725,18 +34940,18 @@ _tmp_122_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_122[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'{'")); + D(fprintf(stderr, "%*c> _tmp_123[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'{'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' ) { - D(fprintf(stderr, "%*c+ _tmp_122[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{'")); + D(fprintf(stderr, "%*c+ _tmp_123[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_122[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_123[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'{'")); } _res = NULL; @@ -34745,9 +34960,9 @@ _tmp_122_rule(Parser *p) return _res; } -// _tmp_123: slash_no_default | slash_with_default +// _tmp_124: slash_no_default | slash_with_default static void * -_tmp_123_rule(Parser *p) +_tmp_124_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34763,18 +34978,18 @@ _tmp_123_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_123[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "slash_no_default")); + D(fprintf(stderr, "%*c> _tmp_124[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "slash_no_default")); asdl_arg_seq* slash_no_default_var; if ( (slash_no_default_var = slash_no_default_rule(p)) // slash_no_default ) { - D(fprintf(stderr, "%*c+ _tmp_123[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "slash_no_default")); + D(fprintf(stderr, "%*c+ _tmp_124[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "slash_no_default")); _res = slash_no_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_123[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_124[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "slash_no_default")); } { // slash_with_default @@ -34782,18 +34997,18 @@ _tmp_123_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_123[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "slash_with_default")); + D(fprintf(stderr, "%*c> _tmp_124[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "slash_with_default")); SlashWithDefault* slash_with_default_var; if ( (slash_with_default_var = slash_with_default_rule(p)) // slash_with_default ) { - D(fprintf(stderr, "%*c+ _tmp_123[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "slash_with_default")); + D(fprintf(stderr, "%*c+ _tmp_124[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "slash_with_default")); _res = slash_with_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_123[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_124[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "slash_with_default")); } _res = NULL; @@ -34802,9 +35017,9 @@ _tmp_123_rule(Parser *p) return _res; } -// _tmp_124: ',' | param_no_default +// _tmp_125: ',' | param_no_default static void * -_tmp_124_rule(Parser *p) +_tmp_125_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34820,18 +35035,18 @@ _tmp_124_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_124[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_125[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_124[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_125[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_124[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_125[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } { // param_no_default @@ -34839,18 +35054,18 @@ _tmp_124_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_124[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "param_no_default")); + D(fprintf(stderr, "%*c> _tmp_125[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "param_no_default")); arg_ty param_no_default_var; if ( (param_no_default_var = param_no_default_rule(p)) // param_no_default ) { - D(fprintf(stderr, "%*c+ _tmp_124[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default")); + D(fprintf(stderr, "%*c+ _tmp_125[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default")); _res = param_no_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_124[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_125[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "param_no_default")); } _res = NULL; @@ -34859,9 +35074,9 @@ _tmp_124_rule(Parser *p) return _res; } -// _tmp_125: ')' | ',' +// _tmp_126: ')' | ',' static void * -_tmp_125_rule(Parser *p) +_tmp_126_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34877,18 +35092,18 @@ _tmp_125_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_125[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c> _tmp_126[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 8)) // token=')' ) { - D(fprintf(stderr, "%*c+ _tmp_125[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c+ _tmp_126[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_125[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_126[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "')'")); } { // ',' @@ -34896,18 +35111,18 @@ _tmp_125_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_125[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_126[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_125[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_126[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_125[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_126[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } _res = NULL; @@ -34916,9 +35131,9 @@ _tmp_125_rule(Parser *p) return _res; } -// _tmp_126: ')' | ',' (')' | '**') +// _tmp_127: ')' | ',' (')' | '**') static void * -_tmp_126_rule(Parser *p) +_tmp_127_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34934,18 +35149,18 @@ _tmp_126_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_126[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c> _tmp_127[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 8)) // token=')' ) { - D(fprintf(stderr, "%*c+ _tmp_126[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c+ _tmp_127[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_126[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_127[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "')'")); } { // ',' (')' | '**') @@ -34953,21 +35168,21 @@ _tmp_126_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_126[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (')' | '**')")); + D(fprintf(stderr, "%*c> _tmp_127[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (')' | '**')")); Token * _literal; - void *_tmp_169_var; + void *_tmp_170_var; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (_tmp_169_var = _tmp_169_rule(p)) // ')' | '**' + (_tmp_170_var = _tmp_170_rule(p)) // ')' | '**' ) { - D(fprintf(stderr, "%*c+ _tmp_126[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' (')' | '**')")); - _res = _PyPegen_dummy_name(p, _literal, _tmp_169_var); + D(fprintf(stderr, "%*c+ _tmp_127[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' (')' | '**')")); + _res = _PyPegen_dummy_name(p, _literal, _tmp_170_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_126[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_127[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' (')' | '**')")); } _res = NULL; @@ -34976,9 +35191,9 @@ _tmp_126_rule(Parser *p) return _res; } -// _tmp_127: param_no_default | ',' +// _tmp_128: param_no_default | ',' static void * -_tmp_127_rule(Parser *p) +_tmp_128_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -34994,18 +35209,18 @@ _tmp_127_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_127[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "param_no_default")); + D(fprintf(stderr, "%*c> _tmp_128[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "param_no_default")); arg_ty param_no_default_var; if ( (param_no_default_var = param_no_default_rule(p)) // param_no_default ) { - D(fprintf(stderr, "%*c+ _tmp_127[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default")); + D(fprintf(stderr, "%*c+ _tmp_128[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default")); _res = param_no_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_127[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_128[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "param_no_default")); } { // ',' @@ -35013,18 +35228,18 @@ _tmp_127_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_127[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_128[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_127[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_128[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_127[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_128[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } _res = NULL; @@ -35033,9 +35248,9 @@ _tmp_127_rule(Parser *p) return _res; } -// _tmp_128: '*' | '**' | '/' +// _tmp_129: '*' | '**' | '/' static void * -_tmp_128_rule(Parser *p) +_tmp_129_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35051,18 +35266,18 @@ _tmp_128_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_128[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*'")); + D(fprintf(stderr, "%*c> _tmp_129[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 16)) // token='*' ) { - D(fprintf(stderr, "%*c+ _tmp_128[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*'")); + D(fprintf(stderr, "%*c+ _tmp_129[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_128[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_129[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'*'")); } { // '**' @@ -35070,18 +35285,18 @@ _tmp_128_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_128[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'**'")); + D(fprintf(stderr, "%*c> _tmp_129[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'**'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 35)) // token='**' ) { - D(fprintf(stderr, "%*c+ _tmp_128[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**'")); + D(fprintf(stderr, "%*c+ _tmp_129[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_128[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_129[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'**'")); } { // '/' @@ -35089,18 +35304,18 @@ _tmp_128_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_128[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'/'")); + D(fprintf(stderr, "%*c> _tmp_129[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'/'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 17)) // token='/' ) { - D(fprintf(stderr, "%*c+ _tmp_128[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'/'")); + D(fprintf(stderr, "%*c+ _tmp_129[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'/'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_128[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_129[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'/'")); } _res = NULL; @@ -35109,9 +35324,9 @@ _tmp_128_rule(Parser *p) return _res; } -// _tmp_129: lambda_slash_no_default | lambda_slash_with_default +// _tmp_130: lambda_slash_no_default | lambda_slash_with_default static void * -_tmp_129_rule(Parser *p) +_tmp_130_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35127,18 +35342,18 @@ _tmp_129_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_129[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_slash_no_default")); + D(fprintf(stderr, "%*c> _tmp_130[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_slash_no_default")); asdl_arg_seq* lambda_slash_no_default_var; if ( (lambda_slash_no_default_var = lambda_slash_no_default_rule(p)) // lambda_slash_no_default ) { - D(fprintf(stderr, "%*c+ _tmp_129[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_slash_no_default")); + D(fprintf(stderr, "%*c+ _tmp_130[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_slash_no_default")); _res = lambda_slash_no_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_129[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_130[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_slash_no_default")); } { // lambda_slash_with_default @@ -35146,18 +35361,18 @@ _tmp_129_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_129[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_slash_with_default")); + D(fprintf(stderr, "%*c> _tmp_130[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_slash_with_default")); SlashWithDefault* lambda_slash_with_default_var; if ( (lambda_slash_with_default_var = lambda_slash_with_default_rule(p)) // lambda_slash_with_default ) { - D(fprintf(stderr, "%*c+ _tmp_129[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_slash_with_default")); + D(fprintf(stderr, "%*c+ _tmp_130[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_slash_with_default")); _res = lambda_slash_with_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_129[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_130[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_slash_with_default")); } _res = NULL; @@ -35166,9 +35381,9 @@ _tmp_129_rule(Parser *p) return _res; } -// _loop0_130: ',' lambda_param +// _loop0_131: ',' lambda_param static asdl_seq * -_loop0_130_rule(Parser *p) +_loop0_131_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35193,7 +35408,7 @@ _loop0_130_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_130[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' lambda_param")); + D(fprintf(stderr, "%*c> _loop0_131[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' lambda_param")); Token * _literal; arg_ty elem; while ( @@ -35225,7 +35440,7 @@ _loop0_130_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_130[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_131[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' lambda_param")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -35242,9 +35457,9 @@ _loop0_130_rule(Parser *p) return _seq; } -// _gather_131: lambda_param _loop0_130 +// _gather_132: lambda_param _loop0_131 static asdl_seq * -_gather_131_rule(Parser *p) +_gather_132_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35255,27 +35470,27 @@ _gather_131_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // lambda_param _loop0_130 + { // lambda_param _loop0_131 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_131[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_param _loop0_130")); + D(fprintf(stderr, "%*c> _gather_132[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_param _loop0_131")); arg_ty elem; asdl_seq * seq; if ( (elem = lambda_param_rule(p)) // lambda_param && - (seq = _loop0_130_rule(p)) // _loop0_130 + (seq = _loop0_131_rule(p)) // _loop0_131 ) { - D(fprintf(stderr, "%*c+ _gather_131[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param _loop0_130")); + D(fprintf(stderr, "%*c+ _gather_132[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param _loop0_131")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_131[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_param _loop0_130")); + D(fprintf(stderr, "%*c%s _gather_132[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_param _loop0_131")); } _res = NULL; done: @@ -35283,9 +35498,9 @@ _gather_131_rule(Parser *p) return _res; } -// _tmp_132: ',' | lambda_param_no_default +// _tmp_133: ',' | lambda_param_no_default static void * -_tmp_132_rule(Parser *p) +_tmp_133_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35301,18 +35516,18 @@ _tmp_132_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_132[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_133[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_132[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_133[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_132[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_133[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } { // lambda_param_no_default @@ -35320,18 +35535,18 @@ _tmp_132_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_132[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); + D(fprintf(stderr, "%*c> _tmp_133[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); arg_ty lambda_param_no_default_var; if ( (lambda_param_no_default_var = lambda_param_no_default_rule(p)) // lambda_param_no_default ) { - D(fprintf(stderr, "%*c+ _tmp_132[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); + D(fprintf(stderr, "%*c+ _tmp_133[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); _res = lambda_param_no_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_132[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_133[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_param_no_default")); } _res = NULL; @@ -35340,9 +35555,9 @@ _tmp_132_rule(Parser *p) return _res; } -// _tmp_133: ':' | ',' (':' | '**') +// _tmp_134: ':' | ',' (':' | '**') static void * -_tmp_133_rule(Parser *p) +_tmp_134_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35358,18 +35573,18 @@ _tmp_133_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_133[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c> _tmp_134[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 11)) // token=':' ) { - D(fprintf(stderr, "%*c+ _tmp_133[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c+ _tmp_134[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_133[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_134[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':'")); } { // ',' (':' | '**') @@ -35377,21 +35592,21 @@ _tmp_133_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_133[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (':' | '**')")); + D(fprintf(stderr, "%*c> _tmp_134[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (':' | '**')")); Token * _literal; - void *_tmp_170_var; + void *_tmp_171_var; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (_tmp_170_var = _tmp_170_rule(p)) // ':' | '**' + (_tmp_171_var = _tmp_171_rule(p)) // ':' | '**' ) { - D(fprintf(stderr, "%*c+ _tmp_133[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' (':' | '**')")); - _res = _PyPegen_dummy_name(p, _literal, _tmp_170_var); + D(fprintf(stderr, "%*c+ _tmp_134[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' (':' | '**')")); + _res = _PyPegen_dummy_name(p, _literal, _tmp_171_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_133[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_134[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' (':' | '**')")); } _res = NULL; @@ -35400,9 +35615,9 @@ _tmp_133_rule(Parser *p) return _res; } -// _tmp_134: lambda_param_no_default | ',' +// _tmp_135: lambda_param_no_default | ',' static void * -_tmp_134_rule(Parser *p) +_tmp_135_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35418,18 +35633,18 @@ _tmp_134_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_134[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); + D(fprintf(stderr, "%*c> _tmp_135[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); arg_ty lambda_param_no_default_var; if ( (lambda_param_no_default_var = lambda_param_no_default_rule(p)) // lambda_param_no_default ) { - D(fprintf(stderr, "%*c+ _tmp_134[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); + D(fprintf(stderr, "%*c+ _tmp_135[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default")); _res = lambda_param_no_default_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_134[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_135[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_param_no_default")); } { // ',' @@ -35437,18 +35652,18 @@ _tmp_134_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_134[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_135[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_134[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_135[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_134[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_135[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } _res = NULL; @@ -35457,9 +35672,9 @@ _tmp_134_rule(Parser *p) return _res; } -// _tmp_135: bitwise_or ((',' bitwise_or))* ','? +// _tmp_136: bitwise_or ((',' bitwise_or))* ','? static void * -_tmp_135_rule(Parser *p) +_tmp_136_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35475,25 +35690,25 @@ _tmp_135_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_135[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "bitwise_or ((',' bitwise_or))* ','?")); - asdl_seq * _loop0_171_var; + D(fprintf(stderr, "%*c> _tmp_136[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "bitwise_or ((',' bitwise_or))* ','?")); + asdl_seq * _loop0_172_var; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings expr_ty bitwise_or_var; if ( (bitwise_or_var = bitwise_or_rule(p)) // bitwise_or && - (_loop0_171_var = _loop0_171_rule(p)) // ((',' bitwise_or))* + (_loop0_172_var = _loop0_172_rule(p)) // ((',' bitwise_or))* && (_opt_var = _PyPegen_expect_token(p, 12), !p->error_indicator) // ','? ) { - D(fprintf(stderr, "%*c+ _tmp_135[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "bitwise_or ((',' bitwise_or))* ','?")); - _res = _PyPegen_dummy_name(p, bitwise_or_var, _loop0_171_var, _opt_var); + D(fprintf(stderr, "%*c+ _tmp_136[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "bitwise_or ((',' bitwise_or))* ','?")); + _res = _PyPegen_dummy_name(p, bitwise_or_var, _loop0_172_var, _opt_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_135[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_136[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "bitwise_or ((',' bitwise_or))* ','?")); } _res = NULL; @@ -35502,9 +35717,9 @@ _tmp_135_rule(Parser *p) return _res; } -// _loop0_136: ',' dotted_name +// _loop0_137: ',' dotted_name static asdl_seq * -_loop0_136_rule(Parser *p) +_loop0_137_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35529,7 +35744,7 @@ _loop0_136_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_136[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' dotted_name")); + D(fprintf(stderr, "%*c> _loop0_137[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' dotted_name")); Token * _literal; expr_ty elem; while ( @@ -35561,7 +35776,7 @@ _loop0_136_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_136[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_137[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' dotted_name")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -35578,9 +35793,9 @@ _loop0_136_rule(Parser *p) return _seq; } -// _gather_137: dotted_name _loop0_136 +// _gather_138: dotted_name _loop0_137 static asdl_seq * -_gather_137_rule(Parser *p) +_gather_138_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35591,27 +35806,27 @@ _gather_137_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // dotted_name _loop0_136 + { // dotted_name _loop0_137 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_137[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "dotted_name _loop0_136")); + D(fprintf(stderr, "%*c> _gather_138[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "dotted_name _loop0_137")); expr_ty elem; asdl_seq * seq; if ( (elem = dotted_name_rule(p)) // dotted_name && - (seq = _loop0_136_rule(p)) // _loop0_136 + (seq = _loop0_137_rule(p)) // _loop0_137 ) { - D(fprintf(stderr, "%*c+ _gather_137[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "dotted_name _loop0_136")); + D(fprintf(stderr, "%*c+ _gather_138[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "dotted_name _loop0_137")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_137[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "dotted_name _loop0_136")); + D(fprintf(stderr, "%*c%s _gather_138[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "dotted_name _loop0_137")); } _res = NULL; done: @@ -35619,9 +35834,9 @@ _gather_137_rule(Parser *p) return _res; } -// _tmp_138: NAME (',' | ')' | NEWLINE) +// _tmp_139: NAME (',' | ')' | ';' | NEWLINE) static void * -_tmp_138_rule(Parser *p) +_tmp_139_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35632,27 +35847,27 @@ _tmp_138_rule(Parser *p) } void * _res = NULL; int _mark = p->mark; - { // NAME (',' | ')' | NEWLINE) + { // NAME (',' | ')' | ';' | NEWLINE) if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_138[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME (',' | ')' | NEWLINE)")); - void *_tmp_172_var; + D(fprintf(stderr, "%*c> _tmp_139[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME (',' | ')' | ';' | NEWLINE)")); + void *_tmp_173_var; expr_ty name_var; if ( (name_var = _PyPegen_name_token(p)) // NAME && - (_tmp_172_var = _tmp_172_rule(p)) // ',' | ')' | NEWLINE + (_tmp_173_var = _tmp_173_rule(p)) // ',' | ')' | ';' | NEWLINE ) { - D(fprintf(stderr, "%*c+ _tmp_138[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME (',' | ')' | NEWLINE)")); - _res = _PyPegen_dummy_name(p, name_var, _tmp_172_var); + D(fprintf(stderr, "%*c+ _tmp_139[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME (',' | ')' | ';' | NEWLINE)")); + _res = _PyPegen_dummy_name(p, name_var, _tmp_173_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_138[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NAME (',' | ')' | NEWLINE)")); + D(fprintf(stderr, "%*c%s _tmp_139[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NAME (',' | ')' | ';' | NEWLINE)")); } _res = NULL; done: @@ -35660,9 +35875,9 @@ _tmp_138_rule(Parser *p) return _res; } -// _loop0_139: ',' (expression ['as' star_target]) +// _loop0_140: ',' (expression ['as' star_target]) static asdl_seq * -_loop0_139_rule(Parser *p) +_loop0_140_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35687,13 +35902,13 @@ _loop0_139_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_139[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (expression ['as' star_target])")); + D(fprintf(stderr, "%*c> _loop0_140[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (expression ['as' star_target])")); Token * _literal; void *elem; while ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (elem = _tmp_173_rule(p)) // expression ['as' star_target] + (elem = _tmp_174_rule(p)) // expression ['as' star_target] ) { _res = elem; @@ -35719,7 +35934,7 @@ _loop0_139_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_139[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_140[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' (expression ['as' star_target])")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -35736,9 +35951,9 @@ _loop0_139_rule(Parser *p) return _seq; } -// _gather_140: (expression ['as' star_target]) _loop0_139 +// _gather_141: (expression ['as' star_target]) _loop0_140 static asdl_seq * -_gather_140_rule(Parser *p) +_gather_141_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35749,27 +35964,27 @@ _gather_140_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // (expression ['as' star_target]) _loop0_139 + { // (expression ['as' star_target]) _loop0_140 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_140[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(expression ['as' star_target]) _loop0_139")); + D(fprintf(stderr, "%*c> _gather_141[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(expression ['as' star_target]) _loop0_140")); void *elem; asdl_seq * seq; if ( - (elem = _tmp_173_rule(p)) // expression ['as' star_target] + (elem = _tmp_174_rule(p)) // expression ['as' star_target] && - (seq = _loop0_139_rule(p)) // _loop0_139 + (seq = _loop0_140_rule(p)) // _loop0_140 ) { - D(fprintf(stderr, "%*c+ _gather_140[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(expression ['as' star_target]) _loop0_139")); + D(fprintf(stderr, "%*c+ _gather_141[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(expression ['as' star_target]) _loop0_140")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_140[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(expression ['as' star_target]) _loop0_139")); + D(fprintf(stderr, "%*c%s _gather_141[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(expression ['as' star_target]) _loop0_140")); } _res = NULL; done: @@ -35777,9 +35992,9 @@ _gather_140_rule(Parser *p) return _res; } -// _loop0_141: ',' (expressions ['as' star_target]) +// _loop0_142: ',' (expressions ['as' star_target]) static asdl_seq * -_loop0_141_rule(Parser *p) +_loop0_142_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35804,13 +36019,13 @@ _loop0_141_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_141[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (expressions ['as' star_target])")); + D(fprintf(stderr, "%*c> _loop0_142[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' (expressions ['as' star_target])")); Token * _literal; void *elem; while ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' && - (elem = _tmp_174_rule(p)) // expressions ['as' star_target] + (elem = _tmp_175_rule(p)) // expressions ['as' star_target] ) { _res = elem; @@ -35836,7 +36051,7 @@ _loop0_141_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_141[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_142[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' (expressions ['as' star_target])")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -35853,9 +36068,9 @@ _loop0_141_rule(Parser *p) return _seq; } -// _gather_142: (expressions ['as' star_target]) _loop0_141 +// _gather_143: (expressions ['as' star_target]) _loop0_142 static asdl_seq * -_gather_142_rule(Parser *p) +_gather_143_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35866,27 +36081,27 @@ _gather_142_rule(Parser *p) } asdl_seq * _res = NULL; int _mark = p->mark; - { // (expressions ['as' star_target]) _loop0_141 + { // (expressions ['as' star_target]) _loop0_142 if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _gather_142[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(expressions ['as' star_target]) _loop0_141")); + D(fprintf(stderr, "%*c> _gather_143[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(expressions ['as' star_target]) _loop0_142")); void *elem; asdl_seq * seq; if ( - (elem = _tmp_174_rule(p)) // expressions ['as' star_target] + (elem = _tmp_175_rule(p)) // expressions ['as' star_target] && - (seq = _loop0_141_rule(p)) // _loop0_141 + (seq = _loop0_142_rule(p)) // _loop0_142 ) { - D(fprintf(stderr, "%*c+ _gather_142[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(expressions ['as' star_target]) _loop0_141")); + D(fprintf(stderr, "%*c+ _gather_143[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(expressions ['as' star_target]) _loop0_142")); _res = _PyPegen_seq_insert_in_front(p, elem, seq); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _gather_142[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(expressions ['as' star_target]) _loop0_141")); + D(fprintf(stderr, "%*c%s _gather_143[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(expressions ['as' star_target]) _loop0_142")); } _res = NULL; done: @@ -35894,9 +36109,9 @@ _gather_142_rule(Parser *p) return _res; } -// _tmp_143: 'except' | 'finally' +// _tmp_144: 'except' | 'finally' static void * -_tmp_143_rule(Parser *p) +_tmp_144_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35912,18 +36127,18 @@ _tmp_143_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_143[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except'")); + D(fprintf(stderr, "%*c> _tmp_144[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 677)) // token='except' ) { - D(fprintf(stderr, "%*c+ _tmp_143[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except'")); + D(fprintf(stderr, "%*c+ _tmp_144[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_143[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_144[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except'")); } { // 'finally' @@ -35931,18 +36146,18 @@ _tmp_143_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_143[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'finally'")); + D(fprintf(stderr, "%*c> _tmp_144[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'finally'")); Token * _keyword; if ( (_keyword = _PyPegen_expect_token(p, 673)) // token='finally' ) { - D(fprintf(stderr, "%*c+ _tmp_143[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'finally'")); + D(fprintf(stderr, "%*c+ _tmp_144[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'finally'")); _res = _keyword; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_143[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_144[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'finally'")); } _res = NULL; @@ -35951,9 +36166,9 @@ _tmp_143_rule(Parser *p) return _res; } -// _loop0_144: block +// _loop0_145: block static asdl_seq * -_loop0_144_rule(Parser *p) +_loop0_145_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -35978,7 +36193,7 @@ _loop0_144_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_144[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "block")); + D(fprintf(stderr, "%*c> _loop0_145[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "block")); asdl_stmt_seq* block_var; while ( (block_var = block_rule(p)) // block @@ -36001,7 +36216,7 @@ _loop0_144_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_144[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_145[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "block")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -36018,9 +36233,9 @@ _loop0_144_rule(Parser *p) return _seq; } -// _tmp_145: expression ['as' NAME] +// _tmp_146: expression ['as' NAME] static void * -_tmp_145_rule(Parser *p) +_tmp_146_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36036,7 +36251,7 @@ _tmp_145_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_145[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression ['as' NAME]")); + D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression ['as' NAME]")); void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings expr_ty expression_var; @@ -36046,12 +36261,12 @@ _tmp_145_rule(Parser *p) (_opt_var = _tmp_22_rule(p), !p->error_indicator) // ['as' NAME] ) { - D(fprintf(stderr, "%*c+ _tmp_145[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ['as' NAME]")); + D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ['as' NAME]")); _res = _PyPegen_dummy_name(p, expression_var, _opt_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_145[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_146[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression ['as' NAME]")); } _res = NULL; @@ -36060,9 +36275,9 @@ _tmp_145_rule(Parser *p) return _res; } -// _tmp_146: NEWLINE | ':' +// _tmp_147: NEWLINE | ':' static void * -_tmp_146_rule(Parser *p) +_tmp_147_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36078,18 +36293,18 @@ _tmp_146_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NEWLINE")); + D(fprintf(stderr, "%*c> _tmp_147[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NEWLINE")); Token * newline_var; if ( (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) { - D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE")); + D(fprintf(stderr, "%*c+ _tmp_147[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE")); _res = newline_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_146[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_147[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NEWLINE")); } { // ':' @@ -36097,18 +36312,18 @@ _tmp_146_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c> _tmp_147[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 11)) // token=':' ) { - D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c+ _tmp_147[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_146[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_147[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':'")); } _res = NULL; @@ -36117,9 +36332,9 @@ _tmp_146_rule(Parser *p) return _res; } -// _tmp_147: positional_patterns ',' +// _tmp_148: positional_patterns ',' static void * -_tmp_147_rule(Parser *p) +_tmp_148_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36135,7 +36350,7 @@ _tmp_147_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_147[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "positional_patterns ','")); + D(fprintf(stderr, "%*c> _tmp_148[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "positional_patterns ','")); Token * _literal; asdl_pattern_seq* positional_patterns_var; if ( @@ -36144,12 +36359,12 @@ _tmp_147_rule(Parser *p) (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_147[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "positional_patterns ','")); + D(fprintf(stderr, "%*c+ _tmp_148[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "positional_patterns ','")); _res = _PyPegen_dummy_name(p, positional_patterns_var, _literal); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_147[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_148[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "positional_patterns ','")); } _res = NULL; @@ -36158,9 +36373,9 @@ _tmp_147_rule(Parser *p) return _res; } -// _tmp_148: '}' | ',' +// _tmp_149: '}' | ',' static void * -_tmp_148_rule(Parser *p) +_tmp_149_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36176,18 +36391,18 @@ _tmp_148_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_148[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c> _tmp_149[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 26)) // token='}' ) { - D(fprintf(stderr, "%*c+ _tmp_148[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c+ _tmp_149[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_148[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_149[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'}'")); } { // ',' @@ -36195,18 +36410,18 @@ _tmp_148_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_148[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_149[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_148[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_149[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_148[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_149[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } _res = NULL; @@ -36215,9 +36430,9 @@ _tmp_148_rule(Parser *p) return _res; } -// _tmp_149: '=' | '!' | ':' | '}' +// _tmp_150: '=' | '!' | ':' | '}' static void * -_tmp_149_rule(Parser *p) +_tmp_150_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36233,18 +36448,18 @@ _tmp_149_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_149[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'='")); + D(fprintf(stderr, "%*c> _tmp_150[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'='")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 22)) // token='=' ) { - D(fprintf(stderr, "%*c+ _tmp_149[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'='")); + D(fprintf(stderr, "%*c+ _tmp_150[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'='")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_149[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_150[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'='")); } { // '!' @@ -36252,18 +36467,18 @@ _tmp_149_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_149[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'!'")); + D(fprintf(stderr, "%*c> _tmp_150[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'!'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' ) { - D(fprintf(stderr, "%*c+ _tmp_149[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!'")); + D(fprintf(stderr, "%*c+ _tmp_150[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_149[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_150[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'!'")); } { // ':' @@ -36271,18 +36486,18 @@ _tmp_149_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_149[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c> _tmp_150[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 11)) // token=':' ) { - D(fprintf(stderr, "%*c+ _tmp_149[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c+ _tmp_150[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_149[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_150[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':'")); } { // '}' @@ -36290,18 +36505,18 @@ _tmp_149_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_149[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c> _tmp_150[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 26)) // token='}' ) { - D(fprintf(stderr, "%*c+ _tmp_149[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c+ _tmp_150[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_149[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_150[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'}'")); } _res = NULL; @@ -36310,9 +36525,9 @@ _tmp_149_rule(Parser *p) return _res; } -// _tmp_150: '!' | ':' | '}' +// _tmp_151: '!' | ':' | '}' static void * -_tmp_150_rule(Parser *p) +_tmp_151_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36328,18 +36543,18 @@ _tmp_150_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_150[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'!'")); + D(fprintf(stderr, "%*c> _tmp_151[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'!'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' ) { - D(fprintf(stderr, "%*c+ _tmp_150[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!'")); + D(fprintf(stderr, "%*c+ _tmp_151[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_150[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_151[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'!'")); } { // ':' @@ -36347,18 +36562,18 @@ _tmp_150_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_150[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c> _tmp_151[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 11)) // token=':' ) { - D(fprintf(stderr, "%*c+ _tmp_150[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c+ _tmp_151[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_150[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_151[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':'")); } { // '}' @@ -36366,18 +36581,18 @@ _tmp_150_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_150[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c> _tmp_151[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 26)) // token='}' ) { - D(fprintf(stderr, "%*c+ _tmp_150[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c+ _tmp_151[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_150[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_151[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'}'")); } _res = NULL; @@ -36386,9 +36601,9 @@ _tmp_150_rule(Parser *p) return _res; } -// _tmp_151: '!' NAME +// _tmp_152: '!' NAME static void * -_tmp_151_rule(Parser *p) +_tmp_152_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36404,7 +36619,7 @@ _tmp_151_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_151[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'!' NAME")); + D(fprintf(stderr, "%*c> _tmp_152[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'!' NAME")); Token * _literal; expr_ty name_var; if ( @@ -36413,12 +36628,12 @@ _tmp_151_rule(Parser *p) (name_var = _PyPegen_name_token(p)) // NAME ) { - D(fprintf(stderr, "%*c+ _tmp_151[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' NAME")); + D(fprintf(stderr, "%*c+ _tmp_152[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' NAME")); _res = _PyPegen_dummy_name(p, _literal, name_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_151[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_152[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'!' NAME")); } _res = NULL; @@ -36427,9 +36642,9 @@ _tmp_151_rule(Parser *p) return _res; } -// _tmp_152: ':' | '}' +// _tmp_153: ':' | '}' static void * -_tmp_152_rule(Parser *p) +_tmp_153_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36445,18 +36660,18 @@ _tmp_152_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_152[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 11)) // token=':' ) { - D(fprintf(stderr, "%*c+ _tmp_152[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_152[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':'")); } { // '}' @@ -36464,18 +36679,18 @@ _tmp_152_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_152[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'}'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 26)) // token='}' ) { - D(fprintf(stderr, "%*c+ _tmp_152[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); + D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'}'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_152[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'}'")); } _res = NULL; @@ -36484,9 +36699,66 @@ _tmp_152_rule(Parser *p) return _res; } -// _tmp_153: '+' | '-' | '*' | '/' | '%' | '//' | '@' +// _tmp_154: fstring | string static void * -_tmp_153_rule(Parser *p) +_tmp_154_rule(Parser *p) +{ + if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { + _Pypegen_stack_overflow(p); + } + if (p->error_indicator) { + p->level--; + return NULL; + } + void * _res = NULL; + int _mark = p->mark; + { // fstring + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> _tmp_154[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "fstring")); + expr_ty fstring_var; + if ( + (fstring_var = fstring_rule(p)) // fstring + ) + { + D(fprintf(stderr, "%*c+ _tmp_154[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "fstring")); + _res = fstring_var; + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s _tmp_154[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "fstring")); + } + { // string + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> _tmp_154[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "string")); + expr_ty string_var; + if ( + (string_var = string_rule(p)) // string + ) + { + D(fprintf(stderr, "%*c+ _tmp_154[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "string")); + _res = string_var; + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s _tmp_154[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "string")); + } + _res = NULL; + done: + p->level--; + return _res; +} + +// _tmp_155: '+' | '-' | '*' | '/' | '%' | '//' | '@' +static void * +_tmp_155_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36502,18 +36774,18 @@ _tmp_153_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'+'")); + D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'+'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 14)) // token='+' ) { - D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'+'")); + D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'+'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'+'")); } { // '-' @@ -36521,18 +36793,18 @@ _tmp_153_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'-'")); + D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'-'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 15)) // token='-' ) { - D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'-'")); + D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'-'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'-'")); } { // '*' @@ -36540,18 +36812,18 @@ _tmp_153_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*'")); + D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'*'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 16)) // token='*' ) { - D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*'")); + D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'*'")); } { // '/' @@ -36559,18 +36831,18 @@ _tmp_153_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'/'")); + D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'/'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 17)) // token='/' ) { - D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'/'")); + D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'/'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'/'")); } { // '%' @@ -36578,18 +36850,18 @@ _tmp_153_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'%'")); + D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'%'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 24)) // token='%' ) { - D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'%'")); + D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'%'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'%'")); } { // '//' @@ -36597,18 +36869,18 @@ _tmp_153_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'//'")); + D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'//'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 47)) // token='//' ) { - D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'//'")); + D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'//'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'//'")); } { // '@' @@ -36616,18 +36888,18 @@ _tmp_153_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_153[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'@'")); + D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'@'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 49)) // token='@' ) { - D(fprintf(stderr, "%*c+ _tmp_153[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@'")); + D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_153[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'@'")); } _res = NULL; @@ -36636,9 +36908,9 @@ _tmp_153_rule(Parser *p) return _res; } -// _tmp_154: '+' | '-' | '~' +// _tmp_156: '+' | '-' | '~' static void * -_tmp_154_rule(Parser *p) +_tmp_156_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36654,18 +36926,18 @@ _tmp_154_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_154[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'+'")); + D(fprintf(stderr, "%*c> _tmp_156[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'+'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 14)) // token='+' ) { - D(fprintf(stderr, "%*c+ _tmp_154[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'+'")); + D(fprintf(stderr, "%*c+ _tmp_156[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'+'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_154[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_156[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'+'")); } { // '-' @@ -36673,18 +36945,18 @@ _tmp_154_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_154[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'-'")); + D(fprintf(stderr, "%*c> _tmp_156[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'-'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 15)) // token='-' ) { - D(fprintf(stderr, "%*c+ _tmp_154[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'-'")); + D(fprintf(stderr, "%*c+ _tmp_156[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'-'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_154[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_156[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'-'")); } { // '~' @@ -36692,18 +36964,18 @@ _tmp_154_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_154[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'~'")); + D(fprintf(stderr, "%*c> _tmp_156[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'~'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 31)) // token='~' ) { - D(fprintf(stderr, "%*c+ _tmp_154[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'~'")); + D(fprintf(stderr, "%*c+ _tmp_156[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'~'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_154[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_156[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'~'")); } _res = NULL; @@ -36712,9 +36984,9 @@ _tmp_154_rule(Parser *p) return _res; } -// _tmp_155: star_targets '=' +// _tmp_157: star_targets '=' static void * -_tmp_155_rule(Parser *p) +_tmp_157_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36730,7 +37002,7 @@ _tmp_155_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_155[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_targets '='")); + D(fprintf(stderr, "%*c> _tmp_157[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "star_targets '='")); Token * _literal; expr_ty z; if ( @@ -36739,7 +37011,7 @@ _tmp_155_rule(Parser *p) (_literal = _PyPegen_expect_token(p, 22)) // token='=' ) { - D(fprintf(stderr, "%*c+ _tmp_155[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_targets '='")); + D(fprintf(stderr, "%*c+ _tmp_157[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_targets '='")); _res = z; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -36749,7 +37021,7 @@ _tmp_155_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_155[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_157[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "star_targets '='")); } _res = NULL; @@ -36758,9 +37030,9 @@ _tmp_155_rule(Parser *p) return _res; } -// _tmp_156: '.' | '...' +// _tmp_158: '.' | '...' static void * -_tmp_156_rule(Parser *p) +_tmp_158_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36776,18 +37048,18 @@ _tmp_156_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_156[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'.'")); + D(fprintf(stderr, "%*c> _tmp_158[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'.'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 23)) // token='.' ) { - D(fprintf(stderr, "%*c+ _tmp_156[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'.'")); + D(fprintf(stderr, "%*c+ _tmp_158[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'.'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_156[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_158[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'.'")); } { // '...' @@ -36795,18 +37067,18 @@ _tmp_156_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_156[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'...'")); + D(fprintf(stderr, "%*c> _tmp_158[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'...'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 52)) // token='...' ) { - D(fprintf(stderr, "%*c+ _tmp_156[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'...'")); + D(fprintf(stderr, "%*c+ _tmp_158[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'...'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_156[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_158[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'...'")); } _res = NULL; @@ -36815,9 +37087,9 @@ _tmp_156_rule(Parser *p) return _res; } -// _tmp_157: '@' named_expression NEWLINE +// _tmp_159: '@' named_expression NEWLINE static void * -_tmp_157_rule(Parser *p) +_tmp_159_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36833,7 +37105,7 @@ _tmp_157_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_157[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'@' named_expression NEWLINE")); + D(fprintf(stderr, "%*c> _tmp_159[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'@' named_expression NEWLINE")); Token * _literal; expr_ty f; Token * newline_var; @@ -36845,7 +37117,7 @@ _tmp_157_rule(Parser *p) (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) { - D(fprintf(stderr, "%*c+ _tmp_157[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@' named_expression NEWLINE")); + D(fprintf(stderr, "%*c+ _tmp_159[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@' named_expression NEWLINE")); _res = f; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -36855,7 +37127,7 @@ _tmp_157_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_157[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_159[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'@' named_expression NEWLINE")); } _res = NULL; @@ -36864,9 +37136,9 @@ _tmp_157_rule(Parser *p) return _res; } -// _tmp_158: ',' star_expression +// _tmp_160: ',' star_expression static void * -_tmp_158_rule(Parser *p) +_tmp_160_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36882,7 +37154,7 @@ _tmp_158_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_158[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' star_expression")); + D(fprintf(stderr, "%*c> _tmp_160[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' star_expression")); Token * _literal; expr_ty c; if ( @@ -36891,7 +37163,7 @@ _tmp_158_rule(Parser *p) (c = star_expression_rule(p)) // star_expression ) { - D(fprintf(stderr, "%*c+ _tmp_158[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' star_expression")); + D(fprintf(stderr, "%*c+ _tmp_160[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' star_expression")); _res = c; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -36901,7 +37173,7 @@ _tmp_158_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_158[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_160[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' star_expression")); } _res = NULL; @@ -36910,9 +37182,9 @@ _tmp_158_rule(Parser *p) return _res; } -// _tmp_159: 'or' conjunction +// _tmp_161: 'or' conjunction static void * -_tmp_159_rule(Parser *p) +_tmp_161_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36928,7 +37200,7 @@ _tmp_159_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_159[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'or' conjunction")); + D(fprintf(stderr, "%*c> _tmp_161[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'or' conjunction")); Token * _keyword; expr_ty c; if ( @@ -36937,7 +37209,7 @@ _tmp_159_rule(Parser *p) (c = conjunction_rule(p)) // conjunction ) { - D(fprintf(stderr, "%*c+ _tmp_159[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'or' conjunction")); + D(fprintf(stderr, "%*c+ _tmp_161[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'or' conjunction")); _res = c; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -36947,7 +37219,7 @@ _tmp_159_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_159[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_161[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'or' conjunction")); } _res = NULL; @@ -36956,9 +37228,9 @@ _tmp_159_rule(Parser *p) return _res; } -// _tmp_160: 'and' inversion +// _tmp_162: 'and' inversion static void * -_tmp_160_rule(Parser *p) +_tmp_162_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -36974,7 +37246,7 @@ _tmp_160_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_160[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'and' inversion")); + D(fprintf(stderr, "%*c> _tmp_162[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'and' inversion")); Token * _keyword; expr_ty c; if ( @@ -36983,7 +37255,7 @@ _tmp_160_rule(Parser *p) (c = inversion_rule(p)) // inversion ) { - D(fprintf(stderr, "%*c+ _tmp_160[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'and' inversion")); + D(fprintf(stderr, "%*c+ _tmp_162[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'and' inversion")); _res = c; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -36993,7 +37265,7 @@ _tmp_160_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_160[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_162[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'and' inversion")); } _res = NULL; @@ -37002,9 +37274,9 @@ _tmp_160_rule(Parser *p) return _res; } -// _tmp_161: slice | starred_expression +// _tmp_163: slice | starred_expression static void * -_tmp_161_rule(Parser *p) +_tmp_163_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37020,18 +37292,18 @@ _tmp_161_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_161[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "slice")); + D(fprintf(stderr, "%*c> _tmp_163[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "slice")); expr_ty slice_var; if ( (slice_var = slice_rule(p)) // slice ) { - D(fprintf(stderr, "%*c+ _tmp_161[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "slice")); + D(fprintf(stderr, "%*c+ _tmp_163[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "slice")); _res = slice_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_161[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_163[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "slice")); } { // starred_expression @@ -37039,18 +37311,18 @@ _tmp_161_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_161[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "starred_expression")); + D(fprintf(stderr, "%*c> _tmp_163[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "starred_expression")); expr_ty starred_expression_var; if ( (starred_expression_var = starred_expression_rule(p)) // starred_expression ) { - D(fprintf(stderr, "%*c+ _tmp_161[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "starred_expression")); + D(fprintf(stderr, "%*c+ _tmp_163[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "starred_expression")); _res = starred_expression_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_161[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_163[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "starred_expression")); } _res = NULL; @@ -37059,85 +37331,9 @@ _tmp_161_rule(Parser *p) return _res; } -// _tmp_162: fstring | string | tstring +// _tmp_164: 'if' disjunction static void * -_tmp_162_rule(Parser *p) -{ - if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { - _Pypegen_stack_overflow(p); - } - if (p->error_indicator) { - p->level--; - return NULL; - } - void * _res = NULL; - int _mark = p->mark; - { // fstring - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> _tmp_162[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "fstring")); - expr_ty fstring_var; - if ( - (fstring_var = fstring_rule(p)) // fstring - ) - { - D(fprintf(stderr, "%*c+ _tmp_162[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "fstring")); - _res = fstring_var; - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_162[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "fstring")); - } - { // string - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> _tmp_162[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "string")); - expr_ty string_var; - if ( - (string_var = string_rule(p)) // string - ) - { - D(fprintf(stderr, "%*c+ _tmp_162[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "string")); - _res = string_var; - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_162[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "string")); - } - { // tstring - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> _tmp_162[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "tstring")); - expr_ty tstring_var; - if ( - (tstring_var = tstring_rule(p)) // tstring - ) - { - D(fprintf(stderr, "%*c+ _tmp_162[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "tstring")); - _res = tstring_var; - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_162[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "tstring")); - } - _res = NULL; - done: - p->level--; - return _res; -} - -// _tmp_163: 'if' disjunction -static void * -_tmp_163_rule(Parser *p) +_tmp_164_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37153,7 +37349,7 @@ _tmp_163_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_163[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'if' disjunction")); + D(fprintf(stderr, "%*c> _tmp_164[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'if' disjunction")); Token * _keyword; expr_ty z; if ( @@ -37162,7 +37358,7 @@ _tmp_163_rule(Parser *p) (z = disjunction_rule(p)) // disjunction ) { - D(fprintf(stderr, "%*c+ _tmp_163[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'if' disjunction")); + D(fprintf(stderr, "%*c+ _tmp_164[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'if' disjunction")); _res = z; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -37172,7 +37368,7 @@ _tmp_163_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_163[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_164[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'if' disjunction")); } _res = NULL; @@ -37181,9 +37377,9 @@ _tmp_163_rule(Parser *p) return _res; } -// _tmp_164: starred_expression | (assignment_expression | expression !':=') !'=' +// _tmp_165: starred_expression | (assignment_expression | expression !':=') !'=' static void * -_tmp_164_rule(Parser *p) +_tmp_165_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37199,18 +37395,18 @@ _tmp_164_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_164[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "starred_expression")); + D(fprintf(stderr, "%*c> _tmp_165[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "starred_expression")); expr_ty starred_expression_var; if ( (starred_expression_var = starred_expression_rule(p)) // starred_expression ) { - D(fprintf(stderr, "%*c+ _tmp_164[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "starred_expression")); + D(fprintf(stderr, "%*c+ _tmp_165[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "starred_expression")); _res = starred_expression_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_164[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_165[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "starred_expression")); } { // (assignment_expression | expression !':=') !'=' @@ -37218,20 +37414,20 @@ _tmp_164_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_164[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(assignment_expression | expression !':=') !'='")); - void *_tmp_87_var; + D(fprintf(stderr, "%*c> _tmp_165[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(assignment_expression | expression !':=') !'='")); + void *_tmp_88_var; if ( - (_tmp_87_var = _tmp_87_rule(p)) // assignment_expression | expression !':=' + (_tmp_88_var = _tmp_88_rule(p)) // assignment_expression | expression !':=' && _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 22) // token='=' ) { - D(fprintf(stderr, "%*c+ _tmp_164[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(assignment_expression | expression !':=') !'='")); - _res = _tmp_87_var; + D(fprintf(stderr, "%*c+ _tmp_165[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "(assignment_expression | expression !':=') !'='")); + _res = _tmp_88_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_164[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_165[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(assignment_expression | expression !':=') !'='")); } _res = NULL; @@ -37240,9 +37436,9 @@ _tmp_164_rule(Parser *p) return _res; } -// _tmp_165: ',' star_target +// _tmp_166: ',' star_target static void * -_tmp_165_rule(Parser *p) +_tmp_166_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37258,7 +37454,7 @@ _tmp_165_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_165[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' star_target")); + D(fprintf(stderr, "%*c> _tmp_166[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' star_target")); Token * _literal; expr_ty c; if ( @@ -37267,7 +37463,7 @@ _tmp_165_rule(Parser *p) (c = star_target_rule(p)) // star_target ) { - D(fprintf(stderr, "%*c+ _tmp_165[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' star_target")); + D(fprintf(stderr, "%*c+ _tmp_166[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' star_target")); _res = c; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -37277,7 +37473,7 @@ _tmp_165_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_165[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_166[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' star_target")); } _res = NULL; @@ -37286,10 +37482,10 @@ _tmp_165_rule(Parser *p) return _res; } -// _tmp_166: +// _tmp_167: // | ','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs static void * -_tmp_166_rule(Parser *p) +_tmp_167_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37305,24 +37501,24 @@ _tmp_166_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_166[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs")); - asdl_seq * _gather_89_var; + D(fprintf(stderr, "%*c> _tmp_167[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs")); + asdl_seq * _gather_90_var; Token * _literal; asdl_seq* kwargs_var; if ( - (_gather_89_var = _gather_89_rule(p)) // ','.(starred_expression | (assignment_expression | expression !':=') !'=')+ + (_gather_90_var = _gather_90_rule(p)) // ','.(starred_expression | (assignment_expression | expression !':=') !'=')+ && (_literal = _PyPegen_expect_token(p, 12)) // token=',' && (kwargs_var = kwargs_rule(p)) // kwargs ) { - D(fprintf(stderr, "%*c+ _tmp_166[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs")); - _res = _PyPegen_dummy_name(p, _gather_89_var, _literal, kwargs_var); + D(fprintf(stderr, "%*c+ _tmp_167[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs")); + _res = _PyPegen_dummy_name(p, _gather_90_var, _literal, kwargs_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_166[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_167[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','.(starred_expression | (assignment_expression | expression !':=') !'=')+ ',' kwargs")); } _res = NULL; @@ -37331,9 +37527,9 @@ _tmp_166_rule(Parser *p) return _res; } -// _tmp_167: starred_expression !'=' +// _tmp_168: starred_expression !'=' static void * -_tmp_167_rule(Parser *p) +_tmp_168_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37349,7 +37545,7 @@ _tmp_167_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_167[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "starred_expression !'='")); + D(fprintf(stderr, "%*c> _tmp_168[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "starred_expression !'='")); expr_ty starred_expression_var; if ( (starred_expression_var = starred_expression_rule(p)) // starred_expression @@ -37357,12 +37553,12 @@ _tmp_167_rule(Parser *p) _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 22) // token='=' ) { - D(fprintf(stderr, "%*c+ _tmp_167[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "starred_expression !'='")); + D(fprintf(stderr, "%*c+ _tmp_168[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "starred_expression !'='")); _res = starred_expression_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_167[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_168[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "starred_expression !'='")); } _res = NULL; @@ -37371,9 +37567,9 @@ _tmp_167_rule(Parser *p) return _res; } -// _tmp_168: !STRING expression_without_invalid +// _tmp_169: !STRING expression_without_invalid static void * -_tmp_168_rule(Parser *p) +_tmp_169_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37389,20 +37585,20 @@ _tmp_168_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_168[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "!STRING expression_without_invalid")); + D(fprintf(stderr, "%*c> _tmp_169[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "!STRING expression_without_invalid")); expr_ty expression_without_invalid_var; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _PyPegen_string_token, p) + _PyPegen_lookahead(0, _PyPegen_string_token, p) && (expression_without_invalid_var = expression_without_invalid_rule(p)) // expression_without_invalid ) { - D(fprintf(stderr, "%*c+ _tmp_168[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!STRING expression_without_invalid")); + D(fprintf(stderr, "%*c+ _tmp_169[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!STRING expression_without_invalid")); _res = expression_without_invalid_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_168[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_169[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "!STRING expression_without_invalid")); } _res = NULL; @@ -37411,9 +37607,9 @@ _tmp_168_rule(Parser *p) return _res; } -// _tmp_169: ')' | '**' +// _tmp_170: ')' | '**' static void * -_tmp_169_rule(Parser *p) +_tmp_170_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37429,18 +37625,18 @@ _tmp_169_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_169[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 8)) // token=')' ) { - D(fprintf(stderr, "%*c+ _tmp_169[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_169[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_170[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "')'")); } { // '**' @@ -37448,18 +37644,18 @@ _tmp_169_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_169[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'**'")); + D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'**'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 35)) // token='**' ) { - D(fprintf(stderr, "%*c+ _tmp_169[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**'")); + D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_169[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_170[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'**'")); } _res = NULL; @@ -37468,9 +37664,9 @@ _tmp_169_rule(Parser *p) return _res; } -// _tmp_170: ':' | '**' +// _tmp_171: ':' | '**' static void * -_tmp_170_rule(Parser *p) +_tmp_171_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37486,18 +37682,18 @@ _tmp_170_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c> _tmp_171[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 11)) // token=':' ) { - D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); + D(fprintf(stderr, "%*c+ _tmp_171[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "':'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_170[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_171[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "':'")); } { // '**' @@ -37505,18 +37701,18 @@ _tmp_170_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'**'")); + D(fprintf(stderr, "%*c> _tmp_171[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'**'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 35)) // token='**' ) { - D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**'")); + D(fprintf(stderr, "%*c+ _tmp_171[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_170[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_171[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'**'")); } _res = NULL; @@ -37525,9 +37721,9 @@ _tmp_170_rule(Parser *p) return _res; } -// _loop0_171: (',' bitwise_or) +// _loop0_172: (',' bitwise_or) static asdl_seq * -_loop0_171_rule(Parser *p) +_loop0_172_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37552,13 +37748,13 @@ _loop0_171_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _loop0_171[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(',' bitwise_or)")); - void *_tmp_175_var; + D(fprintf(stderr, "%*c> _loop0_172[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "(',' bitwise_or)")); + void *_tmp_176_var; while ( - (_tmp_175_var = _tmp_175_rule(p)) // ',' bitwise_or + (_tmp_176_var = _tmp_176_rule(p)) // ',' bitwise_or ) { - _res = _tmp_175_var; + _res = _tmp_176_var; if (_n == _children_capacity) { _children_capacity *= 2; void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *)); @@ -37575,7 +37771,7 @@ _loop0_171_rule(Parser *p) _mark = p->mark; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _loop0_171[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _loop0_172[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "(',' bitwise_or)")); } asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena); @@ -37592,9 +37788,9 @@ _loop0_171_rule(Parser *p) return _seq; } -// _tmp_172: ',' | ')' | NEWLINE +// _tmp_173: ',' | ')' | ';' | NEWLINE static void * -_tmp_172_rule(Parser *p) +_tmp_173_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37610,18 +37806,18 @@ _tmp_172_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_172[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c> _tmp_173[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "','")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 12)) // token=',' ) { - D(fprintf(stderr, "%*c+ _tmp_172[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); + D(fprintf(stderr, "%*c+ _tmp_173[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_172[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_173[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "','")); } { // ')' @@ -37629,37 +37825,56 @@ _tmp_172_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_172[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c> _tmp_173[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "')'")); Token * _literal; if ( (_literal = _PyPegen_expect_token(p, 8)) // token=')' ) { - D(fprintf(stderr, "%*c+ _tmp_172[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); + D(fprintf(stderr, "%*c+ _tmp_173[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "')'")); _res = _literal; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_172[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_173[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "')'")); } + { // ';' + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> _tmp_173[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "';'")); + Token * _literal; + if ( + (_literal = _PyPegen_expect_token(p, 13)) // token=';' + ) + { + D(fprintf(stderr, "%*c+ _tmp_173[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "';'")); + _res = _literal; + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s _tmp_173[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "';'")); + } { // NEWLINE if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_172[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NEWLINE")); + D(fprintf(stderr, "%*c> _tmp_173[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NEWLINE")); Token * newline_var; if ( (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) { - D(fprintf(stderr, "%*c+ _tmp_172[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE")); + D(fprintf(stderr, "%*c+ _tmp_173[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE")); _res = newline_var; goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_172[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_173[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NEWLINE")); } _res = NULL; @@ -37668,9 +37883,9 @@ _tmp_172_rule(Parser *p) return _res; } -// _tmp_173: expression ['as' star_target] +// _tmp_174: expression ['as' star_target] static void * -_tmp_173_rule(Parser *p) +_tmp_174_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37686,22 +37901,22 @@ _tmp_173_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_173[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression ['as' star_target]")); + D(fprintf(stderr, "%*c> _tmp_174[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression ['as' star_target]")); void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings expr_ty expression_var; if ( (expression_var = expression_rule(p)) // expression && - (_opt_var = _tmp_176_rule(p), !p->error_indicator) // ['as' star_target] + (_opt_var = _tmp_177_rule(p), !p->error_indicator) // ['as' star_target] ) { - D(fprintf(stderr, "%*c+ _tmp_173[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ['as' star_target]")); + D(fprintf(stderr, "%*c+ _tmp_174[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ['as' star_target]")); _res = _PyPegen_dummy_name(p, expression_var, _opt_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_173[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_174[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression ['as' star_target]")); } _res = NULL; @@ -37710,9 +37925,9 @@ _tmp_173_rule(Parser *p) return _res; } -// _tmp_174: expressions ['as' star_target] +// _tmp_175: expressions ['as' star_target] static void * -_tmp_174_rule(Parser *p) +_tmp_175_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37728,22 +37943,22 @@ _tmp_174_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_174[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expressions ['as' star_target]")); + D(fprintf(stderr, "%*c> _tmp_175[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expressions ['as' star_target]")); void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings expr_ty expressions_var; if ( (expressions_var = expressions_rule(p)) // expressions && - (_opt_var = _tmp_176_rule(p), !p->error_indicator) // ['as' star_target] + (_opt_var = _tmp_177_rule(p), !p->error_indicator) // ['as' star_target] ) { - D(fprintf(stderr, "%*c+ _tmp_174[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expressions ['as' star_target]")); + D(fprintf(stderr, "%*c+ _tmp_175[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expressions ['as' star_target]")); _res = _PyPegen_dummy_name(p, expressions_var, _opt_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_174[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_175[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expressions ['as' star_target]")); } _res = NULL; @@ -37752,9 +37967,9 @@ _tmp_174_rule(Parser *p) return _res; } -// _tmp_175: ',' bitwise_or +// _tmp_176: ',' bitwise_or static void * -_tmp_175_rule(Parser *p) +_tmp_176_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37770,7 +37985,7 @@ _tmp_175_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_175[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' bitwise_or")); + D(fprintf(stderr, "%*c> _tmp_176[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "',' bitwise_or")); Token * _literal; expr_ty bitwise_or_var; if ( @@ -37779,12 +37994,12 @@ _tmp_175_rule(Parser *p) (bitwise_or_var = bitwise_or_rule(p)) // bitwise_or ) { - D(fprintf(stderr, "%*c+ _tmp_175[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' bitwise_or")); + D(fprintf(stderr, "%*c+ _tmp_176[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "',' bitwise_or")); _res = _PyPegen_dummy_name(p, _literal, bitwise_or_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_175[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_176[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "',' bitwise_or")); } _res = NULL; @@ -37793,9 +38008,9 @@ _tmp_175_rule(Parser *p) return _res; } -// _tmp_176: 'as' star_target +// _tmp_177: 'as' star_target static void * -_tmp_176_rule(Parser *p) +_tmp_177_rule(Parser *p) { if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) { _Pypegen_stack_overflow(p); @@ -37811,7 +38026,7 @@ _tmp_176_rule(Parser *p) p->level--; return NULL; } - D(fprintf(stderr, "%*c> _tmp_176[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'as' star_target")); + D(fprintf(stderr, "%*c> _tmp_177[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'as' star_target")); Token * _keyword; expr_ty star_target_var; if ( @@ -37820,12 +38035,12 @@ _tmp_176_rule(Parser *p) (star_target_var = star_target_rule(p)) // star_target ) { - D(fprintf(stderr, "%*c+ _tmp_176[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'as' star_target")); + D(fprintf(stderr, "%*c+ _tmp_177[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'as' star_target")); _res = _PyPegen_dummy_name(p, _keyword, star_target_var); goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_176[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s _tmp_177[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'as' star_target")); } _res = NULL; diff --git a/Parser/pegen.c b/Parser/pegen.c index 3efeba78450d1a..50641de27d37fd 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -379,44 +379,34 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres) return 0; } -int -_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p) -{ - int mark = p->mark; - void *res = func(p); - p->mark = mark; - return (res != NULL) == positive; -} - -int -_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg) -{ - int mark = p->mark; - void *res = func(p, arg); - p->mark = mark; - return (res != NULL) == positive; -} - -int -_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) -{ - int mark = p->mark; - void *res = func(p, arg); - p->mark = mark; - return (res != NULL) == positive; -} - -// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED to disable sanitizer checks on -// undefined behavior (UBsan) in this function, rather than changing 'func' -// callback API. -int _Py_NO_SANITIZE_UNDEFINED -_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p) -{ - int mark = p->mark; - void *res = func(p); - p->mark = mark; - return (res != NULL) == positive; -} +#define LOOKAHEAD1(NAME, RES_TYPE) \ + int \ + NAME (int positive, RES_TYPE (func)(Parser *), Parser *p) \ + { \ + int mark = p->mark; \ + void *res = func(p); \ + p->mark = mark; \ + return (res != NULL) == positive; \ + } + +LOOKAHEAD1(_PyPegen_lookahead, void *) +LOOKAHEAD1(_PyPegen_lookahead_for_expr, expr_ty) +LOOKAHEAD1(_PyPegen_lookahead_for_stmt, stmt_ty) +#undef LOOKAHEAD1 + +#define LOOKAHEAD2(NAME, RES_TYPE, T) \ + int \ + NAME (int positive, RES_TYPE (func)(Parser *, T), Parser *p, T arg) \ + { \ + int mark = p->mark; \ + void *res = func(p, arg); \ + p->mark = mark; \ + return (res != NULL) == positive; \ + } + +LOOKAHEAD2(_PyPegen_lookahead_with_int, Token *, int) +LOOKAHEAD2(_PyPegen_lookahead_with_string, expr_ty, const char *) +#undef LOOKAHEAD2 Token * _PyPegen_expect_token(Parser *p, int type) @@ -549,6 +539,21 @@ _PyPegen_new_identifier(Parser *p, const char *n) } id = id2; } + static const char * const forbidden[] = { + "None", + "True", + "False", + NULL + }; + for (int i = 0; forbidden[i] != NULL; i++) { + if (_PyUnicode_EqualToASCIIString(id, forbidden[i])) { + PyErr_Format(PyExc_ValueError, + "identifier field can't represent '%s' constant", + forbidden[i]); + Py_DECREF(id); + goto error; + } + } PyInterpreterState *interp = _PyInterpreterState_GET(); _PyUnicode_InternImmortal(interp, &id); if (_PyArena_AddPyObject(p->arena, id) < 0) @@ -605,7 +610,8 @@ expr_ty _PyPegen_soft_keyword_token(Parser *p) { Py_ssize_t size; PyBytes_AsStringAndSize(t->bytes, &the_token, &size); for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) { - if (strncmp(*keyword, the_token, (size_t)size) == 0) { + if (strlen(*keyword) == (size_t)size && + strncmp(*keyword, the_token, (size_t)size) == 0) { return _PyPegen_name_from_token(p, t); } } diff --git a/Parser/pegen.h b/Parser/pegen.h index 1862fd7407ec3c..dfa2b0b4fe726c 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -145,10 +145,11 @@ int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node); int _PyPegen_update_memo(Parser *p, int mark, int type, void *node); int _PyPegen_is_memoized(Parser *p, int type, void *pres); -int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *); -int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); -int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*); int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); +int _PyPegen_lookahead_for_expr(int, expr_ty (func)(Parser *), Parser *); +int _PyPegen_lookahead_for_stmt(int, stmt_ty (func)(Parser *), Parser *); +int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); +int _PyPegen_lookahead_with_string(int, expr_ty (func)(Parser *, const char*), Parser *, const char*); Token *_PyPegen_expect_token(Parser *p, int type); void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected); @@ -350,6 +351,7 @@ expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *, expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok); expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok); expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok); +expr_ty _PyPegen_concatenate_tstrings(Parser *p, asdl_expr_seq *, int, int, int, int, PyArena *); expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *, int, int, int, int, PyArena *); expr_ty _PyPegen_FetchRawForm(Parser *p, int, int, int, int); expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty); diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index f62b8695995617..0639a4e42436be 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -2,6 +2,7 @@ #include <errcode.h> #include "pycore_pyerrors.h" // _PyErr_ProgramDecodedTextObject() +#include "pycore_runtime.h" // _Py_ID() #include "lexer/state.h" #include "lexer/lexer.h" #include "pegen.h" @@ -23,6 +24,13 @@ _PyPegen_raise_tokenizer_init_error(PyObject *filename) PyObject *value; PyObject *tback; PyErr_Fetch(&type, &value, &tback); + if (PyErr_GivenExceptionMatches(value, PyExc_SyntaxError)) { + if (PyObject_SetAttr(value, &_Py_ID(filename), filename)) { + goto error; + } + PyErr_Restore(type, value, tback); + return; + } errstr = PyObject_Str(value); if (!errstr) { goto error; diff --git a/Parser/string_parser.c b/Parser/string_parser.c index d3631b114c5a3c..ebe68989d1af58 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -196,15 +196,18 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) len = (size_t)(p - buf); s = buf; - const char *first_invalid_escape; - v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape); + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL, + &first_invalid_escape_char, + &first_invalid_escape_ptr); // HACK: later we can simply pass the line no, since we don't preserve the tokens // when we are decoding the string but we preserve the line numbers. - if (v != NULL && first_invalid_escape != NULL && t != NULL) { - if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) { - /* We have not decref u before because first_invalid_escape points - inside u. */ + if (v != NULL && first_invalid_escape_ptr != NULL && t != NULL) { + if (warn_invalid_escape_sequence(parser, s, first_invalid_escape_ptr, t) < 0) { + /* We have not decref u before because first_invalid_escape_ptr + points inside u. */ Py_XDECREF(u); Py_DECREF(v); return NULL; @@ -217,14 +220,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) static PyObject * decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) { - const char *first_invalid_escape; - PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL, + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) { return NULL; } - if (first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { + if (first_invalid_escape_ptr != NULL) { + if (warn_invalid_escape_sequence(p, s, first_invalid_escape_ptr, t) < 0) { Py_DECREF(result); return NULL; } diff --git a/Parser/tokenizer/file_tokenizer.c b/Parser/tokenizer/file_tokenizer.c index 01e473f58a0777..8c836a3f725829 100644 --- a/Parser/tokenizer/file_tokenizer.c +++ b/Parser/tokenizer/file_tokenizer.c @@ -282,10 +282,8 @@ tok_underflow_interactive(struct tok_state *tok) { } static int -tok_underflow_file(struct tok_state *tok) { - if (tok->start == NULL && !INSIDE_FSTRING(tok)) { - tok->cur = tok->inp = tok->buf; - } +tok_underflow_file(struct tok_state *tok) +{ if (tok->decoding_state == STATE_INIT) { /* We have not yet determined the encoding. If an encoding is found, use the file-pointer @@ -296,8 +294,16 @@ tok_underflow_file(struct tok_state *tok) { } assert(tok->decoding_state != STATE_INIT); } + int raw = tok->decoding_readline == NULL; + if (raw && tok->decoding_state != STATE_NORMAL) { + /* Keep the first line in the buffer to validate it later if + * the encoding has not yet been determined. */ + } + else if (tok->start == NULL && !INSIDE_FSTRING(tok)) { + tok->cur = tok->inp = tok->buf; + } /* Read until '\n' or EOF */ - if (tok->decoding_readline != NULL) { + if (!raw) { /* We already have a codec associated with this input. */ if (!tok_readline_recode(tok)) { return 0; @@ -328,20 +334,35 @@ tok_underflow_file(struct tok_state *tok) { ADVANCE_LINENO(); if (tok->decoding_state != STATE_NORMAL) { - if (tok->lineno > 2) { - tok->decoding_state = STATE_NORMAL; - } - else if (!_PyTokenizer_check_coding_spec(tok->cur, strlen(tok->cur), + if (!_PyTokenizer_check_coding_spec(tok->cur, strlen(tok->cur), tok, fp_setreadl)) { return 0; } + if (tok->lineno >= 2) { + tok->decoding_state = STATE_NORMAL; + } } - /* The default encoding is UTF-8, so make sure we don't have any - non-UTF-8 sequences in it. */ - if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) { - _PyTokenizer_error_ret(tok); - return 0; + if (raw && tok->decoding_state == STATE_NORMAL) { + const char *line = tok->lineno <= 2 ? tok->buf : tok->cur; + int lineno = tok->lineno <= 2 ? 1 : tok->lineno; + if (!tok->encoding) { + /* The default encoding is UTF-8, so make sure we don't have any + non-UTF-8 sequences in it. */ + if (!_PyTokenizer_ensure_utf8(line, tok, lineno)) { + _PyTokenizer_error_ret(tok); + return 0; + } + } + else { + PyObject *tmp = PyUnicode_Decode(line, strlen(line), + tok->encoding, NULL); + if (tmp == NULL) { + _PyTokenizer_error_ret(tok); + return 0; + } + Py_DECREF(tmp); + } } assert(tok->done == E_OK); return tok->done == E_OK; diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c index 5a416adb875aa1..e5e2eed2d34aee 100644 --- a/Parser/tokenizer/helpers.c +++ b/Parser/tokenizer/helpers.c @@ -47,8 +47,10 @@ _syntaxerror_range(struct tok_state *tok, const char *format, goto error; } - args = Py_BuildValue("(O(OiiNii))", errmsg, tok->filename, tok->lineno, - col_offset, errtext, tok->lineno, end_col_offset); + args = Py_BuildValue("(O(OiiNii))", errmsg, + tok->filename ? tok->filename : Py_None, + tok->lineno, col_offset, errtext, + tok->lineno, end_col_offset); if (args) { PyErr_SetObject(PyExc_SyntaxError, args); Py_DECREF(args); @@ -422,10 +424,13 @@ _PyTokenizer_check_coding_spec(const char* line, Py_ssize_t size, struct tok_sta tok->encoding = cs; } else { /* then, compare cs with BOM */ if (strcmp(tok->encoding, cs) != 0) { - _PyTokenizer_error_ret(tok); - PyErr_Format(PyExc_SyntaxError, - "encoding problem: %s with BOM", cs); + tok->line_start = line; + tok->cur = (char *)line; + assert(size <= INT_MAX); + _PyTokenizer_syntaxerror_known_range(tok, 0, (int)size, + "encoding problem: %s with BOM", cs); PyMem_Free(cs); + _PyTokenizer_error_ret(tok); return 0; } PyMem_Free(cs); @@ -496,24 +501,38 @@ valid_utf8(const unsigned char* s) } int -_PyTokenizer_ensure_utf8(char *line, struct tok_state *tok) +_PyTokenizer_ensure_utf8(const char *line, struct tok_state *tok, int lineno) { - int badchar = 0; - unsigned char *c; + const char *badchar = NULL; + const char *c; int length; - for (c = (unsigned char *)line; *c; c += length) { - if (!(length = valid_utf8(c))) { - badchar = *c; + int col_offset = 0; + const char *line_start = line; + for (c = line; *c; c += length) { + if (!(length = valid_utf8((const unsigned char *)c))) { + badchar = c; break; } + col_offset++; + if (*c == '\n') { + lineno++; + col_offset = 0; + line_start = c + 1; + } } if (badchar) { - PyErr_Format(PyExc_SyntaxError, - "Non-UTF-8 code starting with '\\x%.2x' " - "in file %U on line %i, " - "but no encoding declared; " - "see https://peps.python.org/pep-0263/ for details", - badchar, tok->filename, tok->lineno); + tok->lineno = lineno; + tok->line_start = line_start; + tok->cur = (char *)badchar; + _PyTokenizer_syntaxerror_known_range(tok, + col_offset + 1, col_offset + 1, + "Non-UTF-8 code starting with '\\x%.2x'" + "%s%V on line %i, " + "but no encoding declared; " + "see https://peps.python.org/pep-0263/ for details", + (unsigned char)*badchar, + tok->filename ? " in file " : "", tok->filename, "", + lineno); return 0; } return 1; diff --git a/Parser/tokenizer/helpers.h b/Parser/tokenizer/helpers.h index 42ea13cd1f853f..98f6445d5a3b40 100644 --- a/Parser/tokenizer/helpers.h +++ b/Parser/tokenizer/helpers.h @@ -26,7 +26,7 @@ int _PyTokenizer_check_bom(int get_char(struct tok_state *), struct tok_state *tok); int _PyTokenizer_check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, int set_readline(struct tok_state *, const char *)); -int _PyTokenizer_ensure_utf8(char *line, struct tok_state *tok); +int _PyTokenizer_ensure_utf8(const char *line, struct tok_state *tok, int lineno); #ifdef Py_DEBUG void _PyTokenizer_print_escape(FILE *f, const char *s, Py_ssize_t size); diff --git a/Parser/tokenizer/readline_tokenizer.c b/Parser/tokenizer/readline_tokenizer.c index 22f84c77a12b47..0f7769aeb8fd57 100644 --- a/Parser/tokenizer/readline_tokenizer.c +++ b/Parser/tokenizer/readline_tokenizer.c @@ -97,7 +97,7 @@ tok_underflow_readline(struct tok_state* tok) { ADVANCE_LINENO(); /* The default encoding is UTF-8, so make sure we don't have any non-UTF-8 sequences in it. */ - if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) { + if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok, tok->lineno)) { _PyTokenizer_error_ret(tok); return 0; } diff --git a/Parser/tokenizer/string_tokenizer.c b/Parser/tokenizer/string_tokenizer.c index 0c26d5df8d4a40..7299ecf483ccd9 100644 --- a/Parser/tokenizer/string_tokenizer.c +++ b/Parser/tokenizer/string_tokenizer.c @@ -86,15 +86,18 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr /* need to check line 1 and 2 separately since check_coding_spec assumes a single line as input */ if (newl[0]) { + tok->lineno = 1; if (!_PyTokenizer_check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) { return NULL; } if (tok->enc == NULL && tok->decoding_state != STATE_NORMAL && newl[1]) { + tok->lineno = 2; if (!_PyTokenizer_check_coding_spec(newl[0]+1, newl[1] - newl[0], tok, buf_setreadl)) return NULL; } } + tok->lineno = 0; if (tok->enc != NULL) { assert(utf8 == NULL); utf8 = _PyTokenizer_translate_into_utf8(str, tok->enc); @@ -102,6 +105,9 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr return _PyTokenizer_error_ret(tok); str = PyBytes_AS_STRING(utf8); } + else if (!_PyTokenizer_ensure_utf8(str, tok, 1)) { + return _PyTokenizer_error_ret(tok); + } assert(tok->decoding_buffer == NULL); tok->decoding_buffer = utf8; /* CAUTION */ return str; diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 8a7412c7019e60..fe3e656a22ae31 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -360,8 +360,18 @@ static int test_pre_initialization_sys_options(void) size_t xoption_len = wcslen(static_xoption); wchar_t *dynamic_once_warnoption = \ (wchar_t *) calloc(warnoption_len+1, sizeof(wchar_t)); + if (dynamic_once_warnoption == NULL) { + error("out of memory allocating warnoption"); + return 1; + } wchar_t *dynamic_xoption = \ (wchar_t *) calloc(xoption_len+1, sizeof(wchar_t)); + if (dynamic_xoption == NULL) { + free(dynamic_once_warnoption); + error("out of memory allocating xoption"); + return 1; + } + wcsncpy(dynamic_once_warnoption, static_warnoption, warnoption_len+1); wcsncpy(dynamic_xoption, static_xoption, xoption_len+1); @@ -403,9 +413,9 @@ static int test_pre_initialization_sys_options(void) /* bpo-20891: Avoid race condition when initialising the GIL */ -static void bpo20891_thread(void *lockp) +static void bpo20891_thread(void *eventp) { - PyThread_type_lock lock = *((PyThread_type_lock*)lockp); + PyEvent *event = (PyEvent *)eventp; PyGILState_STATE state = PyGILState_Ensure(); if (!PyGILState_Check()) { @@ -414,8 +424,7 @@ static void bpo20891_thread(void *lockp) } PyGILState_Release(state); - - PyThread_release_lock(lock); + _PyEvent_Notify(event); } static int test_bpo20891(void) @@ -425,27 +434,17 @@ static int test_bpo20891(void) /* bpo-20891: Calling PyGILState_Ensure in a non-Python thread must not crash. */ - PyThread_type_lock lock = PyThread_allocate_lock(); - if (!lock) { - error("PyThread_allocate_lock failed!"); - return 1; - } _testembed_Py_InitializeFromConfig(); + PyEvent event = {0}; - unsigned long thrd = PyThread_start_new_thread(bpo20891_thread, &lock); + unsigned long thrd = PyThread_start_new_thread(bpo20891_thread, &event); if (thrd == PYTHREAD_INVALID_THREAD_ID) { error("PyThread_start_new_thread failed!"); return 1; } - PyThread_acquire_lock(lock, WAIT_LOCK); - - Py_BEGIN_ALLOW_THREADS - /* wait until the thread exit */ - PyThread_acquire_lock(lock, WAIT_LOCK); - Py_END_ALLOW_THREADS - PyThread_free_lock(lock); + PyEvent_Wait(&event); Py_Finalize(); @@ -1424,9 +1423,12 @@ static int test_audit_subinterpreter(void) PySys_AddAuditHook(_audit_subinterpreter_hook, NULL); _testembed_Py_InitializeFromConfig(); - Py_NewInterpreter(); - Py_NewInterpreter(); - Py_NewInterpreter(); + PyThreadState *tstate = PyThreadState_Get(); + for (int i = 0; i < 3; ++i) + { + Py_EndInterpreter(Py_NewInterpreter()); + PyThreadState_Swap(tstate); + } Py_Finalize(); @@ -2114,15 +2116,20 @@ static int check_use_frozen_modules(const char *rawval) if (rawval == NULL) { wcscpy(optval, L"frozen_modules"); } - else if (swprintf(optval, 100, -#if defined(_MSC_VER) - L"frozen_modules=%S", -#else - L"frozen_modules=%s", -#endif - rawval) < 0) { - error("rawval is too long"); - return -1; + else { + wchar_t *val = Py_DecodeLocale(rawval, NULL); + if (val == NULL) { + error("unable to decode TESTFROZEN"); + return -1; + } + wcscpy(optval, L"frozen_modules="); + if ((wcslen(optval) + wcslen(val)) >= Py_ARRAY_LENGTH(optval)) { + error("TESTFROZEN is too long"); + PyMem_RawFree(val); + return -1; + } + wcscat(optval, val); + PyMem_RawFree(val); } PyConfig config; @@ -2274,6 +2281,177 @@ static int test_repeated_init_and_inittab(void) return 0; } +/// Multi-phase initialization package & submodule /// + +int +mp_pkg_exec(PyObject *mod) +{ + // make this a namespace package + // empty list = namespace package + if (PyModule_Add(mod, "__path__", PyList_New(0)) < 0) { + return -1; + } + if (PyModule_AddStringConstant(mod, "mp_pkg_exec_slot_ran", "yes") < 0) { + return -1; + } + return 0; +} + +static PyModuleDef_Slot mp_pkg_slots[] = { + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {Py_mod_exec, mp_pkg_exec}, + {0, NULL} +}; + +static struct PyModuleDef mp_pkg_def = { + PyModuleDef_HEAD_INIT, + .m_name = "mp_pkg", + .m_size = 0, + .m_slots = mp_pkg_slots, +}; + +PyMODINIT_FUNC +PyInit_mp_pkg(void) +{ + return PyModuleDef_Init(&mp_pkg_def); +} + +static PyObject * +submod_greet(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyUnicode_FromString("Hello from sub-module"); +} + +static PyMethodDef submod_methods[] = { + {"greet", submod_greet, METH_NOARGS, NULL}, + {NULL}, +}; + +int +mp_submod_exec(PyObject *mod) +{ + return PyModule_AddStringConstant(mod, "mp_submod_exec_slot_ran", "yes"); +} + +static PyModuleDef_Slot mp_submod_slots[] = { + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {Py_mod_exec, mp_submod_exec}, + {0, NULL} +}; + +static struct PyModuleDef mp_submod_def = { + PyModuleDef_HEAD_INIT, + .m_name = "mp_pkg.mp_submod", + .m_size = 0, + .m_methods = submod_methods, + .m_slots = mp_submod_slots, +}; + +PyMODINIT_FUNC +PyInit_mp_submod(void) +{ + return PyModuleDef_Init(&mp_submod_def); +} + +static int +test_inittab_submodule_multiphase(void) +{ + wchar_t* argv[] = { + PROGRAM_NAME, + L"-c", + L"import sys;" + L"import mp_pkg.mp_submod;" + L"print(mp_pkg.mp_submod);" + L"print(sys.modules['mp_pkg.mp_submod']);" + L"print(mp_pkg.mp_submod.greet());" + L"print(f'{mp_pkg.mp_submod.mp_submod_exec_slot_ran=}');" + L"print(f'{mp_pkg.mp_pkg_exec_slot_ran=}');" + }; + PyConfig config; + if (PyImport_AppendInittab("mp_pkg", + &PyInit_mp_pkg) != 0) { + fprintf(stderr, "PyImport_AppendInittab() failed\n"); + return 1; + } + if (PyImport_AppendInittab("mp_pkg.mp_submod", + &PyInit_mp_submod) != 0) { + fprintf(stderr, "PyImport_AppendInittab() failed\n"); + return 1; + } + PyConfig_InitPythonConfig(&config); + config.isolated = 1; + config_set_argv(&config, Py_ARRAY_LENGTH(argv), argv); + init_from_config_clear(&config); + return Py_RunMain(); +} + +/// Single-phase initialization package & submodule /// + +static struct PyModuleDef sp_pkg_def = { + PyModuleDef_HEAD_INIT, + .m_name = "sp_pkg", + .m_size = 0, +}; + +PyMODINIT_FUNC +PyInit_sp_pkg(void) +{ + PyObject *mod = PyModule_Create(&sp_pkg_def); + if (mod == NULL) { + return NULL; + } + // make this a namespace package + // empty list = namespace package + if (PyModule_Add(mod, "__path__", PyList_New(0)) < 0) { + Py_DECREF(mod); + return NULL; + } + return mod; +} + +static struct PyModuleDef sp_submod_def = { + PyModuleDef_HEAD_INIT, + .m_name = "sp_pkg.sp_submod", + .m_size = 0, + .m_methods = submod_methods, +}; + +PyMODINIT_FUNC +PyInit_sp_submod(void) +{ + return PyModule_Create(&sp_submod_def); +} + +static int +test_inittab_submodule_singlephase(void) +{ + wchar_t* argv[] = { + PROGRAM_NAME, + L"-c", + L"import sys;" + L"import sp_pkg.sp_submod;" + L"print(sp_pkg.sp_submod);" + L"print(sys.modules['sp_pkg.sp_submod']);" + L"print(sp_pkg.sp_submod.greet());" + }; + PyConfig config; + if (PyImport_AppendInittab("sp_pkg", + &PyInit_sp_pkg) != 0) { + fprintf(stderr, "PyImport_AppendInittab() failed\n"); + return 1; + } + if (PyImport_AppendInittab("sp_pkg.sp_submod", + &PyInit_sp_submod) != 0) { + fprintf(stderr, "PyImport_AppendInittab() failed\n"); + return 1; + } + PyConfig_InitPythonConfig(&config); + config.isolated = 1; + config_set_argv(&config, Py_ARRAY_LENGTH(argv), argv); + init_from_config_clear(&config); + return Py_RunMain(); +} + static void wrap_allocator(PyMemAllocatorEx *allocator); static void unwrap_allocator(PyMemAllocatorEx *allocator); @@ -2431,7 +2609,8 @@ static struct TestCase TestCases[] = { {"test_frozenmain", test_frozenmain}, #endif {"test_get_incomplete_frame", test_get_incomplete_frame}, - + {"test_inittab_submodule_multiphase", test_inittab_submodule_multiphase}, + {"test_inittab_submodule_singlephase", test_inittab_submodule_singlephase}, {NULL, NULL} }; diff --git a/Python/Python-ast.c b/Python/Python-ast.c index df035568f84be1..aac24ed7d3c0c5 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -5293,7 +5293,7 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw) else { if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "Field '%U' is missing from %.400s._field_types. " + "Field %R is missing from %.400s._field_types. " "This will become an error in Python 3.15.", name, Py_TYPE(self)->tp_name ) < 0) { @@ -5328,7 +5328,7 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw) // simple field (e.g., identifier) if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "%.400s.__init__ missing 1 required positional argument: '%U'. " + "%.400s.__init__ missing 1 required positional argument: %R. " "This will become an error in Python 3.15.", Py_TYPE(self)->tp_name, name ) < 0) { @@ -5528,6 +5528,32 @@ ast_type_replace_check(PyObject *self, Py_DECREF(unused); } } + + // Discard fields from 'expecting' that default to None + PyObject *field_types = NULL; + if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), + &_Py_ID(_field_types), + &field_types) < 0) + { + Py_DECREF(expecting); + return -1; + } + if (field_types != NULL) { + Py_ssize_t pos = 0; + PyObject *field_name, *field_type; + while (PyDict_Next(field_types, &pos, &field_name, &field_type)) { + if (_PyUnion_Check(field_type)) { + // optional field + if (PySet_Discard(expecting, field_name) < 0) { + Py_DECREF(expecting); + Py_DECREF(field_types); + return -1; + } + } + } + Py_DECREF(field_types); + } + // Now 'expecting' contains the fields or attributes // that would not be filled inside ast_type_replace(). Py_ssize_t m = PySet_GET_SIZE(expecting); @@ -5770,7 +5796,7 @@ ast_repr_list(PyObject *list, int depth) for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -5794,7 +5820,7 @@ ast_repr_list(PyObject *list, int depth) } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -5898,7 +5924,7 @@ ast_repr_max_depth(AST_object *self, int depth) } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Python/_warnings.c b/Python/_warnings.c index 39bf1b225ccb0c..912468d2a59a95 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -1479,28 +1479,6 @@ PyErr_WarnExplicitObject(PyObject *category, PyObject *message, return 0; } -/* Like PyErr_WarnExplicitObject, but automatically sets up context */ -int -_PyErr_WarnExplicitObjectWithContext(PyObject *category, PyObject *message, - PyObject *filename, int lineno) -{ - PyObject *unused_filename, *module, *registry; - int unused_lineno; - int stack_level = 1; - - if (!setup_context(stack_level, NULL, &unused_filename, &unused_lineno, - &module, &registry)) { - return -1; - } - - int rc = PyErr_WarnExplicitObject(category, message, filename, lineno, - module, registry); - Py_DECREF(unused_filename); - Py_DECREF(registry); - Py_DECREF(module); - return rc; -} - int PyErr_WarnExplicit(PyObject *category, const char *text, const char *filename_str, int lineno, diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline.S index 0a3265dfeee204..a14e68c0e81932 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline.S @@ -9,9 +9,13 @@ # } _Py_trampoline_func_start: #ifdef __x86_64__ - sub $8, %rsp - call *%rcx - add $8, %rsp +#if defined(__CET__) && (__CET__ & 1) + endbr64 +#endif + push %rbp + mov %rsp, %rbp + call *%rcx + pop %rbp ret #endif // __x86_64__ #if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) @@ -34,3 +38,22 @@ _Py_trampoline_func_start: .globl _Py_trampoline_func_end _Py_trampoline_func_end: .section .note.GNU-stack,"",@progbits +# Note for indicating the assembly code supports CET +#if defined(__x86_64__) && defined(__CET__) && (__CET__ & 1) + .section .note.gnu.property,"a" + .align 8 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .string "GNU" +1: + .align 8 + .long 0xc0000002 + .long 3f - 2f +2: + .long 0x3 +3: + .align 8 +4: +#endif // __x86_64__ diff --git a/Python/ast_preprocess.c b/Python/ast_preprocess.c index bafd67ed790b20..fe6fd9479d1531 100644 --- a/Python/ast_preprocess.c +++ b/Python/ast_preprocess.c @@ -19,6 +19,7 @@ typedef struct { int optimize; int ff_features; int syntax_check_only; + int enable_warnings; _Py_c_array_t cf_finally; /* context for PEP 765 check */ int cf_finally_used; @@ -78,7 +79,7 @@ control_flow_in_finally_warning(const char *kw, stmt_ty n, _PyASTPreprocessState static int before_return(_PyASTPreprocessState *state, stmt_ty node_) { - if (state->cf_finally_used > 0) { + if (state->enable_warnings && state->cf_finally_used > 0) { ControlFlowInFinallyContext *ctx = get_cf_finally_top(state); if (ctx->in_finally && ! ctx->in_funcdef) { if (!control_flow_in_finally_warning("return", node_, state)) { @@ -92,7 +93,7 @@ before_return(_PyASTPreprocessState *state, stmt_ty node_) static int before_loop_exit(_PyASTPreprocessState *state, stmt_ty node_, const char *kw) { - if (state->cf_finally_used > 0) { + if (state->enable_warnings && state->cf_finally_used > 0) { ControlFlowInFinallyContext *ctx = get_cf_finally_top(state); if (ctx->in_finally && ! ctx->in_loop) { if (!control_flow_in_finally_warning(kw, node_, state)) { @@ -435,13 +436,38 @@ stmt_seq_remove_item(asdl_stmt_seq *stmts, Py_ssize_t idx) return 1; } +static int +remove_docstring(asdl_stmt_seq *stmts, Py_ssize_t idx, PyArena *ctx_) +{ + assert(_PyAST_GetDocString(stmts) != NULL); + // In case there's just the docstring in the body, replace it with `pass` + // keyword, so body won't be empty. + if (asdl_seq_LEN(stmts) == 1) { + stmt_ty docstring = (stmt_ty)asdl_seq_GET(stmts, 0); + stmt_ty pass = _PyAST_Pass( + docstring->lineno, docstring->col_offset, + // we know that `pass` always takes 4 chars and a single line, + // while docstring can span on multiple lines + docstring->lineno, docstring->col_offset + 4, + ctx_ + ); + if (pass == NULL) { + return 0; + } + asdl_seq_SET(stmts, 0, pass); + return 1; + } + // In case there are more than 1 body items, just remove the docstring. + return stmt_seq_remove_item(stmts, idx); +} + static int astfold_body(asdl_stmt_seq *stmts, PyArena *ctx_, _PyASTPreprocessState *state) { int docstring = _PyAST_GetDocString(stmts) != NULL; if (docstring && (state->optimize >= 2)) { /* remove the docstring */ - if (!stmt_seq_remove_item(stmts, 0)) { + if (!remove_docstring(stmts, 0, ctx_)) { return 0; } docstring = 0; @@ -943,7 +969,7 @@ astfold_type_param(type_param_ty node_, PyArena *ctx_, _PyASTPreprocessState *st int _PyAST_Preprocess(mod_ty mod, PyArena *arena, PyObject *filename, int optimize, - int ff_features, int syntax_check_only) + int ff_features, int syntax_check_only, int enable_warnings) { _PyASTPreprocessState state; memset(&state, 0, sizeof(_PyASTPreprocessState)); @@ -951,6 +977,7 @@ _PyAST_Preprocess(mod_ty mod, PyArena *arena, PyObject *filename, int optimize, state.optimize = optimize; state.ff_features = ff_features; state.syntax_check_only = syntax_check_only; + state.enable_warnings = enable_warnings; if (_Py_CArray_Init(&state.cf_finally, sizeof(ControlFlowInFinallyContext), 20) < 0) { return -1; } diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index c121ec096aebf4..c25699978cf651 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -663,27 +663,15 @@ build_ftstring_body(asdl_expr_seq *values, bool is_format_spec) } static int -_write_values_subarray(PyUnicodeWriter *writer, asdl_expr_seq *values, Py_ssize_t first_idx, - Py_ssize_t last_idx, char prefix, PyArena *arena) +append_templatestr(PyUnicodeWriter *writer, expr_ty e) { int result = -1; - - asdl_expr_seq *new_values = _Py_asdl_expr_seq_new(last_idx - first_idx + 1, arena); - if (!new_values) { - return result; - } - - Py_ssize_t j = 0; - for (Py_ssize_t i = first_idx; i <= last_idx; ++i) { - asdl_seq_SET(new_values, j++, asdl_seq_GET(values, i)); - } - - PyObject *body = build_ftstring_body(new_values, false); + PyObject *body = build_ftstring_body(e->v.TemplateStr.values, false); if (!body) { - return result; + return -1; } - if (-1 != append_char(writer, prefix) && + if (-1 != append_charp(writer, "t") && -1 != append_repr(writer, body)) { result = 0; @@ -692,64 +680,6 @@ _write_values_subarray(PyUnicodeWriter *writer, asdl_expr_seq *values, Py_ssize_ return result; } -static int -append_templatestr(PyUnicodeWriter *writer, expr_ty e) -{ - PyArena *arena = _PyArena_New(); - if (!arena) { - return -1; - } - - Py_ssize_t last_idx = 0; - Py_ssize_t len = asdl_seq_LEN(e->v.TemplateStr.values); - for (Py_ssize_t i = 0; i < len; i++) { - expr_ty value = asdl_seq_GET(e->v.TemplateStr.values, i); - - // Handle implicit concat of t-strings with f-strings - if (value->kind == FormattedValue_kind) { - if (i > last_idx) { - // Create a new TemplateStr with the values between last_idx and i - // and append it to the writer. - if (_write_values_subarray(writer, e->v.TemplateStr.values, - last_idx, i - 1, 't', arena) == -1) { - goto error; - } - - if (append_charp(writer, " ") == -1) { - goto error; - } - } - - // Append the FormattedValue to the writer. - if (_write_values_subarray(writer, e->v.TemplateStr.values, - i, i, 'f', arena) == -1) { - goto error; - } - - if (i + 1 < len) { - if (append_charp(writer, " ") == -1) { - goto error; - } - } - - last_idx = i + 1; - } - } - - if (last_idx < len) { - if (_write_values_subarray(writer, e->v.TemplateStr.values, - last_idx, len - 1, 't', arena) == -1) { - goto error; - } - } - - return 0; - -error: - _PyArena_Free(arena); - return -1; -} - static int append_joinedstr(PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) { @@ -774,32 +704,37 @@ append_joinedstr(PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) } static int -append_interpolation_value(PyUnicodeWriter *writer, expr_ty e) +append_interpolation_str(PyUnicodeWriter *writer, PyObject *str) { const char *outer_brace = "{"; - /* Grammar allows PR_TUPLE, but use >PR_TEST for adding parenthesis - around a lambda with ':' */ - PyObject *temp_fv_str = expr_as_unicode(e, PR_TEST + 1); - if (!temp_fv_str) { - return -1; - } - if (PyUnicode_Find(temp_fv_str, _Py_LATIN1_CHR('{'), 0, 1, 1) == 0) { + if (PyUnicode_Find(str, _Py_LATIN1_CHR('{'), 0, 1, 1) == 0) { /* Expression starts with a brace, split it with a space from the outer one. */ outer_brace = "{ "; } if (-1 == append_charp(writer, outer_brace)) { - Py_DECREF(temp_fv_str); return -1; } - if (-1 == PyUnicodeWriter_WriteStr(writer, temp_fv_str)) { - Py_DECREF(temp_fv_str); + if (-1 == PyUnicodeWriter_WriteStr(writer, str)) { return -1; } - Py_DECREF(temp_fv_str); return 0; } +static int +append_interpolation_value(PyUnicodeWriter *writer, expr_ty e) +{ + /* Grammar allows PR_TUPLE, but use >PR_TEST for adding parenthesis + around a lambda with ':' */ + PyObject *temp_fv_str = expr_as_unicode(e, PR_TEST + 1); + if (!temp_fv_str) { + return -1; + } + int result = append_interpolation_str(writer, temp_fv_str); + Py_DECREF(temp_fv_str); + return result; +} + static int append_interpolation_conversion(PyUnicodeWriter *writer, int conversion) { @@ -843,7 +778,7 @@ append_interpolation_format_spec(PyUnicodeWriter *writer, expr_ty e) static int append_interpolation(PyUnicodeWriter *writer, expr_ty e) { - if (-1 == append_interpolation_value(writer, e->v.Interpolation.value)) { + if (-1 == append_interpolation_str(writer, e->v.Interpolation.str)) { return -1; } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 3d0295ee3883f2..9b9e56a5b117c0 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -745,7 +745,7 @@ builtin_chr(PyObject *module, PyObject *i) compile as builtin_compile source: object - filename: object(converter="PyUnicode_FSDecoder") + filename: unicode_fs_decoded mode: str flags: int = 0 dont_inherit: bool = False @@ -771,7 +771,7 @@ static PyObject * builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename, const char *mode, int flags, int dont_inherit, int optimize, int feature_version) -/*[clinic end generated code: output=b0c09c84f116d3d7 input=cc78e20e7c7682ba]*/ +/*[clinic end generated code: output=b0c09c84f116d3d7 input=8f0069edbdac381b]*/ { PyObject *source_copy; const char *str; @@ -889,7 +889,6 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename, error: result = NULL; finally: - Py_DECREF(filename); return result; } @@ -958,6 +957,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals) /*[clinic end generated code: output=0a0824aa70093116 input=7c7bce5299a89062]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *result = NULL, *source_copy; const char *str; @@ -971,35 +971,46 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, : "globals must be a dict"); return NULL; } - if (globals == Py_None) { + + int fromframe = 0; + if (globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); - } + assert(globals != NULL); + Py_INCREF(globals); } - else if (locals == Py_None) - locals = Py_NewRef(globals); else { - Py_INCREF(locals); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_TypeError, + "eval must be given globals and locals " + "when called without a frame"); + } + return NULL; + } + Py_INCREF(globals); } - if (globals == NULL || locals == NULL) { - PyErr_SetString(PyExc_TypeError, - "eval must be given globals and locals " - "when called without a frame"); - goto error; + if (locals != Py_None) { + Py_INCREF(locals); } - - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); + return NULL; + } } - if (r < 0) { + else { + locals = Py_NewRef(globals); + } + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1040,6 +1051,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, } error: + Py_XDECREF(globals); Py_XDECREF(locals); return result; } @@ -1070,29 +1082,44 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals, PyObject *closure) /*[clinic end generated code: output=7579eb4e7646743d input=25e989b6d87a3a21]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *v; - if (globals == Py_None) { + int fromframe = 0; + if (globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); + assert(globals != NULL); + Py_INCREF(globals); + } + else { + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_SystemError, + "globals and locals cannot be NULL"); + } + goto error; } - if (!globals || !locals) { - PyErr_SetString(PyExc_SystemError, - "globals and locals cannot be NULL"); + Py_INCREF(globals); + } + + if (locals != Py_None) { + Py_INCREF(locals); + } + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); return NULL; } } - else if (locals == Py_None) { - locals = Py_NewRef(globals); - } else { - Py_INCREF(locals); + locals = Py_NewRef(globals); } if (!PyDict_Check(globals)) { @@ -1106,11 +1133,8 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, Py_TYPE(locals)->tp_name); goto error; } - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); - } - if (r < 0) { + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1187,11 +1211,13 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, } if (v == NULL) goto error; + Py_DECREF(globals); Py_DECREF(locals); Py_DECREF(v); Py_RETURN_NONE; error: + Py_XDECREF(globals); Py_XDECREF(locals); return NULL; } @@ -1241,10 +1267,21 @@ static PyObject * builtin_globals_impl(PyObject *module) /*[clinic end generated code: output=e5dd1527067b94d2 input=9327576f92bb48ba]*/ { - PyObject *d; - - d = PyEval_GetGlobals(); - return Py_XNewRef(d); + PyObject *globals; + if (_PyEval_GetFrame() != NULL) { + globals = PyEval_GetGlobals(); + assert(globals != NULL); + return Py_NewRef(globals); + } + PyThreadState *tstate = _PyThreadState_GET(); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(globals); } @@ -1464,34 +1501,27 @@ map_next(PyObject *self) } Py_ssize_t nargs = 0; - for (i=0; i < niters; i++) { + for (i = 0; i < niters; i++) { PyObject *it = PyTuple_GET_ITEM(lz->iters, i); PyObject *val = Py_TYPE(it)->tp_iternext(it); if (val == NULL) { if (lz->strict) { goto check; } - goto exit; + goto exit_no_result; } stack[i] = val; nargs++; } result = _PyObject_VectorcallTstate(tstate, lz->func, stack, nargs, NULL); + goto exit; -exit: - for (i=0; i < nargs; i++) { - Py_DECREF(stack[i]); - } - if (stack != small_stack) { - PyMem_Free(stack); - } - return result; check: if (PyErr_Occurred()) { if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { // next() on argument i raised an exception (not StopIteration) - return NULL; + goto exit_no_result; } PyErr_Clear(); } @@ -1499,9 +1529,10 @@ map_next(PyObject *self) // ValueError: map() argument 2 is shorter than argument 1 // ValueError: map() argument 3 is shorter than arguments 1-2 const char* plural = i == 1 ? " " : "s 1-"; - return PyErr_Format(PyExc_ValueError, - "map() argument %d is shorter than argument%s%d", - i + 1, plural, i); + PyErr_Format(PyExc_ValueError, + "map() argument %d is shorter than argument%s%d", + i + 1, plural, i); + goto exit_no_result; } for (i = 1; i < niters; i++) { PyObject *it = PyTuple_GET_ITEM(lz->iters, i); @@ -1509,21 +1540,33 @@ map_next(PyObject *self) if (val) { Py_DECREF(val); const char* plural = i == 1 ? " " : "s 1-"; - return PyErr_Format(PyExc_ValueError, - "map() argument %d is longer than argument%s%d", - i + 1, plural, i); + PyErr_Format(PyExc_ValueError, + "map() argument %d is longer than argument%s%d", + i + 1, plural, i); + goto exit_no_result; } if (PyErr_Occurred()) { if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { // next() on argument i raised an exception (not StopIteration) - return NULL; + goto exit_no_result; } PyErr_Clear(); } // Argument i is exhausted. So far so good... } // All arguments are exhausted. Success! - goto exit; + +exit_no_result: + assert(result == NULL); + +exit: + for (i = 0; i < nargs; i++) { + Py_DECREF(stack[i]); + } + if (stack != small_stack) { + PyMem_Free(stack); + } + return result; } static PyObject * @@ -1888,7 +1931,21 @@ static PyObject * builtin_locals_impl(PyObject *module) /*[clinic end generated code: output=b46c94015ce11448 input=7874018d478d5c4b]*/ { - return _PyEval_GetFrameLocals(); + PyObject *locals; + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + assert(locals != NULL || PyErr_Occurred()); + return locals; + } + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(locals); } @@ -2067,15 +2124,21 @@ builtin_oct(PyObject *module, PyObject *number) /*[clinic input] ord as builtin_ord - c: object + character as c: object / -Return the Unicode code point for a one-character string. +Return the ordinal value of a character. + +If the argument is a one-character string, return the Unicode code +point of that character. + +If the argument is a bytes or bytearray object of length 1, return its +single byte value. [clinic start generated code]*/ static PyObject * builtin_ord(PyObject *module, PyObject *c) -/*[clinic end generated code: output=4fa5e87a323bae71 input=3064e5d6203ad012]*/ +/*[clinic end generated code: output=4fa5e87a323bae71 input=98d38480432e1177]*/ { long ord; Py_ssize_t size; @@ -2624,7 +2687,22 @@ builtin_vars(PyObject *self, PyObject *args) if (!PyArg_UnpackTuple(args, "vars", 0, 1, &v)) return NULL; if (v == NULL) { - d = _PyEval_GetFrameLocals(); + if (_PyEval_GetFrame() != NULL) { + d = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + d = _PyEval_GetGlobalsFromRunningMain(tstate); + if (d == NULL) { + if (!_PyErr_Occurred(tstate)) { + d = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(d); + } + } } else { if (PyObject_GetOptionalAttr(v, &_Py_ID(__dict__), &d) == 0) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6a0766626402b0..70f8ac3cff84f9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -181,7 +181,15 @@ dummy_func( } tier1 op(_MAYBE_INSTRUMENT, (--)) { - if (tstate->tracing == 0) { + #ifdef Py_GIL_DISABLED + // For thread-safety, we need to check instrumentation version + // even when tracing. Otherwise, another thread may concurrently + // re-write the bytecode while we are executing this function. + int check_instrumentation = 1; + #else + int check_instrumentation = (tstate->tracing == 0); + #endif + if (check_instrumentation) { uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version) { @@ -3155,7 +3163,14 @@ dummy_func( replaced op(_FOR_ITER, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + ERROR_NO_POP(); + } + PyObject *next_o = func(iter_o); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); @@ -3179,7 +3194,14 @@ dummy_func( op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + ERROR_NO_POP(); + } + PyObject *next_o = func(iter_o); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); @@ -3202,7 +3224,14 @@ dummy_func( inst(INSTRUMENTED_FOR_ITER, (unused/1, iter -- iter, next)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + ERROR_NO_POP(); + } + PyObject *next_o = func(iter_o); if (next_o != NULL) { next = PyStackRef_FromPyObjectSteal(next_o); INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); @@ -4719,6 +4748,7 @@ dummy_func( unused/1 + // Skip over the counter _CHECK_PEP_523 + _CHECK_FUNCTION_VERSION_KW + + _CHECK_RECURSION_REMAINING + _PY_FRAME_KW + _SAVE_RETURN_OFFSET + _PUSH_FRAME; diff --git a/Python/ceval.c b/Python/ceval.c index 490b653f132a6a..faca6fe81b977a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -8,7 +8,7 @@ #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL___ENTER__ #include "pycore_code.h" #include "pycore_dict.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS @@ -333,13 +333,23 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - if (here_addr > _tstate->c_stack_soft_limit + margin_count * PYOS_STACK_MARGIN_BYTES) { +#if _Py_STACK_GROWS_DOWN + if (here_addr > _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES) { +#else + if (here_addr <= _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES) { +#endif return 0; } if (_tstate->c_stack_hard_limit == 0) { _Py_InitializeRecursionLimits(tstate); } - return here_addr <= _tstate->c_stack_soft_limit + margin_count * PYOS_STACK_MARGIN_BYTES; +#if _Py_STACK_GROWS_DOWN + return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES && + here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES; +#else + return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES && + here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES; +#endif } void @@ -347,7 +357,11 @@ _Py_EnterRecursiveCallUnchecked(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; +#if _Py_STACK_GROWS_DOWN if (here_addr < _tstate->c_stack_hard_limit) { +#else + if (here_addr > _tstate->c_stack_hard_limit) { +#endif Py_FatalError("Unchecked stack overflow."); } } @@ -360,9 +374,6 @@ _Py_EnterRecursiveCallUnchecked(PyThreadState *tstate) # define Py_C_STACK_SIZE 1200000 #elif defined(__sparc__) # define Py_C_STACK_SIZE 1600000 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really the stack depth */ -# define Py_C_STACK_SIZE 131072 // wasi-libc DEFAULT_STACK_SIZE #elif defined(__hppa__) || defined(__powerpc64__) # define Py_C_STACK_SIZE 2000000 #else @@ -427,22 +438,28 @@ int pthread_attr_destroy(pthread_attr_t *a) #endif - -void -_Py_InitializeRecursionLimits(PyThreadState *tstate) +static void +hardware_stack_limits(uintptr_t *base, uintptr_t *top, uintptr_t sp) { - _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; #ifdef WIN32 ULONG_PTR low, high; GetCurrentThreadStackLimits(&low, &high); - _tstate->c_stack_top = (uintptr_t)high; + *top = (uintptr_t)high; ULONG guarantee = 0; SetThreadStackGuarantee(&guarantee); - _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + PYOS_STACK_MARGIN_BYTES; - _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + PYOS_STACK_MARGIN_BYTES; + *base = (uintptr_t)low + guarantee; +#elif defined(__APPLE__) + pthread_t this_thread = pthread_self(); + void *stack_addr = pthread_get_stackaddr_np(this_thread); // top of the stack + size_t stack_size = pthread_get_stacksize_np(this_thread); + *top = (uintptr_t)stack_addr; + *base = ((uintptr_t)stack_addr) - stack_size; #else - uintptr_t here_addr = _Py_get_machine_stack_pointer(); -# if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && !defined(__NetBSD__) + /// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size + /// (on alpine at least) is much smaller than expected and imposes undue limits + /// compared to the old stack size estimation. (We assume musl is not glibc.) +# if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \ + !defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__)) size_t stack_size, guard_size; void *stack_addr; pthread_attr_t attr; @@ -453,28 +470,118 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) err |= pthread_attr_destroy(&attr); } if (err == 0) { - uintptr_t base = ((uintptr_t)stack_addr) + guard_size; - _tstate->c_stack_top = base + stack_size; + *base = ((uintptr_t)stack_addr) + guard_size; + *top = (uintptr_t)stack_addr + stack_size; + return; + } +# endif + // Add some space for caller function then round to minimum page size + // This is a guess at the top of the stack, but should be a reasonably + // good guess if called from _PyThreadState_Attach when creating a thread. + // If the thread is attached deep in a call stack, then the guess will be poor. +#if _Py_STACK_GROWS_DOWN + uintptr_t top_addr = _Py_SIZE_ROUND_UP(sp + 8*sizeof(void*), SYSTEM_PAGE_SIZE); + *top = top_addr; + *base = top_addr - Py_C_STACK_SIZE; +# else + uintptr_t base_addr = _Py_SIZE_ROUND_DOWN(sp - 8*sizeof(void*), SYSTEM_PAGE_SIZE); + *base = base_addr; + *top = base_addr + Py_C_STACK_SIZE; +#endif +#endif +} + +static void +tstate_set_stack(PyThreadState *tstate, + uintptr_t base, uintptr_t top) +{ + assert(base < top); + assert((top - base) >= _PyOS_MIN_STACK_SIZE); + #ifdef _Py_THREAD_SANITIZER - // Thread sanitizer crashes if we use a bit more than half the stack. - _tstate->c_stack_soft_limit = base + (stack_size / 2); + // Thread sanitizer crashes if we use more than half the stack. + uintptr_t stacksize = top - base; +# if _Py_STACK_GROWS_DOWN + base += stacksize / 2; +# else + top -= stacksize / 2; +# endif +#endif + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; +#if _Py_STACK_GROWS_DOWN + _tstate->c_stack_top = top; + _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES; + _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2; +# ifndef NDEBUG + // Sanity checks + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + assert(ts->c_stack_hard_limit <= ts->c_stack_soft_limit); + assert(ts->c_stack_soft_limit < ts->c_stack_top); +# endif #else - _tstate->c_stack_soft_limit = base + PYOS_STACK_MARGIN_BYTES * 2; + _tstate->c_stack_top = base; + _tstate->c_stack_hard_limit = top - _PyOS_STACK_MARGIN_BYTES; + _tstate->c_stack_soft_limit = top - _PyOS_STACK_MARGIN_BYTES * 2; +# ifndef NDEBUG + // Sanity checks + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + assert(ts->c_stack_hard_limit >= ts->c_stack_soft_limit); + assert(ts->c_stack_soft_limit > ts->c_stack_top); +# endif #endif - _tstate->c_stack_hard_limit = base + PYOS_STACK_MARGIN_BYTES; - assert(_tstate->c_stack_soft_limit < here_addr); - assert(here_addr < _tstate->c_stack_top); +} + + +void +_Py_InitializeRecursionLimits(PyThreadState *tstate) +{ + uintptr_t base, top; + uintptr_t here_addr = _Py_get_machine_stack_pointer(); + hardware_stack_limits(&base, &top, here_addr); + assert(top != 0); + + tstate_set_stack(tstate, base, top); + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + ts->c_stack_init_base = base; + ts->c_stack_init_top = top; +} + + +int +PyUnstable_ThreadState_SetStackProtection(PyThreadState *tstate, + void *stack_start_addr, size_t stack_size) +{ + if (stack_size < _PyOS_MIN_STACK_SIZE) { + PyErr_Format(PyExc_ValueError, + "stack_size must be at least %zu bytes", + _PyOS_MIN_STACK_SIZE); + return -1; + } + + uintptr_t base = (uintptr_t)stack_start_addr; + uintptr_t top = base + stack_size; + tstate_set_stack(tstate, base, top); + return 0; +} + + +void +PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) +{ + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + if (ts->c_stack_init_top != 0) { + tstate_set_stack(tstate, + ts->c_stack_init_base, + ts->c_stack_init_top); return; } -# endif - _tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096); - _tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE; - _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + PYOS_STACK_MARGIN_BYTES); -#endif + + _Py_InitializeRecursionLimits(tstate); } + /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() - if the recursion_depth reaches recursion_limit. */ + if the stack pointer is between the stack base and c_stack_hard_limit. */ int _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) { @@ -482,9 +589,17 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) uintptr_t here_addr = _Py_get_machine_stack_pointer(); assert(_tstate->c_stack_soft_limit != 0); assert(_tstate->c_stack_hard_limit != 0); +#if _Py_STACK_GROWS_DOWN + assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES); if (here_addr < _tstate->c_stack_hard_limit) { /* Overflowing while handling an overflow. Give up. */ int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024; +#else + assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES); + if (here_addr > _tstate->c_stack_hard_limit) { + /* Overflowing while handling an overflow. Give up. */ + int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024; +#endif char buffer[80]; snprintf(buffer, 80, "Unrecoverable stack overflow (used %d kB)%s", kbytes_used, where); Py_FatalError(buffer); @@ -493,7 +608,11 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) return 0; } else { +#if _Py_STACK_GROWS_DOWN int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024; +#else + int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024; +#endif tstate->recursion_headroom++; _PyErr_Format(tstate, PyExc_RecursionError, "Stack overflow (used %d kB)%s", @@ -2107,6 +2226,7 @@ do_raise(PyThreadState *tstate, PyObject *exc, PyObject *cause) "calling %R should have returned an instance of " "BaseException, not %R", cause, Py_TYPE(fixed_cause)); + Py_DECREF(fixed_cause); goto raise_error; } Py_DECREF(cause); @@ -2425,6 +2545,10 @@ monitor_unwind(PyThreadState *tstate, do_monitor_exc(tstate, frame, instr, PY_MONITORING_EVENT_PY_UNWIND); } +bool +_PyEval_NoToolsForUnwind(PyThreadState *tstate) { + return no_tools_for_global_event(tstate, PY_MONITORING_EVENT_PY_UNWIND); +} static int monitor_handled(PyThreadState *tstate, @@ -2492,21 +2616,10 @@ PyEval_SetProfile(Py_tracefunc func, PyObject *arg) void PyEval_SetProfileAllThreads(Py_tracefunc func, PyObject *arg) { - PyThreadState *this_tstate = _PyThreadState_GET(); - PyInterpreterState* interp = this_tstate->interp; - - _PyRuntimeState *runtime = &_PyRuntime; - HEAD_LOCK(runtime); - PyThreadState* ts = PyInterpreterState_ThreadHead(interp); - HEAD_UNLOCK(runtime); - - while (ts) { - if (_PyEval_SetProfile(ts, func, arg) < 0) { - PyErr_FormatUnraisable("Exception ignored in PyEval_SetProfileAllThreads"); - } - HEAD_LOCK(runtime); - ts = PyThreadState_Next(ts); - HEAD_UNLOCK(runtime); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (_PyEval_SetProfileAllThreads(interp, func, arg) < 0) { + /* Log _PySys_Audit() error */ + PyErr_FormatUnraisable("Exception ignored in PyEval_SetProfileAllThreads"); } } @@ -2523,21 +2636,10 @@ PyEval_SetTrace(Py_tracefunc func, PyObject *arg) void PyEval_SetTraceAllThreads(Py_tracefunc func, PyObject *arg) { - PyThreadState *this_tstate = _PyThreadState_GET(); - PyInterpreterState* interp = this_tstate->interp; - - _PyRuntimeState *runtime = &_PyRuntime; - HEAD_LOCK(runtime); - PyThreadState* ts = PyInterpreterState_ThreadHead(interp); - HEAD_UNLOCK(runtime); - - while (ts) { - if (_PyEval_SetTrace(ts, func, arg) < 0) { - PyErr_FormatUnraisable("Exception ignored in PyEval_SetTraceAllThreads"); - } - HEAD_LOCK(runtime); - ts = PyThreadState_Next(ts); - HEAD_UNLOCK(runtime); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (_PyEval_SetTraceAllThreads(interp, func, arg) < 0) { + /* Log _PySys_Audit() error */ + PyErr_FormatUnraisable("Exception ignored in PyEval_SetTraceAllThreads"); } } @@ -2735,10 +2837,9 @@ _PyEval_GetFrameLocals(void) return locals; } -PyObject * -PyEval_GetGlobals(void) +static PyObject * +_PyEval_GetGlobals(PyThreadState *tstate) { - PyThreadState *tstate = _PyThreadState_GET(); _PyInterpreterFrame *current_frame = _PyThreadState_GetFrame(tstate); if (current_frame == NULL) { return NULL; @@ -2746,6 +2847,120 @@ PyEval_GetGlobals(void) return current_frame->f_globals; } +PyObject * +PyEval_GetGlobals(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + return _PyEval_GetGlobals(tstate); +} + +PyObject * +_PyEval_GetGlobalsFromRunningMain(PyThreadState *tstate) +{ + if (!_PyInterpreterState_IsRunningMain(tstate->interp)) { + return NULL; + } + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + Py_XDECREF(mod); + return NULL; + } + PyObject *globals = PyModule_GetDict(mod); // borrowed + Py_DECREF(mod); + return globals; +} + +static PyObject * +get_globals_builtins(PyObject *globals) +{ + PyObject *builtins = NULL; + if (PyDict_Check(globals)) { + if (PyDict_GetItemRef(globals, &_Py_ID(__builtins__), &builtins) < 0) { + return NULL; + } + } + else { + if (PyMapping_GetOptionalItem( + globals, &_Py_ID(__builtins__), &builtins) < 0) + { + return NULL; + } + } + return builtins; +} + +static int +set_globals_builtins(PyObject *globals, PyObject *builtins) +{ + if (PyDict_Check(globals)) { + if (PyDict_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + else { + if (PyObject_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + return 0; +} + +int +_PyEval_EnsureBuiltins(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyEval_GetBuiltins(); // borrowed + if (builtins == NULL) { + assert(_PyErr_Occurred(tstate)); + return -1; + } + Py_INCREF(builtins); + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { + *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + +int +_PyEval_EnsureBuiltinsWithModule(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyImport_ImportModuleLevel("builtins", NULL, NULL, NULL, 0); + if (builtins == NULL) { + return -1; + } + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { + *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + PyObject* PyEval_GetFrameLocals(void) { diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 5b5018a63731ab..aa68371ac8febf 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1218,30 +1218,30 @@ static inline int run_remote_debugger_source(PyObject *source) // Note that this function is inline to avoid creating a PLT entry // that would be an easy target for a ROP gadget. -static inline void run_remote_debugger_script(const char *path) +static inline void run_remote_debugger_script(PyObject *path) { - if (0 != PySys_Audit("remote_debugger_script", "s", path)) { + if (0 != PySys_Audit("cpython.remote_debugger_script", "O", path)) { PyErr_FormatUnraisable( - "Audit hook failed for remote debugger script %s", path); + "Audit hook failed for remote debugger script %U", path); return; } // Open the debugger script with the open code hook, and reopen the // resulting file object to get a C FILE* object. - PyObject* fileobj = PyFile_OpenCode(path); + PyObject* fileobj = PyFile_OpenCodeObject(path); if (!fileobj) { - PyErr_FormatUnraisable("Can't open debugger script %s", path); + PyErr_FormatUnraisable("Can't open debugger script %U", path); return; } PyObject* source = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(read)); if (!source) { - PyErr_FormatUnraisable("Error reading debugger script %s", path); + PyErr_FormatUnraisable("Error reading debugger script %U", path); } PyObject* res = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(close)); if (!res) { - PyErr_FormatUnraisable("Error closing debugger script %s", path); + PyErr_FormatUnraisable("Error closing debugger script %U", path); } else { Py_DECREF(res); } @@ -1249,7 +1249,7 @@ static inline void run_remote_debugger_script(const char *path) if (source) { if (0 != run_remote_debugger_source(source)) { - PyErr_FormatUnraisable("Error executing debugger script %s", path); + PyErr_FormatUnraisable("Error executing debugger script %U", path); } Py_DECREF(source); } @@ -1278,7 +1278,14 @@ int _PyRunRemoteDebugger(PyThreadState *tstate) pathsz); path[pathsz - 1] = '\0'; if (*path) { - run_remote_debugger_script(path); + PyObject *path_obj = PyUnicode_DecodeFSDefault(path); + if (path_obj == NULL) { + PyErr_FormatUnraisable("Can't decode debugger script"); + } + else { + run_remote_debugger_script(path_obj); + Py_DECREF(path_obj); + } } PyMem_Free(path); } @@ -1380,6 +1387,10 @@ _Py_HandlePending(PyThreadState *tstate) _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); _Py_brc_merge_refcounts(tstate); } + /* Process deferred memory frees held by QSBR */ + if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) { + _PyMem_ProcessDelayed(tstate); + } #endif /* GC scheduled to run */ diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 187ec8fdd26584..4a878d6dff4353 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -79,6 +79,14 @@ #endif #if Py_TAIL_CALL_INTERP +# if defined(__clang__) || defined(__GNUC__) +# if !_Py__has_attribute(preserve_none) || !_Py__has_attribute(musttail) +# error "This compiler does not have support for efficient tail calling." +# endif +# elif defined(_MSC_VER) +# error "Tail calling not supported for MSVC." +# endif + // Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment. # define Py_MUSTTAIL [[clang::musttail]] # define Py_PRESERVE_NONE_CC __attribute__((preserve_none)) @@ -368,7 +376,9 @@ do { \ frame = tstate->current_frame; \ stack_pointer = _PyFrame_GetStackPointer(frame); \ if (next_instr == NULL) { \ - next_instr = frame->instr_ptr; \ + /* gh-140104: The exception handler expects frame->instr_ptr + to after this_instr, not this_instr! */ \ + next_instr = frame->instr_ptr + 1; \ JUMP_TO_LABEL(error); \ } \ DISPATCH(); \ @@ -396,7 +406,9 @@ do { \ stack_pointer = _PyFrame_GetStackPointer(frame); \ if (next_instr == NULL) \ { \ - next_instr = frame->instr_ptr; \ + /* gh-140104: The exception handler expects frame->instr_ptr + to after this_instr, not this_instr! */ \ + next_instr = frame->instr_ptr + 1; \ goto error; \ } \ DISPATCH(); \ diff --git a/Python/clinic/bltinmodule.c.h b/Python/clinic/bltinmodule.c.h index c826a5724f769c..b44a8d1e2972e4 100644 --- a/Python/clinic/bltinmodule.c.h +++ b/Python/clinic/bltinmodule.c.h @@ -296,7 +296,7 @@ builtin_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyObject *argsbuf[7]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 3; PyObject *source; - PyObject *filename; + PyObject *filename = NULL; const char *mode; int flags = 0; int dont_inherit = 0; @@ -367,6 +367,9 @@ builtin_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return_value = builtin_compile_impl(module, source, filename, mode, flags, dont_inherit, optimize, feature_version); exit: + /* Cleanup for filename */ + Py_XDECREF(filename); + return return_value; } @@ -831,10 +834,16 @@ PyDoc_STRVAR(builtin_oct__doc__, {"oct", (PyCFunction)builtin_oct, METH_O, builtin_oct__doc__}, PyDoc_STRVAR(builtin_ord__doc__, -"ord($module, c, /)\n" +"ord($module, character, /)\n" "--\n" "\n" -"Return the Unicode code point for a one-character string."); +"Return the ordinal value of a character.\n" +"\n" +"If the argument is a one-character string, return the Unicode code\n" +"point of that character.\n" +"\n" +"If the argument is a bytes or bytearray object of length 1, return its\n" +"single byte value."); #define BUILTIN_ORD_METHODDEF \ {"ord", (PyCFunction)builtin_ord, METH_O, builtin_ord__doc__}, @@ -1268,4 +1277,4 @@ builtin_issubclass(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=e7a5d0851d7f2cfb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b5d2231033c7591c input=a9049054013a1b77]*/ diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index a47e4d11b54441..09ce77fd12608f 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1793,6 +1793,37 @@ sys__baserepl(PyObject *module, PyObject *Py_UNUSED(ignored)) return sys__baserepl_impl(module); } +PyDoc_STRVAR(sys__clear_type_descriptors__doc__, +"_clear_type_descriptors($module, type, /)\n" +"--\n" +"\n" +"Private function for clearing certain descriptors from a type\'s dictionary.\n" +"\n" +"See gh-135228 for context."); + +#define SYS__CLEAR_TYPE_DESCRIPTORS_METHODDEF \ + {"_clear_type_descriptors", (PyCFunction)sys__clear_type_descriptors, METH_O, sys__clear_type_descriptors__doc__}, + +static PyObject * +sys__clear_type_descriptors_impl(PyObject *module, PyObject *type); + +static PyObject * +sys__clear_type_descriptors(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + PyObject *type; + + if (!PyObject_TypeCheck(arg, &PyType_Type)) { + _PyArg_BadArgument("_clear_type_descriptors", "argument", (&PyType_Type)->tp_name, arg); + goto exit; + } + type = arg; + return_value = sys__clear_type_descriptors_impl(module, type); + +exit: + return return_value; +} + PyDoc_STRVAR(sys__is_gil_enabled__doc__, "_is_gil_enabled($module, /)\n" "--\n" @@ -1948,4 +1979,4 @@ _jit_is_active(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=449d16326e69dcf6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9052f399f40ca32d input=a9049054013a1b77]*/ diff --git a/Python/codecs.c b/Python/codecs.c index caf8d9d5f3c188..4e9aecfe75c2c9 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1204,7 +1204,7 @@ get_standard_encoding_impl(const char *encoding, int *bytelength) } } } - else if (strcmp(encoding, "CP_UTF8") == 0) { + else if (strcmp(encoding, "cp65001") == 0) { *bytelength = 3; return ENC_UTF8; } diff --git a/Python/codegen.c b/Python/codegen.c index 683601103ec99d..bacd3460f2fa9c 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -28,6 +28,8 @@ #include "pycore_pystate.h" // _Py_GetConfig() #include "pycore_symtable.h" // PySTEntryObject #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString +#include "pycore_ceval.h" // SPECIAL___ENTER__ +#include "pycore_template.h" // _PyTemplate_Type #define NEED_OPCODE_METADATA #include "pycore_opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed @@ -3606,10 +3608,11 @@ infer_type(expr_ty e) return &PyGen_Type; case Lambda_kind: return &PyFunction_Type; - case JoinedStr_kind: case TemplateStr_kind: - case FormattedValue_kind: case Interpolation_kind: + return &_PyTemplate_Type; + case JoinedStr_kind: + case FormattedValue_kind: return &PyUnicode_Type; case Constant_kind: return Py_TYPE(e->v.Constant.value); @@ -3663,6 +3666,8 @@ check_subscripter(compiler *c, expr_ty e) case Set_kind: case SetComp_kind: case GeneratorExp_kind: + case TemplateStr_kind: + case Interpolation_kind: case Lambda_kind: { location loc = LOC(e); return _PyCompile_Warn(c, loc, "'%.200s' object is not subscriptable; " @@ -3697,9 +3702,7 @@ check_index(compiler *c, expr_ty e, expr_ty s) case List_kind: case ListComp_kind: case JoinedStr_kind: - case TemplateStr_kind: - case FormattedValue_kind: - case Interpolation_kind: { + case FormattedValue_kind: { location loc = LOC(e); return _PyCompile_Warn(c, loc, "%.200s indices must be integers " "or slices, not %.200s; " @@ -4069,16 +4072,6 @@ codegen_template_str(compiler *c, expr_ty e) } else { VISIT(c, expr, value); - Py_ssize_t j; - for (j = i + 1; j < value_count; j++) { - value = asdl_seq_GET(e->v.TemplateStr.values, j); - if (value->kind == Interpolation_kind) { - break; - } - VISIT(c, expr, value); - ADDOP_INPLACE(c, loc, Add); - } - i = j - 1; stringslen++; last_was_interpolation = 0; } @@ -4411,7 +4404,6 @@ codegen_sync_comprehension_generator(compiler *c, location loc, comprehension_ty gen = (comprehension_ty)asdl_seq_GET(generators, gen_index); - if (!iter_on_stack) { if (gen_index == 0) { assert(METADATA(c)->u_argcount == 1); @@ -4449,7 +4441,6 @@ codegen_sync_comprehension_generator(compiler *c, location loc, if (IS_JUMP_TARGET_LABEL(start)) { depth++; - ADDOP(c, LOC(gen->iter), GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor); } @@ -4865,7 +4856,10 @@ codegen_comprehension(compiler *c, expr_ty e, int type, } Py_CLEAR(co); - VISIT(c, expr, outermost->iter); + if (codegen_comprehension_iter(c, outermost)) { + goto error; + } + ADDOP_I(c, loc, CALL, 0); if (is_async_comprehension && type != COMP_GENEXP) { @@ -5506,10 +5500,12 @@ codegen_annassign(compiler *c, stmt_ty s) RETURN_IF_ERROR(_PyCompile_AddDeferredAnnotation( c, s, &conditional_annotation_index)); if (conditional_annotation_index != NULL) { - ADDOP_NAME( - c, loc, - SCOPE_TYPE(c) == COMPILE_SCOPE_CLASS ? LOAD_DEREF : LOAD_NAME, - &_Py_ID(__conditional_annotations__), cellvars); + if (SCOPE_TYPE(c) == COMPILE_SCOPE_CLASS) { + ADDOP_NAME(c, loc, LOAD_DEREF, &_Py_ID(__conditional_annotations__), cellvars); + } + else { + ADDOP_NAME(c, loc, LOAD_NAME, &_Py_ID(__conditional_annotations__), names); + } ADDOP_LOAD_CONST_NEW(c, loc, conditional_annotation_index); ADDOP_I(c, loc, SET_ADD, 1); ADDOP(c, loc, POP_TOP); diff --git a/Python/compile.c b/Python/compile.c index c04391e682f9ac..e2f1c7e8eb5bce 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -103,6 +103,7 @@ typedef struct _PyCompiler { bool c_save_nested_seqs; /* if true, construct recursive instruction sequences * (including instructions for nested code objects) */ + int c_disable_warning; } compiler; static int @@ -135,7 +136,7 @@ compiler_setup(compiler *c, mod_ty mod, PyObject *filename, c->c_optimize = (optimize == -1) ? _Py_GetConfig()->optimization_level : optimize; c->c_save_nested_seqs = false; - if (!_PyAST_Preprocess(mod, arena, filename, c->c_optimize, merged, 0)) { + if (!_PyAST_Preprocess(mod, arena, filename, c->c_optimize, merged, 0, 1)) { return ERROR; } c->c_st = _PySymtable_Build(mod, filename, &c->c_future); @@ -765,6 +766,9 @@ _PyCompile_PushFBlock(compiler *c, location loc, f->fb_loc = loc; f->fb_exit = exit; f->fb_datum = datum; + if (t == COMPILE_FBLOCK_FINALLY_END) { + c->c_disable_warning++; + } return SUCCESS; } @@ -776,6 +780,9 @@ _PyCompile_PopFBlock(compiler *c, fblocktype t, jump_target_label block_label) u->u_nfblocks--; assert(u->u_fblock[u->u_nfblocks].fb_type == t); assert(SAME_JUMP_TARGET_LABEL(u->u_fblock[u->u_nfblocks].fb_block, block_label)); + if (t == COMPILE_FBLOCK_FINALLY_END) { + c->c_disable_warning--; + } } fblockinfo * @@ -1203,6 +1210,9 @@ _PyCompile_Error(compiler *c, location loc, const char *format, ...) int _PyCompile_Warn(compiler *c, location loc, const char *format, ...) { + if (c->c_disable_warning) { + return 0; + } va_list vargs; va_start(vargs, format); PyObject *msg = PyUnicode_FromFormatV(format, vargs); @@ -1492,7 +1502,7 @@ _PyCompile_AstPreprocess(mod_ty mod, PyObject *filename, PyCompilerFlags *cf, if (optimize == -1) { optimize = _Py_GetConfig()->optimization_level; } - if (!_PyAST_Preprocess(mod, arena, filename, optimize, flags, no_const_folding)) { + if (!_PyAST_Preprocess(mod, arena, filename, optimize, flags, no_const_folding, 0)) { return -1; } return 0; diff --git a/Python/context.c b/Python/context.c index dceaae9b42979d..d1f8b7c2482181 100644 --- a/Python/context.c +++ b/Python/context.c @@ -979,7 +979,7 @@ contextvar_tp_repr(PyObject *op) return NULL; } - if (PyUnicodeWriter_WriteUTF8(writer, "<ContextVar name=", 17) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "<ContextVar name=", 17) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, self->var_name) < 0) { @@ -987,7 +987,7 @@ contextvar_tp_repr(PyObject *op) } if (self->var_default != NULL) { - if (PyUnicodeWriter_WriteUTF8(writer, " default=", 9) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " default=", 9) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, self->var_default) < 0) { @@ -1182,15 +1182,15 @@ token_tp_repr(PyObject *op) if (writer == NULL) { return NULL; } - if (PyUnicodeWriter_WriteUTF8(writer, "<Token", 6) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "<Token", 6) < 0) { goto error; } if (self->tok_used) { - if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) { goto error; } } - if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { @@ -1357,11 +1357,8 @@ get_token_missing(void) PyStatus _PyContext_Init(PyInterpreterState *interp) { - if (!_Py_IsMainInterpreter(interp)) { - return _PyStatus_OK(); - } - PyObject *missing = get_token_missing(); + assert(PyUnstable_IsImmortal(missing)); if (PyDict_SetItemString( _PyType_GetDict(&PyContextToken_Type), "MISSING", missing)) { diff --git a/Python/critical_section.c b/Python/critical_section.c index 73857b85496316..218b580e95176d 100644 --- a/Python/critical_section.c +++ b/Python/critical_section.c @@ -24,11 +24,24 @@ _PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m) // As an optimisation for locking the same object recursively, skip // locking if the mutex is currently locked by the top-most critical // section. - if (tstate->critical_section && - untag_critical_section(tstate->critical_section)->_cs_mutex == m) { - c->_cs_mutex = NULL; - c->_cs_prev = 0; - return; + // If the top-most critical section is a two-mutex critical section, + // then locking is skipped if either mutex is m. + if (tstate->critical_section) { + PyCriticalSection *prev = untag_critical_section(tstate->critical_section); + if (prev->_cs_mutex == m) { + c->_cs_mutex = NULL; + c->_cs_prev = 0; + return; + } + if (tstate->critical_section & _Py_CRITICAL_SECTION_TWO_MUTEXES) { + PyCriticalSection2 *prev2 = (PyCriticalSection2 *) + untag_critical_section(tstate->critical_section); + if (prev2->_cs_mutex2 == m) { + c->_cs_mutex = NULL; + c->_cs_prev = 0; + return; + } + } } c->_cs_mutex = NULL; c->_cs_prev = (uintptr_t)tstate->critical_section; @@ -130,6 +143,15 @@ PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) #endif } +#undef PyCriticalSection_BeginMutex +void +PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) +{ +#ifdef Py_GIL_DISABLED + _PyCriticalSection_BeginMutex(c, m); +#endif +} + #undef PyCriticalSection_End void PyCriticalSection_End(PyCriticalSection *c) @@ -148,6 +170,15 @@ PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) #endif } +#undef PyCriticalSection2_BeginMutex +void +PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +{ +#ifdef Py_GIL_DISABLED + _PyCriticalSection2_BeginMutex(c, m1, m2); +#endif +} + #undef PyCriticalSection2_End void PyCriticalSection2_End(PyCriticalSection2 *c) diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 74ce02f1a26401..542253c14de9b8 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -6,8 +6,14 @@ #include "osdefs.h" // MAXPATHLEN #include "pycore_ceval.h" // _Py_simple_func #include "pycore_crossinterp.h" // _PyXIData_t +#include "pycore_function.h" // _PyFunction_VerifyStateless() +#include "pycore_global_strings.h" // _Py_ID() +#include "pycore_import.h" // _PyImport_SetModule() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_namespace.h" // _PyNamespace_New() +#include "pycore_pythonrun.h" // _Py_SourceAsString() +#include "pycore_runtime.h" // _PyRuntime +#include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_typeobject.h" // _PyStaticType_InitBuiltin() @@ -19,6 +25,7 @@ _Py_GetMainfile(char *buffer, size_t maxlen) PyThreadState *tstate = _PyThreadState_GET(); PyObject *module = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(module) < 0) { + Py_XDECREF(module); return -1; } Py_ssize_t size = _PyModule_GetFilenameUTF8(module, buffer, maxlen); @@ -27,27 +34,6 @@ _Py_GetMainfile(char *buffer, size_t maxlen) } -static PyObject * -import_get_module(PyThreadState *tstate, const char *modname) -{ - PyObject *module = NULL; - if (strcmp(modname, "__main__") == 0) { - module = _Py_GetMainModule(tstate); - if (_Py_CheckMainModule(module) < 0) { - assert(_PyErr_Occurred(tstate)); - return NULL; - } - } - else { - module = PyImport_ImportModule(modname); - if (module == NULL) { - return NULL; - } - } - return module; -} - - static PyObject * runpy_run_path(const char *filename, const char *modname) { @@ -67,97 +53,192 @@ runpy_run_path(const char *filename, const char *modname) } -static PyObject * -pyerr_get_message(PyObject *exc) +static void +set_exc_with_cause(PyObject *exctype, const char *msg) { - assert(!PyErr_Occurred()); - PyObject *args = PyException_GetArgs(exc); - if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { - return NULL; - } - if (PyUnicode_Check(args)) { - return args; - } - PyObject *msg = PySequence_GetItem(args, 0); - Py_DECREF(args); - if (msg == NULL) { - PyErr_Clear(); - return NULL; - } - if (!PyUnicode_Check(msg)) { - Py_DECREF(msg); - return NULL; - } - return msg; + PyObject *cause = PyErr_GetRaisedException(); + PyErr_SetString(exctype, msg); + PyObject *exc = PyErr_GetRaisedException(); + PyException_SetCause(exc, cause); + PyErr_SetRaisedException(exc); } -#define MAX_MODNAME (255) -#define MAX_ATTRNAME (255) -struct attributeerror_info { - char modname[MAX_MODNAME+1]; - char attrname[MAX_ATTRNAME+1]; +/****************************/ +/* module duplication utils */ +/****************************/ + +struct sync_module_result { + PyObject *module; + PyObject *loaded; + PyObject *failed; +}; + +struct sync_module { + const char *filename; + char _filename[MAXPATHLEN+1]; + struct sync_module_result cached; }; +static void +sync_module_clear(struct sync_module *data) +{ + data->filename = NULL; + Py_CLEAR(data->cached.module); + Py_CLEAR(data->cached.loaded); + Py_CLEAR(data->cached.failed); +} + +static void +sync_module_capture_exc(PyThreadState *tstate, struct sync_module *data) +{ + assert(_PyErr_Occurred(tstate)); + PyObject *context = data->cached.failed; + PyObject *exc = _PyErr_GetRaisedException(tstate); + _PyErr_SetRaisedException(tstate, Py_NewRef(exc)); + if (context != NULL) { + PyException_SetContext(exc, context); + } + data->cached.failed = exc; +} + + static int -_parse_attributeerror(PyObject *exc, struct attributeerror_info *info) +ensure_isolated_main(PyThreadState *tstate, struct sync_module *main) { - assert(exc != NULL); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - int res = -1; + // Load the module from the original file (or from a cache). - PyObject *msgobj = pyerr_get_message(exc); - if (msgobj == NULL) { + // First try the local cache. + if (main->cached.failed != NULL) { + // We'll deal with this in apply_isolated_main(). + assert(main->cached.module == NULL); + assert(main->cached.loaded == NULL); + return 0; + } + else if (main->cached.loaded != NULL) { + assert(main->cached.module != NULL); + return 0; + } + assert(main->cached.module == NULL); + + if (main->filename == NULL) { + _PyErr_SetString(tstate, PyExc_NotImplementedError, ""); return -1; } - const char *err = PyUnicode_AsUTF8(msgobj); - if (strncmp(err, "module '", 8) != 0) { - goto finally; + // It wasn't in the local cache so we'll need to populate it. + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + // This is probably unrecoverable, so don't bother caching the error. + assert(_PyErr_Occurred(tstate)); + Py_XDECREF(mod); + return -1; } - err += 8; + PyObject *loaded = NULL; - const char *matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; + // Try the per-interpreter cache for the loaded module. + // XXX Store it in sys.modules? + PyObject *interpns = PyInterpreterState_GetDict(tstate->interp); + assert(interpns != NULL); + PyObject *key = PyUnicode_FromString("CACHED_MODULE_NS___main__"); + if (key == NULL) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + return -1; } - Py_ssize_t len = matched - err; - if (len > MAX_MODNAME) { - goto finally; + else if (PyDict_GetItemRef(interpns, key, &loaded) < 0) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + Py_DECREF(key); + return -1; } - (void)strncpy(info->modname, err, len); - info->modname[len] = '\0'; - err = matched; + else if (loaded == NULL) { + // It wasn't already loaded from file. + loaded = PyModule_NewObject(&_Py_ID(__main__)); + if (loaded == NULL) { + goto error; + } + PyObject *ns = _PyModule_GetDict(loaded); - if (strncmp(err, "' has no attribute '", 20) != 0) { - goto finally; - } - err += 20; + // We don't want to trigger "if __name__ == '__main__':", + // so we use a bogus module name. + PyObject *loaded_ns = + runpy_run_path(main->filename, "<fake __main__>"); + if (loaded_ns == NULL) { + goto error; + } + int res = PyDict_Update(ns, loaded_ns); + Py_DECREF(loaded_ns); + if (res < 0) { + goto error; + } - matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; - } - len = matched - err; - if (len > MAX_ATTRNAME) { - goto finally; + // Set the per-interpreter cache entry. + if (PyDict_SetItem(interpns, key, loaded) < 0) { + goto error; + } } - (void)strncpy(info->attrname, err, len); - info->attrname[len] = '\0'; - err = matched + 1; - if (strlen(err) > 0) { - goto finally; + Py_DECREF(key); + main->cached = (struct sync_module_result){ + .module = mod, + .loaded = loaded, + }; + return 0; + +error: + sync_module_capture_exc(tstate, main); + Py_XDECREF(loaded); + Py_DECREF(mod); + Py_XDECREF(key); + return -1; +} + +#ifndef NDEBUG +static int +main_mod_matches(PyObject *expected) +{ + PyObject *mod = PyImport_GetModule(&_Py_ID(__main__)); + Py_XDECREF(mod); + return mod == expected; +} +#endif + +static int +apply_isolated_main(PyThreadState *tstate, struct sync_module *main) +{ + assert((main->cached.loaded == NULL) == (main->cached.loaded == NULL)); + if (main->cached.failed != NULL) { + // It must have failed previously. + assert(main->cached.loaded == NULL); + _PyErr_SetRaisedException(tstate, main->cached.failed); + return -1; } - res = 0; + assert(main->cached.loaded != NULL); -finally: - Py_DECREF(msgobj); - return res; + assert(main_mod_matches(main->cached.module)); + if (_PyImport_SetModule(&_Py_ID(__main__), main->cached.loaded) < 0) { + sync_module_capture_exc(tstate, main); + return -1; + } + return 0; } -#undef MAX_MODNAME -#undef MAX_ATTRNAME +static void +restore_main(PyThreadState *tstate, struct sync_module *main) +{ + assert(main->cached.failed == NULL); + assert(main->cached.module != NULL); + assert(main->cached.loaded != NULL); + PyObject *exc = _PyErr_GetRaisedException(tstate); + assert(main_mod_matches(main->cached.loaded)); + int res = _PyImport_SetModule(&_Py_ID(__main__), main->cached.module); + assert(res == 0); + if (res < 0) { + PyErr_FormatUnraisable("Exception ignored while restoring __main__"); + } + _PyErr_SetRaisedException(tstate, exc); +} /**************/ @@ -207,16 +288,16 @@ _Py_CallInInterpreterAndRawFree(PyInterpreterState *interp, /* cross-interpreter data */ /**************************/ -/* registry of {type -> xidatafunc} */ +/* registry of {type -> _PyXIData_getdata_t} */ -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - xidatafunc. It would be simpler and more efficient. */ +/* For now we use a global registry of shareable classes. + An alternative would be to add a tp_* slot for a class's + _PyXIData_getdata_t. It would be simpler and more efficient. */ static void xid_lookup_init(_PyXIData_lookup_t *); static void xid_lookup_fini(_PyXIData_lookup_t *); struct _dlcontext; -static xidatafunc lookup_getdata(struct _dlcontext *, PyObject *); +static _PyXIData_getdata_t lookup_getdata(struct _dlcontext *, PyObject *); #include "crossinterp_data_lookup.h" @@ -340,7 +421,7 @@ _set_xid_lookup_failure(PyThreadState *tstate, PyObject *obj, const char *msg, set_notshareableerror(tstate, cause, 0, msg); } else { - msg = "%S does not support cross-interpreter data"; + msg = "%R does not support cross-interpreter data"; format_notshareableerror(tstate, cause, 0, msg, obj); } } @@ -353,8 +434,8 @@ _PyObject_CheckXIData(PyThreadState *tstate, PyObject *obj) if (get_lookup_context(tstate, &ctx) < 0) { return -1; } - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (!_PyErr_Occurred(tstate)) { _set_xid_lookup_failure(tstate, obj, NULL, NULL); } @@ -385,9 +466,9 @@ _check_xidata(PyThreadState *tstate, _PyXIData_t *xidata) return 0; } -int -_PyObject_GetXIData(PyThreadState *tstate, - PyObject *obj, _PyXIData_t *xidata) +static int +_get_xidata(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, _PyXIData_t *xidata) { PyInterpreterState *interp = tstate->interp; @@ -395,6 +476,7 @@ _PyObject_GetXIData(PyThreadState *tstate, assert(xidata->obj == NULL); if (xidata->data != NULL || xidata->obj != NULL) { _PyErr_SetString(tstate, PyExc_ValueError, "xidata not cleared"); + return -1; } // Call the "getdata" func for the object. @@ -403,8 +485,8 @@ _PyObject_GetXIData(PyThreadState *tstate, return -1; } Py_INCREF(obj); - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (PyErr_Occurred()) { Py_DECREF(obj); return -1; @@ -416,7 +498,9 @@ _PyObject_GetXIData(PyThreadState *tstate, } return -1; } - int res = getdata(tstate, obj, xidata); + int res = getdata.basic != NULL + ? getdata.basic(tstate, obj, xidata) + : getdata.fallback(tstate, obj, fallback, xidata); Py_DECREF(obj); if (res != 0) { PyObject *cause = _PyErr_GetRaisedException(tstate); @@ -436,6 +520,51 @@ _PyObject_GetXIData(PyThreadState *tstate, return 0; } +int +_PyObject_GetXIDataNoFallback(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return _get_xidata(tstate, obj, _PyXIDATA_XIDATA_ONLY, xidata); +} + +int +_PyObject_GetXIData(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, + _PyXIData_t *xidata) +{ + switch (fallback) { + case _PyXIDATA_XIDATA_ONLY: + return _get_xidata(tstate, obj, fallback, xidata); + case _PyXIDATA_FULL_FALLBACK: + if (_get_xidata(tstate, obj, fallback, xidata) == 0) { + return 0; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (PyFunction_Check(obj)) { + if (_PyFunction_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + _PyErr_Clear(tstate); + } + // We could try _PyMarshal_GetXIData() but we won't for now. + if (_PyPickle_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + // Raise the original exception. + _PyErr_SetRaisedException(tstate, exc); + return -1; + default: +#ifdef Py_DEBUG + Py_FatalError("unsupported xidata fallback option"); +#endif + _PyErr_SetString(tstate, PyExc_SystemError, + "unsupported xidata fallback option"); + return -1; + } +} + /* pickle C-API */ @@ -456,28 +585,6 @@ _PyPickle_Dumps(struct _pickle_context *ctx, PyObject *obj) } -struct sync_module_result { - PyObject *module; - PyObject *loaded; - PyObject *failed; -}; - -struct sync_module { - const char *filename; - char _filename[MAXPATHLEN+1]; - struct sync_module_result cached; -}; - -static void -sync_module_clear(struct sync_module *data) -{ - data->filename = NULL; - Py_CLEAR(data->cached.module); - Py_CLEAR(data->cached.loaded); - Py_CLEAR(data->cached.failed); -} - - struct _unpickle_context { PyThreadState *tstate; // We only special-case the __main__ module, @@ -491,142 +598,88 @@ _unpickle_context_clear(struct _unpickle_context *ctx) sync_module_clear(&ctx->main); } -static struct sync_module_result -_unpickle_context_get_module(struct _unpickle_context *ctx, - const char *modname) +static int +check_missing___main___attr(PyObject *exc) { - if (strcmp(modname, "__main__") == 0) { - return ctx->main.cached; + assert(!PyErr_Occurred()); + if (!PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)) { + return 0; } - else { - return (struct sync_module_result){ - .failed = PyExc_NotImplementedError, - }; + + // Get the error message. + PyObject *args = PyException_GetArgs(exc); + if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { + assert(!PyErr_Occurred()); + return 0; + } + PyObject *msgobj = args; + if (!PyUnicode_Check(msgobj)) { + msgobj = PySequence_GetItem(args, 0); + Py_DECREF(args); + if (msgobj == NULL) { + PyErr_Clear(); + return 0; + } } + const char *err = PyUnicode_AsUTF8(msgobj); + + // Check if it's a missing __main__ attr. + int cmp = strncmp(err, "module '__main__' has no attribute '", 36); + Py_DECREF(msgobj); + return cmp == 0; } -static struct sync_module_result -_unpickle_context_set_module(struct _unpickle_context *ctx, - const char *modname) +static PyObject * +_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) { - struct sync_module_result res = {0}; - struct sync_module_result *cached = NULL; - const char *filename = NULL; - const char *run_modname = modname; - if (strcmp(modname, "__main__") == 0) { - cached = &ctx->main.cached; - filename = ctx->main.filename; - // We don't want to trigger "if __name__ == '__main__':". - run_modname = "<fake __main__>"; - } - else { - res.failed = PyExc_NotImplementedError; - goto finally; - } + PyThreadState *tstate = ctx->tstate; - res.module = import_get_module(ctx->tstate, modname); - if (res.module == NULL) { - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); - goto finally; + PyObject *exc = NULL; + PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); + if (loads == NULL) { + return NULL; } - if (filename == NULL) { - Py_CLEAR(res.module); - res.failed = PyExc_NotImplementedError; + // Make an initial attempt to unpickle. + PyObject *obj = PyObject_CallOneArg(loads, pickled); + if (obj != NULL) { goto finally; } - res.loaded = runpy_run_path(filename, run_modname); - if (res.loaded == NULL) { - Py_CLEAR(res.module); - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); + assert(_PyErr_Occurred(tstate)); + if (ctx == NULL) { goto finally; } - -finally: - if (cached != NULL) { - assert(cached->module == NULL); - assert(cached->loaded == NULL); - assert(cached->failed == NULL); - *cached = res; - } - return res; -} - - -static int -_handle_unpickle_missing_attr(struct _unpickle_context *ctx, PyObject *exc) -{ - // The caller must check if an exception is set or not when -1 is returned. - assert(!_PyErr_Occurred(ctx->tstate)); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - struct attributeerror_info info; - if (_parse_attributeerror(exc, &info) < 0) { - return -1; + exc = _PyErr_GetRaisedException(tstate); + if (!check_missing___main___attr(exc)) { + goto finally; } - // Get the module. - struct sync_module_result mod = _unpickle_context_get_module(ctx, info.modname); - if (mod.failed != NULL) { - // It must have failed previously. - return -1; - } - if (mod.module == NULL) { - mod = _unpickle_context_set_module(ctx, info.modname); - if (mod.failed != NULL) { - return -1; - } - assert(mod.module != NULL); + // Temporarily swap in a fake __main__ loaded from the original + // file and cached. Note that functions will use the cached ns + // for __globals__, // not the actual module. + if (ensure_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - - // Bail out if it is unexpectedly set already. - if (PyObject_HasAttrString(mod.module, info.attrname)) { - return -1; + if (apply_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - // Try setting the attribute. - PyObject *value = NULL; - if (PyDict_GetItemStringRef(mod.loaded, info.attrname, &value) <= 0) { - return -1; - } - assert(value != NULL); - int res = PyObject_SetAttrString(mod.module, info.attrname, value); - Py_DECREF(value); - if (res < 0) { - return -1; + // Try to unpickle once more. + obj = PyObject_CallOneArg(loads, pickled); + restore_main(tstate, &ctx->main); + if (obj == NULL) { + goto finally; } + Py_CLEAR(exc); - return 0; -} - -static PyObject * -_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) -{ - PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); - if (loads == NULL) { - return NULL; - } - PyObject *obj = PyObject_CallOneArg(loads, pickled); - if (ctx != NULL) { - while (obj == NULL) { - assert(_PyErr_Occurred(ctx->tstate)); - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { - // We leave other failures unhandled. - break; - } - // Try setting the attr if not set. - PyObject *exc = _PyErr_GetRaisedException(ctx->tstate); - if (_handle_unpickle_missing_attr(ctx, exc) < 0) { - // Any resulting exceptions are ignored - // in favor of the original. - _PyErr_SetRaisedException(ctx->tstate, exc); - break; - } - Py_CLEAR(exc); - // Retry with the attribute set. - obj = PyObject_CallOneArg(loads, pickled); +finally: + if (exc != NULL) { + if (_PyErr_Occurred(tstate)) { + sync_module_capture_exc(tstate, &ctx->main); } + // We restore the original exception. + // It might make sense to chain it (__context__). + _PyErr_SetRaisedException(tstate, exc); } Py_DECREF(loads); return obj; @@ -784,35 +837,167 @@ _PyMarshal_GetXIData(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) } -/* using cross-interpreter data */ - -PyObject * -_PyXIData_NewObject(_PyXIData_t *xidata) -{ - return xidata->new_object(xidata); -} +/* script wrapper */ static int -_call_clear_xidata(void *data) +verify_script(PyThreadState *tstate, PyCodeObject *co, int checked, int pure) { - _xidata_clear((_PyXIData_t *)data); + // Make sure it isn't a closure and (optionally) doesn't use globals. + PyObject *builtins = NULL; + if (pure) { + builtins = _PyEval_GetBuiltins(tstate); + assert(builtins != NULL); + } + if (checked) { + assert(_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) == 0); + } + else if (_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) < 0) { + return -1; + } + // Make sure it doesn't have args. + if (co->co_argcount > 0 + || co->co_posonlyargcount > 0 + || co->co_kwonlyargcount > 0 + || co->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) + { + _PyErr_SetString(tstate, PyExc_ValueError, + "code with args not supported"); + return -1; + } + // Make sure it doesn't return anything. + if (!_PyCode_ReturnsOnlyNone(co)) { + _PyErr_SetString(tstate, PyExc_ValueError, + "code that returns a value is not a script"); + return -1; + } return 0; } static int -_xidata_release(_PyXIData_t *xidata, int rawfree) -{ - if ((xidata->data == NULL || xidata->free == NULL) && xidata->obj == NULL) { - // Nothing to release! - if (rawfree) { - PyMem_RawFree(xidata); - } - else { - xidata->data = NULL; +get_script_xidata(PyThreadState *tstate, PyObject *obj, int pure, + _PyXIData_t *xidata) +{ + // Get the corresponding code object. + PyObject *code = NULL; + int checked = 0; + if (PyCode_Check(obj)) { + code = obj; + Py_INCREF(code); + } + else if (PyFunction_Check(obj)) { + code = PyFunction_GET_CODE(obj); + assert(code != NULL); + Py_INCREF(code); + if (pure) { + if (_PyFunction_VerifyStateless(tstate, obj) < 0) { + goto error; + } + checked = 1; } - return 0; } - + else { + const char *filename = "<script>"; + int optimize = 0; + PyCompilerFlags cf = _PyCompilerFlags_INIT; + cf.cf_flags = PyCF_SOURCE_IS_UTF8; + PyObject *ref = NULL; + const char *script = _Py_SourceAsString(obj, "???", "???", &cf, &ref); + if (script == NULL) { + if (!_PyObject_SupportedAsScript(obj)) { + // We discard the raised exception. + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported script %R", obj); + } + goto error; + } +#ifdef Py_GIL_DISABLED + // Don't immortalize code constants to avoid memory leaks. + ((_PyThreadStateImpl *)tstate)->suppress_co_const_immortalization++; +#endif + code = Py_CompileStringExFlags( + script, filename, Py_file_input, &cf, optimize); +#ifdef Py_GIL_DISABLED + ((_PyThreadStateImpl *)tstate)->suppress_co_const_immortalization--; +#endif + Py_XDECREF(ref); + if (code == NULL) { + goto error; + } + // Compiled text can't have args or any return statements, + // nor be a closure. It can use globals though. + if (!pure) { + // We don't need to check for globals either. + checked = 1; + } + } + + // Make sure it's actually a script. + if (verify_script(tstate, (PyCodeObject *)code, checked, pure) < 0) { + goto error; + } + + // Convert the code object. + int res = _PyCode_GetXIData(tstate, code, xidata); + Py_DECREF(code); + if (res < 0) { + return -1; + } + return 0; + +error: + Py_XDECREF(code); + PyObject *cause = _PyErr_GetRaisedException(tstate); + assert(cause != NULL); + _set_xid_lookup_failure( + tstate, NULL, "object not a valid script", cause); + Py_DECREF(cause); + return -1; +} + +int +_PyCode_GetScriptXIData(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return get_script_xidata(tstate, obj, 0, xidata); +} + +int +_PyCode_GetPureScriptXIData(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return get_script_xidata(tstate, obj, 1, xidata); +} + + +/* using cross-interpreter data */ + +PyObject * +_PyXIData_NewObject(_PyXIData_t *xidata) +{ + return xidata->new_object(xidata); +} + +static int +_call_clear_xidata(void *data) +{ + _xidata_clear((_PyXIData_t *)data); + return 0; +} + +static int +_xidata_release(_PyXIData_t *xidata, int rawfree) +{ + if ((xidata->data == NULL || xidata->free == NULL) && xidata->obj == NULL) { + // Nothing to release! + if (rawfree) { + PyMem_RawFree(xidata); + } + else { + xidata->data = NULL; + } + return 0; + } + // Switch to the original interpreter. PyInterpreterState *interp = _PyInterpreterState_LookUpID( _PyXIData_INTERPID(xidata)); @@ -968,8 +1153,8 @@ _release_xid_data(_PyXIData_t *xidata, int rawfree) { PyObject *exc = PyErr_GetRaisedException(); int res = rawfree - ? _PyXIData_Release(xidata) - : _PyXIData_ReleaseAndRawFree(xidata); + ? _PyXIData_ReleaseAndRawFree(xidata) + : _PyXIData_Release(xidata); if (res < 0) { /* The owning interpreter is already destroyed. */ _PyXIData_Clear(NULL, xidata); @@ -1130,8 +1315,14 @@ _excinfo_normalize_type(struct _excinfo_type *info, *p_module = module; } +static int +excinfo_is_set(_PyXI_excinfo *info) +{ + return info->type.name != NULL || info->msg != NULL; +} + static void -_PyXI_excinfo_Clear(_PyXI_excinfo *info) +_PyXI_excinfo_clear(_PyXI_excinfo *info) { _excinfo_clear_type(&info->type); if (info->msg != NULL) { @@ -1181,7 +1372,7 @@ _PyXI_excinfo_InitFromException(_PyXI_excinfo *info, PyObject *exc) assert(exc != NULL); if (PyErr_GivenExceptionMatches(exc, PyExc_MemoryError)) { - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return NULL; } const char *failure = NULL; @@ -1227,7 +1418,7 @@ _PyXI_excinfo_InitFromException(_PyXI_excinfo *info, PyObject *exc) error: assert(failure != NULL); - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return failure; } @@ -1278,7 +1469,7 @@ _PyXI_excinfo_InitFromObject(_PyXI_excinfo *info, PyObject *obj) error: assert(failure != NULL); - _PyXI_excinfo_Clear(info); + _PyXI_excinfo_clear(info); return failure; } @@ -1291,6 +1482,11 @@ _PyXI_excinfo_Apply(_PyXI_excinfo *info, PyObject *exctype) if (tbexc == NULL) { PyErr_Clear(); } + else { + PyErr_SetObject(exctype, tbexc); + Py_DECREF(tbexc); + return; + } } PyObject *formatted = _PyXI_excinfo_format(info); @@ -1436,13 +1632,17 @@ _PyXI_excinfo_AsObject(_PyXI_excinfo *info) } -int -_PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc) +_PyXI_excinfo * +_PyXI_NewExcInfo(PyObject *exc) { assert(!PyErr_Occurred()); if (exc == NULL || exc == Py_None) { PyErr_SetString(PyExc_ValueError, "missing exc"); - return -1; + return NULL; + } + _PyXI_excinfo *info = PyMem_RawCalloc(1, sizeof(_PyXI_excinfo)); + if (info == NULL) { + return NULL; } const char *failure; if (PyExceptionInstance_Check(exc) || PyExceptionClass_Check(exc)) { @@ -1452,10 +1652,18 @@ _PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc) failure = _PyXI_excinfo_InitFromObject(info, exc); } if (failure != NULL) { - PyErr_SetString(PyExc_Exception, failure); - return -1; + PyMem_RawFree(info); + set_exc_with_cause(PyExc_Exception, failure); + return NULL; } - return 0; + return info; +} + +void +_PyXI_FreeExcInfo(_PyXI_excinfo *info) +{ + _PyXI_excinfo_clear(info); + PyMem_RawFree(info); } PyObject * @@ -1470,12 +1678,6 @@ _PyXI_ExcInfoAsObject(_PyXI_excinfo *info) return _PyXI_excinfo_AsObject(info); } -void -_PyXI_ClearExcInfo(_PyXI_excinfo *info) -{ - _PyXI_excinfo_Clear(info); -} - /***************************/ /* short-term data sharing */ @@ -1489,14 +1691,9 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) PyThreadState *tstate = _PyThreadState_GET(); assert(!PyErr_Occurred()); + assert(code != _PyXI_ERR_NO_ERROR); + assert(code != _PyXI_ERR_UNCAUGHT_EXCEPTION); switch (code) { - case _PyXI_ERR_NO_ERROR: _Py_FALLTHROUGH; - case _PyXI_ERR_UNCAUGHT_EXCEPTION: - // There is nothing to apply. -#ifdef Py_DEBUG - Py_UNREACHABLE(); -#endif - return 0; case _PyXI_ERR_OTHER: // XXX msg? PyErr_SetNone(PyExc_InterpreterError); @@ -1516,12 +1713,20 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) PyErr_SetString(PyExc_InterpreterError, "failed to apply namespace to __main__"); break; + case _PyXI_ERR_PRESERVE_FAILURE: + PyErr_SetString(PyExc_InterpreterError, + "failed to preserve objects across session"); + break; + case _PyXI_ERR_EXC_PROPAGATION_FAILURE: + PyErr_SetString(PyExc_InterpreterError, + "failed to transfer exception between interpreters"); + break; case _PyXI_ERR_NOT_SHAREABLE: _set_xid_lookup_failure(tstate, NULL, NULL, NULL); break; default: #ifdef Py_DEBUG - Py_UNREACHABLE(); + Py_FatalError("unsupported error code"); #else PyErr_Format(PyExc_RuntimeError, "unsupported error code %d", code); #endif @@ -1530,70 +1735,276 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp) return -1; } +/* basic failure info */ + +struct xi_failure { + // The kind of error to propagate. + _PyXI_errcode code; + // The propagated message. + const char *msg; + int msg_owned; +}; // _PyXI_failure + +#define XI_FAILURE_INIT (_PyXI_failure){ .code = _PyXI_ERR_NO_ERROR } + +static void +clear_xi_failure(_PyXI_failure *failure) +{ + if (failure->msg != NULL && failure->msg_owned) { + PyMem_RawFree((void*)failure->msg); + } + *failure = XI_FAILURE_INIT; +} + +static void +copy_xi_failure(_PyXI_failure *dest, _PyXI_failure *src) +{ + *dest = *src; + dest->msg_owned = 0; +} + +_PyXI_failure * +_PyXI_NewFailure(void) +{ + _PyXI_failure *failure = PyMem_RawMalloc(sizeof(_PyXI_failure)); + if (failure == NULL) { + PyErr_NoMemory(); + return NULL; + } + *failure = XI_FAILURE_INIT; + return failure; +} + +void +_PyXI_FreeFailure(_PyXI_failure *failure) +{ + clear_xi_failure(failure); + PyMem_RawFree(failure); +} + +_PyXI_errcode +_PyXI_GetFailureCode(_PyXI_failure *failure) +{ + if (failure == NULL) { + return _PyXI_ERR_NO_ERROR; + } + return failure->code; +} + +void +_PyXI_InitFailureUTF8(_PyXI_failure *failure, + _PyXI_errcode code, const char *msg) +{ + *failure = (_PyXI_failure){ + .code = code, + .msg = msg, + .msg_owned = 0, + }; +} + +int +_PyXI_InitFailure(_PyXI_failure *failure, _PyXI_errcode code, PyObject *obj) +{ + *failure = (_PyXI_failure){ + .code = code, + .msg = NULL, + .msg_owned = 0, + }; + if (obj == NULL) { + return 0; + } + + PyObject *msgobj = PyObject_Str(obj); + if (msgobj == NULL) { + return -1; + } + // This will leak if not paired with clear_xi_failure(). + // That happens automatically in _capture_current_exception(). + const char *msg = _copy_string_obj_raw(msgobj, NULL); + Py_DECREF(msgobj); + if (msg == NULL) { + return -1; + } + *failure = (_PyXI_failure){ + .code = code, + .msg = msg, + .msg_owned = 1, + }; + return 0; +} + /* shared exceptions */ -static const char * -_PyXI_InitError(_PyXI_error *error, PyObject *excobj, _PyXI_errcode code) +typedef struct { + // The originating interpreter. + PyInterpreterState *interp; + // The error to propagate, if different from the uncaught exception. + _PyXI_failure *override; + _PyXI_failure _override; + // The exception information to propagate, if applicable. + // This is populated only for some error codes, + // but always for _PyXI_ERR_UNCAUGHT_EXCEPTION. + _PyXI_excinfo uncaught; +} _PyXI_error; + +static void +xi_error_clear(_PyXI_error *err) { - if (error->interp == NULL) { - error->interp = PyInterpreterState_Get(); + err->interp = NULL; + if (err->override != NULL) { + clear_xi_failure(err->override); } + _PyXI_excinfo_clear(&err->uncaught); +} - const char *failure = NULL; - if (code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // There is an unhandled exception we need to propagate. - failure = _PyXI_excinfo_InitFromException(&error->uncaught, excobj); - if (failure != NULL) { - // We failed to initialize error->uncaught. - // XXX Print the excobj/traceback? Emit a warning? - // XXX Print the current exception/traceback? - if (PyErr_ExceptionMatches(PyExc_MemoryError)) { - error->code = _PyXI_ERR_NO_MEMORY; - } - else { - error->code = _PyXI_ERR_OTHER; - } - PyErr_Clear(); +static int +xi_error_is_set(_PyXI_error *error) +{ + if (error->override != NULL) { + assert(error->override->code != _PyXI_ERR_NO_ERROR); + assert(error->override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION + || excinfo_is_set(&error->uncaught)); + return 1; + } + return excinfo_is_set(&error->uncaught); +} + +static int +xi_error_has_override(_PyXI_error *err) +{ + if (err->override == NULL) { + return 0; + } + return (err->override->code != _PyXI_ERR_NO_ERROR + && err->override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); +} + +static PyObject * +xi_error_resolve_current_exc(PyThreadState *tstate, + _PyXI_failure *override) +{ + assert(override == NULL || override->code != _PyXI_ERR_NO_ERROR); + + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (exc == NULL) { + assert(override == NULL + || override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); + } + else if (override == NULL) { + // This is equivalent to _PyXI_ERR_UNCAUGHT_EXCEPTION. + } + else if (override->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + // We want to actually capture the current exception. + } + else if (exc != NULL) { + // It might make sense to do similarly for other codes. + if (override->code == _PyXI_ERR_ALREADY_RUNNING) { + // We don't need the exception info. + Py_CLEAR(exc); + } + // ...else we want to actually capture the current exception. + } + return exc; +} + +static void +xi_error_set_override(PyThreadState *tstate, _PyXI_error *err, + _PyXI_failure *override) +{ + assert(err->override == NULL); + assert(override != NULL); + assert(override->code != _PyXI_ERR_NO_ERROR); + // Use xi_error_set_exc() instead of setting _PyXI_ERR_UNCAUGHT_EXCEPTION.. + assert(override->code != _PyXI_ERR_UNCAUGHT_EXCEPTION); + err->override = &err->_override; + // The caller still owns override->msg. + copy_xi_failure(&err->_override, override); + err->interp = tstate->interp; +} + +static void +xi_error_set_override_code(PyThreadState *tstate, _PyXI_error *err, + _PyXI_errcode code) +{ + _PyXI_failure override = XI_FAILURE_INIT; + override.code = code; + xi_error_set_override(tstate, err, &override); +} + +static const char * +xi_error_set_exc(PyThreadState *tstate, _PyXI_error *err, PyObject *exc) +{ + assert(!_PyErr_Occurred(tstate)); + assert(!xi_error_is_set(err)); + assert(err->override == NULL); + assert(err->interp == NULL); + assert(exc != NULL); + const char *failure = + _PyXI_excinfo_InitFromException(&err->uncaught, exc); + if (failure != NULL) { + // We failed to initialize err->uncaught. + // XXX Print the excobj/traceback? Emit a warning? + // XXX Print the current exception/traceback? + if (_PyErr_ExceptionMatches(tstate, PyExc_MemoryError)) { + xi_error_set_override_code(tstate, err, _PyXI_ERR_NO_MEMORY); } else { - error->code = code; + xi_error_set_override_code(tstate, err, _PyXI_ERR_OTHER); } - assert(error->code != _PyXI_ERR_NO_ERROR); - } - else { - // There is an error code we need to propagate. - assert(excobj == NULL); - assert(code != _PyXI_ERR_NO_ERROR); - error->code = code; - _PyXI_excinfo_Clear(&error->uncaught); + PyErr_Clear(); } return failure; } -PyObject * -_PyXI_ApplyError(_PyXI_error *error) +static PyObject * +_PyXI_ApplyError(_PyXI_error *error, const char *failure) { PyThreadState *tstate = PyThreadState_Get(); - if (error->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // Raise an exception that proxies the propagated exception. + + if (failure != NULL) { + xi_error_clear(error); + return NULL; + } + + _PyXI_errcode code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + if (error->override != NULL) { + code = error->override->code; + assert(code != _PyXI_ERR_NO_ERROR); + } + + if (code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + // We will raise an exception that proxies the propagated exception. return _PyXI_excinfo_AsObject(&error->uncaught); } - else if (error->code == _PyXI_ERR_NOT_SHAREABLE) { + else if (code == _PyXI_ERR_NOT_SHAREABLE) { // Propagate the exception directly. assert(!_PyErr_Occurred(tstate)); - _set_xid_lookup_failure(tstate, NULL, error->uncaught.msg, NULL); + PyObject *cause = NULL; + if (excinfo_is_set(&error->uncaught)) { + // Maybe instead set a PyExc_ExceptionSnapshot as __cause__? + // That type doesn't exist currently + // but would look like interpreters.ExecutionFailed. + _PyXI_excinfo_Apply(&error->uncaught, PyExc_Exception); + cause = _PyErr_GetRaisedException(tstate); + } + const char *msg = error->override != NULL + ? error->override->msg + : error->uncaught.msg; + _set_xid_lookup_failure(tstate, NULL, msg, cause); + Py_XDECREF(cause); } else { // Raise an exception corresponding to the code. - assert(error->code != _PyXI_ERR_NO_ERROR); - (void)_PyXI_ApplyErrorCode(error->code, error->interp); - if (error->uncaught.type.name != NULL || error->uncaught.msg != NULL) { + (void)_PyXI_ApplyErrorCode(code, error->interp); + assert(error->override == NULL || error->override->msg == NULL); + if (excinfo_is_set(&error->uncaught)) { // __context__ will be set to a proxy of the propagated exception. - PyObject *exc = PyErr_GetRaisedException(); + // (or use PyExc_ExceptionSnapshot like _PyXI_ERR_NOT_SHAREABLE?) + PyObject *exc = _PyErr_GetRaisedException(tstate); _PyXI_excinfo_Apply(&error->uncaught, PyExc_InterpreterError); - PyObject *exc2 = PyErr_GetRaisedException(); + PyObject *exc2 = _PyErr_GetRaisedException(tstate); PyException_SetContext(exc, exc2); - PyErr_SetRaisedException(exc); + _PyErr_SetRaisedException(tstate, exc); } } assert(PyErr_Occurred()); @@ -1624,6 +2035,7 @@ typedef struct _sharednsitem { // in a different interpreter to release the XI data. } _PyXI_namespace_item; +#ifndef NDEBUG static int _sharednsitem_is_initialized(_PyXI_namespace_item *item) { @@ -1632,6 +2044,7 @@ _sharednsitem_is_initialized(_PyXI_namespace_item *item) } return 0; } +#endif static int _sharednsitem_init(_PyXI_namespace_item *item, PyObject *key) @@ -1659,7 +2072,8 @@ _sharednsitem_has_value(_PyXI_namespace_item *item, int64_t *p_interpid) } static int -_sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) +_sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value, + xidata_fallback_t fallback) { assert(_sharednsitem_is_initialized(item)); assert(item->xidata == NULL); @@ -1668,7 +2082,7 @@ _sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) return -1; } PyThreadState *tstate = PyThreadState_Get(); - if (_PyObject_GetXIData(tstate, value, item->xidata) != 0) { + if (_PyObject_GetXIData(tstate, value, fallback, item->xidata) < 0) { PyMem_RawFree(item->xidata); item->xidata = NULL; // The caller may want to propagate PyExc_NotShareableError @@ -1700,7 +2114,8 @@ _sharednsitem_clear(_PyXI_namespace_item *item) } static int -_sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns) +_sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns, + xidata_fallback_t fallback) { assert(item->name != NULL); assert(item->xidata == NULL); @@ -1712,7 +2127,7 @@ _sharednsitem_copy_from_ns(struct _sharednsitem *item, PyObject *ns) // When applied, this item will be set to the default (or fail). return 0; } - if (_sharednsitem_set_value(item, value) < 0) { + if (_sharednsitem_set_value(item, value, fallback) < 0) { return -1; } return 0; @@ -1742,156 +2157,212 @@ _sharednsitem_apply(_PyXI_namespace_item *item, PyObject *ns, PyObject *dflt) return res; } -struct _sharedns { - Py_ssize_t len; - _PyXI_namespace_item *items; -}; -static _PyXI_namespace * -_sharedns_new(void) -{ - _PyXI_namespace *ns = PyMem_RawCalloc(sizeof(_PyXI_namespace), 1); - if (ns == NULL) { - PyErr_NoMemory(); - return NULL; - } - *ns = (_PyXI_namespace){ 0 }; - return ns; -} +typedef struct { + Py_ssize_t maxitems; + Py_ssize_t numnames; + Py_ssize_t numvalues; + _PyXI_namespace_item items[1]; +} _PyXI_namespace; +#ifndef NDEBUG static int -_sharedns_is_initialized(_PyXI_namespace *ns) +_sharedns_check_counts(_PyXI_namespace *ns) { - if (ns->len == 0) { - assert(ns->items == NULL); + if (ns->maxitems <= 0) { + return 0; + } + if (ns->numnames < 0) { + return 0; + } + if (ns->numnames > ns->maxitems) { + return 0; + } + if (ns->numvalues < 0) { + return 0; + } + if (ns->numvalues > ns->numnames) { return 0; } - - assert(ns->len > 0); - assert(ns->items != NULL); - assert(_sharednsitem_is_initialized(&ns->items[0])); - assert(ns->len == 1 - || _sharednsitem_is_initialized(&ns->items[ns->len - 1])); return 1; } -#define HAS_COMPLETE_DATA 1 -#define HAS_PARTIAL_DATA 2 - static int -_sharedns_has_xidata(_PyXI_namespace *ns, int64_t *p_interpid) +_sharedns_check_consistency(_PyXI_namespace *ns) { - // We expect _PyXI_namespace to always be initialized. - assert(_sharedns_is_initialized(ns)); - int res = 0; - _PyXI_namespace_item *item0 = &ns->items[0]; - if (!_sharednsitem_is_initialized(item0)) { + if (!_sharedns_check_counts(ns)) { return 0; } - int64_t interpid0 = -1; - if (!_sharednsitem_has_value(item0, &interpid0)) { - return 0; + + Py_ssize_t i = 0; + _PyXI_namespace_item *item; + if (ns->numvalues > 0) { + item = &ns->items[0]; + if (!_sharednsitem_is_initialized(item)) { + return 0; + } + int64_t interpid0 = -1; + if (!_sharednsitem_has_value(item, &interpid0)) { + return 0; + } + i += 1; + for (; i < ns->numvalues; i++) { + item = &ns->items[i]; + if (!_sharednsitem_is_initialized(item)) { + return 0; + } + int64_t interpid = -1; + if (!_sharednsitem_has_value(item, &interpid)) { + return 0; + } + if (interpid != interpid0) { + return 0; + } + } } - if (ns->len > 1) { - // At this point we know it is has at least partial data. - _PyXI_namespace_item *itemN = &ns->items[ns->len-1]; - if (!_sharednsitem_is_initialized(itemN)) { - res = HAS_PARTIAL_DATA; - goto finally; + for (; i < ns->numnames; i++) { + item = &ns->items[i]; + if (!_sharednsitem_is_initialized(item)) { + return 0; } - int64_t interpidN = -1; - if (!_sharednsitem_has_value(itemN, &interpidN)) { - res = HAS_PARTIAL_DATA; - goto finally; + if (_sharednsitem_has_value(item, NULL)) { + return 0; } - assert(interpidN == interpid0); } - res = HAS_COMPLETE_DATA; - *p_interpid = interpid0; - -finally: - return res; + for (; i < ns->maxitems; i++) { + item = &ns->items[i]; + if (_sharednsitem_is_initialized(item)) { + return 0; + } + if (_sharednsitem_has_value(item, NULL)) { + return 0; + } + } + return 1; } +#endif -static void -_sharedns_clear(_PyXI_namespace *ns) +static _PyXI_namespace * +_sharedns_alloc(Py_ssize_t maxitems) { - if (!_sharedns_is_initialized(ns)) { - return; + if (maxitems < 0) { + if (!PyErr_Occurred()) { + PyErr_BadInternalCall(); + } + return NULL; } - - // If the cross-interpreter data were allocated as part of - // _PyXI_namespace_item (instead of dynamically), this is where - // we would need verify that we are clearing the items in the - // correct interpreter, to avoid a race with releasing the XI data - // via a pending call. See _sharedns_has_xidata(). - for (Py_ssize_t i=0; i < ns->len; i++) { - _sharednsitem_clear(&ns->items[i]); + else if (maxitems == 0) { + PyErr_SetString(PyExc_ValueError, "empty namespaces not allowed"); + return NULL; } - PyMem_RawFree(ns->items); - ns->items = NULL; - ns->len = 0; -} -static void + // Check for overflow. + size_t fixedsize = sizeof(_PyXI_namespace) - sizeof(_PyXI_namespace_item); + if ((size_t)maxitems > + ((size_t)PY_SSIZE_T_MAX - fixedsize) / sizeof(_PyXI_namespace_item)) + { + PyErr_NoMemory(); + return NULL; + } + + // Allocate the value, including items. + size_t size = fixedsize + sizeof(_PyXI_namespace_item) * maxitems; + + _PyXI_namespace *ns = PyMem_RawCalloc(size, 1); + if (ns == NULL) { + PyErr_NoMemory(); + return NULL; + } + ns->maxitems = maxitems; + assert(_sharedns_check_consistency(ns)); + return ns; +} + +static void _sharedns_free(_PyXI_namespace *ns) { - _sharedns_clear(ns); + // If we weren't always dynamically allocating the cross-interpreter + // data in each item then we would need to use a pending call + // to call _sharedns_free(), to avoid the race between freeing + // the shared namespace and releasing the XI data. + assert(_sharedns_check_counts(ns)); + Py_ssize_t i = 0; + _PyXI_namespace_item *item; + if (ns->numvalues > 0) { + // One or more items may have interpreter-specific data. +#ifndef NDEBUG + int64_t interpid = PyInterpreterState_GetID(PyInterpreterState_Get()); + int64_t interpid_i; +#endif + for (; i < ns->numvalues; i++) { + item = &ns->items[i]; + assert(_sharednsitem_is_initialized(item)); + // While we do want to ensure consistency across items, + // technically they don't need to match the current + // interpreter. However, we keep the constraint for + // simplicity, by giving _PyXI_FreeNamespace() the exclusive + // responsibility of dealing with the owning interpreter. + assert(_sharednsitem_has_value(item, &interpid_i)); + assert(interpid_i == interpid); + _sharednsitem_clear(item); + } + } + for (; i < ns->numnames; i++) { + item = &ns->items[i]; + assert(_sharednsitem_is_initialized(item)); + assert(!_sharednsitem_has_value(item, NULL)); + _sharednsitem_clear(item); + } +#ifndef NDEBUG + for (; i < ns->maxitems; i++) { + item = &ns->items[i]; + assert(!_sharednsitem_is_initialized(item)); + assert(!_sharednsitem_has_value(item, NULL)); + } +#endif + PyMem_RawFree(ns); } -static int -_sharedns_init(_PyXI_namespace *ns, PyObject *names) +static _PyXI_namespace * +_create_sharedns(PyObject *names) { - assert(!_sharedns_is_initialized(ns)); assert(names != NULL); - Py_ssize_t len = PyDict_CheckExact(names) + Py_ssize_t numnames = PyDict_CheckExact(names) ? PyDict_Size(names) : PySequence_Size(names); - if (len < 0) { - return -1; - } - if (len == 0) { - PyErr_SetString(PyExc_ValueError, "empty namespaces not allowed"); - return -1; - } - assert(len > 0); - // Allocate the items. - _PyXI_namespace_item *items = - PyMem_RawCalloc(sizeof(struct _sharednsitem), len); - if (items == NULL) { - PyErr_NoMemory(); - return -1; + _PyXI_namespace *ns = _sharedns_alloc(numnames); + if (ns == NULL) { + return NULL; } + _PyXI_namespace_item *items = ns->items; // Fill in the names. - Py_ssize_t i = -1; if (PyDict_CheckExact(names)) { + Py_ssize_t i = 0; Py_ssize_t pos = 0; - for (i=0; i < len; i++) { - PyObject *key; - if (!PyDict_Next(names, &pos, &key, NULL)) { - // This should not be possible. - assert(0); - goto error; - } - if (_sharednsitem_init(&items[i], key) < 0) { + PyObject *name; + while(PyDict_Next(names, &pos, &name, NULL)) { + if (_sharednsitem_init(&items[i], name) < 0) { goto error; } + ns->numnames += 1; + i += 1; } } else if (PySequence_Check(names)) { - for (i=0; i < len; i++) { - PyObject *key = PySequence_GetItem(names, i); - if (key == NULL) { + for (Py_ssize_t i = 0; i < numnames; i++) { + PyObject *name = PySequence_GetItem(names, i); + if (name == NULL) { goto error; } - int res = _sharednsitem_init(&items[i], key); - Py_DECREF(key); + int res = _sharednsitem_init(&items[i], name); + Py_DECREF(name); if (res < 0) { goto error; } + ns->numnames += 1; } } else { @@ -1899,140 +2370,84 @@ _sharedns_init(_PyXI_namespace *ns, PyObject *names) "non-sequence namespace not supported"); goto error; } - - ns->items = items; - ns->len = len; - assert(_sharedns_is_initialized(ns)); - return 0; + assert(ns->numnames == ns->maxitems); + return ns; error: - for (Py_ssize_t j=0; j < i; j++) { - _sharednsitem_clear(&items[j]); - } - PyMem_RawFree(items); - assert(!_sharedns_is_initialized(ns)); - return -1; + _sharedns_free(ns); + return NULL; } -void -_PyXI_FreeNamespace(_PyXI_namespace *ns) -{ - if (!_sharedns_is_initialized(ns)) { - return; - } - - int64_t interpid = -1; - if (!_sharedns_has_xidata(ns, &interpid)) { - _sharedns_free(ns); - return; - } +static void _propagate_not_shareable_error(PyThreadState *, + _PyXI_failure *); - if (interpid == PyInterpreterState_GetID(PyInterpreterState_Get())) { - _sharedns_free(ns); - } - else { - // If we weren't always dynamically allocating the cross-interpreter - // data in each item then we would need to using a pending call - // to call _sharedns_free(), to avoid the race between freeing - // the shared namespace and releasing the XI data. - _sharedns_free(ns); - } -} - -_PyXI_namespace * -_PyXI_NamespaceFromNames(PyObject *names) +static int +_fill_sharedns(_PyXI_namespace *ns, PyObject *nsobj, + xidata_fallback_t fallback, _PyXI_failure *p_err) { - if (names == NULL || names == Py_None) { - return NULL; - } - - _PyXI_namespace *ns = _sharedns_new(); - if (ns == NULL) { - return NULL; - } - - if (_sharedns_init(ns, names) < 0) { - PyMem_RawFree(ns); - if (PySequence_Size(names) == 0) { - PyErr_Clear(); - } - return NULL; - } - - return ns; -} - -#ifndef NDEBUG -static int _session_is_active(_PyXI_session *); -#endif -static void _propagate_not_shareable_error(_PyXI_session *); - -int -_PyXI_FillNamespaceFromDict(_PyXI_namespace *ns, PyObject *nsobj, - _PyXI_session *session) -{ - // session must be entered already, if provided. - assert(session == NULL || _session_is_active(session)); - assert(_sharedns_is_initialized(ns)); - for (Py_ssize_t i=0; i < ns->len; i++) { - _PyXI_namespace_item *item = &ns->items[i]; - if (_sharednsitem_copy_from_ns(item, nsobj) < 0) { - _propagate_not_shareable_error(session); + // All items are expected to be shareable. + assert(_sharedns_check_counts(ns)); + assert(ns->numnames == ns->maxitems); + assert(ns->numvalues == 0); + PyThreadState *tstate = PyThreadState_Get(); + for (Py_ssize_t i=0; i < ns->maxitems; i++) { + if (_sharednsitem_copy_from_ns(&ns->items[i], nsobj, fallback) < 0) { + if (p_err != NULL) { + _propagate_not_shareable_error(tstate, p_err); + } // Clear out the ones we set so far. for (Py_ssize_t j=0; j < i; j++) { _sharednsitem_clear_value(&ns->items[j]); + ns->numvalues -= 1; } return -1; } + ns->numvalues += 1; } return 0; } -// All items are expected to be shareable. -static _PyXI_namespace * -_PyXI_NamespaceFromDict(PyObject *nsobj, _PyXI_session *session) +static int +_sharedns_free_pending(void *data) { - // session must be entered already, if provided. - assert(session == NULL || _session_is_active(session)); - if (nsobj == NULL || nsobj == Py_None) { - return NULL; - } - if (!PyDict_CheckExact(nsobj)) { - PyErr_SetString(PyExc_TypeError, "expected a dict"); - return NULL; - } + _sharedns_free((_PyXI_namespace *)data); + return 0; +} - _PyXI_namespace *ns = _sharedns_new(); - if (ns == NULL) { - return NULL; +static void +_destroy_sharedns(_PyXI_namespace *ns) +{ + assert(_sharedns_check_counts(ns)); + assert(ns->numnames == ns->maxitems); + if (ns->numvalues == 0) { + _sharedns_free(ns); + return; } - if (_sharedns_init(ns, nsobj) < 0) { - if (PyDict_Size(nsobj) == 0) { - PyMem_RawFree(ns); - PyErr_Clear(); - return NULL; - } - goto error; + int64_t interpid0; + if (!_sharednsitem_has_value(&ns->items[0], &interpid0)) { + // This shouldn't have been possible. + // We can deal with it in _sharedns_free(). + _sharedns_free(ns); + return; } - - if (_PyXI_FillNamespaceFromDict(ns, nsobj, session) < 0) { - goto error; + PyInterpreterState *interp = _PyInterpreterState_LookUpID(interpid0); + if (interp == PyInterpreterState_Get()) { + _sharedns_free(ns); + return; } - return ns; - -error: - assert(PyErr_Occurred() - || (session != NULL && session->error_override != NULL)); - _sharedns_free(ns); - return NULL; + // One or more items may have interpreter-specific data. + // Currently the xidata for each value is dynamically allocated, + // so technically we don't need to worry about that. + // However, explicitly adding a pending call here is simpler. + (void)_Py_CallInInterpreter(interp, _sharedns_free_pending, ns); } -int -_PyXI_ApplyNamespace(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) +static int +_apply_sharedns(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) { - for (Py_ssize_t i=0; i < ns->len; i++) { + for (Py_ssize_t i=0; i < ns->maxitems; i++) { if (_sharednsitem_apply(&ns->items[i], nsobj, dflt) != 0) { return -1; } @@ -2041,9 +2456,69 @@ _PyXI_ApplyNamespace(_PyXI_namespace *ns, PyObject *nsobj, PyObject *dflt) } -/**********************/ -/* high-level helpers */ -/**********************/ +/*********************************/ +/* switched-interpreter sessions */ +/*********************************/ + +struct xi_session { +#define SESSION_UNUSED 0 +#define SESSION_ACTIVE 1 + int status; + int switched; + + // Once a session has been entered, this is the tstate that was + // current before the session. If it is different from cur_tstate + // then we must have switched interpreters. Either way, this will + // be the current tstate once we exit the session. + PyThreadState *prev_tstate; + // Once a session has been entered, this is the current tstate. + // It must be current when the session exits. + PyThreadState *init_tstate; + // This is true if init_tstate needs cleanup during exit. + int own_init_tstate; + + // This is true if, while entering the session, init_thread took + // "ownership" of the interpreter's __main__ module. This means + // it is the only thread that is allowed to run code there. + // (Caveat: for now, users may still run exec() against the + // __main__ module's dict, though that isn't advisable.) + int running; + // This is a cached reference to the __dict__ of the entered + // interpreter's __main__ module. It is looked up when at the + // beginning of the session as a convenience. + PyObject *main_ns; + + // This is a dict of objects that will be available (via sharing) + // once the session exits. Do not access this directly; use + // _PyXI_Preserve() and _PyXI_GetPreserved() instead; + PyObject *_preserved; +}; + +_PyXI_session * +_PyXI_NewSession(void) +{ + _PyXI_session *session = PyMem_RawCalloc(1, sizeof(_PyXI_session)); + if (session == NULL) { + PyErr_NoMemory(); + return NULL; + } + return session; +} + +void +_PyXI_FreeSession(_PyXI_session *session) +{ + assert(session->status == SESSION_UNUSED); + PyMem_RawFree(session); +} + + +static inline int +_session_is_active(_PyXI_session *session) +{ + return session->status == SESSION_ACTIVE; +} + /* enter/exit a cross-interpreter session */ @@ -2051,29 +2526,33 @@ static void _enter_session(_PyXI_session *session, PyInterpreterState *interp) { // Set here and cleared in _exit_session(). + assert(session->status == SESSION_UNUSED); assert(!session->own_init_tstate); assert(session->init_tstate == NULL); assert(session->prev_tstate == NULL); // Set elsewhere and cleared in _exit_session(). assert(!session->running); assert(session->main_ns == NULL); - // Set elsewhere and cleared in _capture_current_exception(). - assert(session->error_override == NULL); - // Set elsewhere and cleared in _PyXI_ApplyCapturedException(). - assert(session->error == NULL); // Switch to interpreter. PyThreadState *tstate = PyThreadState_Get(); PyThreadState *prev = tstate; - if (interp != tstate->interp) { + int same_interp = (interp == tstate->interp); + if (!same_interp) { tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_EXEC); // XXX Possible GILState issues? - session->prev_tstate = PyThreadState_Swap(tstate); - assert(session->prev_tstate == prev); - session->own_init_tstate = 1; + PyThreadState *swapped = PyThreadState_Swap(tstate); + assert(swapped == prev); + (void)swapped; } - session->init_tstate = tstate; - session->prev_tstate = prev; + + *session = (_PyXI_session){ + .status = SESSION_ACTIVE, + .switched = !same_interp, + .init_tstate = tstate, + .prev_tstate = prev, + .own_init_tstate = !same_interp, + }; } static void @@ -2082,16 +2561,16 @@ _exit_session(_PyXI_session *session) PyThreadState *tstate = session->init_tstate; assert(tstate != NULL); assert(PyThreadState_Get() == tstate); + assert(!_PyErr_Occurred(tstate)); // Release any of the entered interpreters resources. - if (session->main_ns != NULL) { - Py_CLEAR(session->main_ns); - } + Py_CLEAR(session->main_ns); + Py_CLEAR(session->_preserved); // Ensure this thread no longer owns __main__. if (session->running) { _PyInterpreterState_SetNotRunningMain(tstate->interp); - assert(!PyErr_Occurred()); + assert(!_PyErr_Occurred(tstate)); session->running = 0; } @@ -2107,25 +2586,15 @@ _exit_session(_PyXI_session *session) else { assert(!session->own_init_tstate); } - session->prev_tstate = NULL; - session->init_tstate = NULL; -} -#ifndef NDEBUG -static int -_session_is_active(_PyXI_session *session) -{ - return (session->init_tstate != NULL); + *session = (_PyXI_session){0}; } -#endif static void -_propagate_not_shareable_error(_PyXI_session *session) +_propagate_not_shareable_error(PyThreadState *tstate, + _PyXI_failure *override) { - if (session == NULL) { - return; - } - PyThreadState *tstate = PyThreadState_Get(); + assert(override != NULL); PyObject *exctype = get_notshareableerror_type(tstate); if (exctype == NULL) { PyErr_FormatUnraisable( @@ -2134,166 +2603,463 @@ _propagate_not_shareable_error(_PyXI_session *session) } if (PyErr_ExceptionMatches(exctype)) { // We want to propagate the exception directly. - session->_error_override = _PyXI_ERR_NOT_SHAREABLE; - session->error_override = &session->_error_override; + *override = (_PyXI_failure){ + .code = _PyXI_ERR_NOT_SHAREABLE, + }; } } -static void -_capture_current_exception(_PyXI_session *session) + +static int _ensure_main_ns(_PyXI_session *, _PyXI_failure *); +static const char * capture_session_error(_PyXI_session *, _PyXI_error *, + _PyXI_failure *); + +int +_PyXI_Enter(_PyXI_session *session, + PyInterpreterState *interp, PyObject *nsupdates, + _PyXI_session_result *result) { - assert(session->error == NULL); - if (!PyErr_Occurred()) { - assert(session->error_override == NULL); - return; +#ifndef NDEBUG + PyThreadState *tstate = _PyThreadState_GET(); // Only used for asserts +#endif + + // Convert the attrs for cross-interpreter use. + _PyXI_namespace *sharedns = NULL; + if (nsupdates != NULL) { + assert(PyDict_Check(nsupdates)); + Py_ssize_t len = PyDict_Size(nsupdates); + if (len < 0) { + if (result != NULL) { + result->errcode = _PyXI_ERR_APPLY_NS_FAILURE; + } + return -1; + } + if (len > 0) { + sharedns = _create_sharedns(nsupdates); + if (sharedns == NULL) { + if (result != NULL) { + result->errcode = _PyXI_ERR_APPLY_NS_FAILURE; + } + return -1; + } + // For now we limit it to shareable objects. + xidata_fallback_t fallback = _PyXIDATA_XIDATA_ONLY; + _PyXI_failure _err = XI_FAILURE_INIT; + if (_fill_sharedns(sharedns, nsupdates, fallback, &_err) < 0) { + assert(_PyErr_Occurred(tstate)); + if (_err.code == _PyXI_ERR_NO_ERROR) { + _err.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + } + _destroy_sharedns(sharedns); + if (result != NULL) { + assert(_err.msg == NULL); + result->errcode = _err.code; + } + return -1; + } + } } - // Handle the exception override. - _PyXI_errcode *override = session->error_override; - session->error_override = NULL; - _PyXI_errcode errcode = override != NULL - ? *override - : _PyXI_ERR_UNCAUGHT_EXCEPTION; + // Switch to the requested interpreter (if necessary). + _enter_session(session, interp); + _PyXI_failure override = XI_FAILURE_INIT; + override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; +#ifndef NDEBUG + tstate = _PyThreadState_GET(); +#endif - // Pop the exception object. - PyObject *excval = NULL; - if (errcode == _PyXI_ERR_UNCAUGHT_EXCEPTION) { - // We want to actually capture the current exception. - excval = PyErr_GetRaisedException(); + // Ensure this thread owns __main__. + if (_PyInterpreterState_SetRunningMain(interp) < 0) { + // In the case where we didn't switch interpreters, it would + // be more efficient to leave the exception in place and return + // immediately. However, life is simpler if we don't. + override.code = _PyXI_ERR_ALREADY_RUNNING; + goto error; } - else if (errcode == _PyXI_ERR_ALREADY_RUNNING) { - // We don't need the exception info. - PyErr_Clear(); + session->running = 1; + + // Apply the cross-interpreter data. + if (sharedns != NULL) { + if (_ensure_main_ns(session, &override) < 0) { + goto error; + } + if (_apply_sharedns(sharedns, session->main_ns, NULL) < 0) { + override.code = _PyXI_ERR_APPLY_NS_FAILURE; + goto error; + } + _destroy_sharedns(sharedns); } - else { - // We could do a variety of things here, depending on errcode. - // However, for now we simply capture the exception and save - // the errcode. - excval = PyErr_GetRaisedException(); + + override.code = _PyXI_ERR_NO_ERROR; + assert(!_PyErr_Occurred(tstate)); + return 0; + +error: + // We want to propagate all exceptions here directly (best effort). + assert(override.code != _PyXI_ERR_NO_ERROR); + _PyXI_error err = {0}; + const char *failure = capture_session_error(session, &err, &override); + + // Exit the session. + _exit_session(session); +#ifndef NDEBUG + tstate = _PyThreadState_GET(); +#endif + + if (sharedns != NULL) { + _destroy_sharedns(sharedns); } - // Capture the exception. - _PyXI_error *err = &session->_error; - *err = (_PyXI_error){ - .interp = session->init_tstate->interp, - }; - const char *failure; - if (excval == NULL) { - failure = _PyXI_InitError(err, NULL, errcode); + // Apply the error from the other interpreter. + PyObject *excinfo = _PyXI_ApplyError(&err, failure); + xi_error_clear(&err); + if (excinfo != NULL) { + if (result != NULL) { + result->excinfo = excinfo; + } + else { +#ifdef Py_DEBUG + fprintf(stderr, "_PyXI_Enter(): uncaught exception discarded"); +#endif + Py_DECREF(excinfo); + } + } + assert(_PyErr_Occurred(tstate)); + + return -1; +} + +static int _pop_preserved(_PyXI_session *, _PyXI_namespace **, PyObject **, + _PyXI_failure *); +static int _finish_preserved(_PyXI_namespace *, PyObject **); + +int +_PyXI_Exit(_PyXI_session *session, _PyXI_failure *override, + _PyXI_session_result *result) +{ + PyThreadState *tstate = _PyThreadState_GET(); + int res = 0; + + // Capture the raised exception, if any. + _PyXI_error err = {0}; + const char *failure = NULL; + if (override != NULL && override->code == _PyXI_ERR_NO_ERROR) { + assert(override->msg == NULL); + override = NULL; + } + if (_PyErr_Occurred(tstate)) { + failure = capture_session_error(session, &err, override); } else { - failure = _PyXI_InitError(err, excval, _PyXI_ERR_UNCAUGHT_EXCEPTION); - Py_DECREF(excval); - if (failure == NULL && override != NULL) { - err->code = errcode; - } + assert(override == NULL); } - // Handle capture failure. - if (failure != NULL) { - // XXX Make this error message more generic. - fprintf(stderr, - "RunFailedError: script raised an uncaught exception (%s)", - failure); - err = NULL; + // Capture the preserved namespace. + _PyXI_namespace *preserved = NULL; + PyObject *preservedobj = NULL; + if (result != NULL) { + assert(!_PyErr_Occurred(tstate)); + _PyXI_failure _override = XI_FAILURE_INIT; + if (_pop_preserved( + session, &preserved, &preservedobj, &_override) < 0) + { + assert(preserved == NULL); + assert(preservedobj == NULL); + if (xi_error_is_set(&err)) { + // XXX Chain the exception (i.e. set __context__)? + PyErr_FormatUnraisable( + "Exception ignored while capturing preserved objects"); + clear_xi_failure(&_override); + } + else { + if (_override.code == _PyXI_ERR_NO_ERROR) { + _override.code = _PyXI_ERR_UNCAUGHT_EXCEPTION; + } + failure = capture_session_error(session, &err, &_override); + } + } } - // Finished! - assert(!PyErr_Occurred()); - session->error = err; -} + // Exit the session. + assert(!_PyErr_Occurred(tstate)); + _exit_session(session); + tstate = _PyThreadState_GET(); + + // Restore the preserved namespace. + assert(preserved == NULL || preservedobj == NULL); + if (_finish_preserved(preserved, &preservedobj) < 0) { + assert(preservedobj == NULL); + if (xi_error_is_set(&err)) { + // XXX Chain the exception (i.e. set __context__)? + PyErr_FormatUnraisable( + "Exception ignored while capturing preserved objects"); + } + else { + xi_error_set_override_code( + tstate, &err, _PyXI_ERR_PRESERVE_FAILURE); + _propagate_not_shareable_error(tstate, err.override); + } + } + if (result != NULL) { + result->preserved = preservedobj; + result->errcode = err.override != NULL + ? err.override->code + : _PyXI_ERR_NO_ERROR; + } -PyObject * -_PyXI_ApplyCapturedException(_PyXI_session *session) -{ - assert(!PyErr_Occurred()); - assert(session->error != NULL); - PyObject *res = _PyXI_ApplyError(session->error); - assert((res == NULL) != (PyErr_Occurred() == NULL)); - session->error = NULL; + // Apply the error from the other interpreter, if any. + if (xi_error_is_set(&err)) { + res = -1; + assert(!_PyErr_Occurred(tstate)); + PyObject *excinfo = _PyXI_ApplyError(&err, failure); + if (excinfo == NULL) { + assert(_PyErr_Occurred(tstate)); + if (result != NULL && !xi_error_has_override(&err)) { + _PyXI_ClearResult(result); + *result = (_PyXI_session_result){ + .errcode = _PyXI_ERR_EXC_PROPAGATION_FAILURE, + }; + } + } + else if (result != NULL) { + result->excinfo = excinfo; + } + else { +#ifdef Py_DEBUG + fprintf(stderr, "_PyXI_Exit(): uncaught exception discarded"); +#endif + Py_DECREF(excinfo); + } + xi_error_clear(&err); + } return res; } -int -_PyXI_HasCapturedException(_PyXI_session *session) -{ - return session->error != NULL; -} -int -_PyXI_Enter(_PyXI_session *session, - PyInterpreterState *interp, PyObject *nsupdates) +/* in an active cross-interpreter session */ + +static const char * +capture_session_error(_PyXI_session *session, _PyXI_error *err, + _PyXI_failure *override) { - // Convert the attrs for cross-interpreter use. - _PyXI_namespace *sharedns = NULL; - if (nsupdates != NULL) { - sharedns = _PyXI_NamespaceFromDict(nsupdates, NULL); - if (sharedns == NULL && PyErr_Occurred()) { - assert(session->error == NULL); - return -1; + assert(_session_is_active(session)); + assert(!xi_error_is_set(err)); + PyThreadState *tstate = session->init_tstate; + + // Normalize the exception override. + if (override != NULL) { + if (override->code == _PyXI_ERR_UNCAUGHT_EXCEPTION) { + assert(override->msg == NULL); + override = NULL; + } + else { + assert(override->code != _PyXI_ERR_NO_ERROR); } } - // Switch to the requested interpreter (if necessary). - _enter_session(session, interp); - PyThreadState *session_tstate = session->init_tstate; - _PyXI_errcode errcode = _PyXI_ERR_UNCAUGHT_EXCEPTION; + // Handle the exception, if any. + const char *failure = NULL; + PyObject *exc = xi_error_resolve_current_exc(tstate, override); + if (exc != NULL) { + // There is an unhandled exception we need to preserve. + failure = xi_error_set_exc(tstate, err, exc); + Py_DECREF(exc); + if (_PyErr_Occurred(tstate)) { + PyErr_FormatUnraisable(failure); + } + } - // Ensure this thread owns __main__. - if (_PyInterpreterState_SetRunningMain(interp) < 0) { - // In the case where we didn't switch interpreters, it would - // be more efficient to leave the exception in place and return - // immediately. However, life is simpler if we don't. - errcode = _PyXI_ERR_ALREADY_RUNNING; - goto error; + // Handle the override. + if (override != NULL && failure == NULL) { + xi_error_set_override(tstate, err, override); } - session->running = 1; + // Finished! + assert(!_PyErr_Occurred(tstate)); + return failure; +} + +static int +_ensure_main_ns(_PyXI_session *session, _PyXI_failure *failure) +{ + assert(_session_is_active(session)); + PyThreadState *tstate = session->init_tstate; + if (session->main_ns != NULL) { + return 0; + } // Cache __main__.__dict__. - PyObject *main_mod = _Py_GetMainModule(session_tstate); + PyObject *main_mod = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(main_mod) < 0) { - errcode = _PyXI_ERR_MAIN_NS_FAILURE; - goto error; + Py_XDECREF(main_mod); + if (failure != NULL) { + *failure = (_PyXI_failure){ + .code = _PyXI_ERR_MAIN_NS_FAILURE, + }; + } + return -1; } PyObject *ns = PyModule_GetDict(main_mod); // borrowed Py_DECREF(main_mod); if (ns == NULL) { - errcode = _PyXI_ERR_MAIN_NS_FAILURE; - goto error; + if (failure != NULL) { + *failure = (_PyXI_failure){ + .code = _PyXI_ERR_MAIN_NS_FAILURE, + }; + } + return -1; } session->main_ns = Py_NewRef(ns); + return 0; +} - // Apply the cross-interpreter data. - if (sharedns != NULL) { - if (_PyXI_ApplyNamespace(sharedns, ns, NULL) < 0) { - errcode = _PyXI_ERR_APPLY_NS_FAILURE; +PyObject * +_PyXI_GetMainNamespace(_PyXI_session *session, _PyXI_failure *failure) +{ + if (!_session_is_active(session)) { + PyErr_SetString(PyExc_RuntimeError, "session not active"); + return NULL; + } + if (_ensure_main_ns(session, failure) < 0) { + return NULL; + } + return session->main_ns; +} + + +static int +_pop_preserved(_PyXI_session *session, + _PyXI_namespace **p_xidata, PyObject **p_obj, + _PyXI_failure *p_failure) +{ + _PyXI_failure failure = XI_FAILURE_INIT; + _PyXI_namespace *xidata = NULL; + assert(_PyThreadState_GET() == session->init_tstate); // active session + + if (session->_preserved == NULL) { + *p_xidata = NULL; + *p_obj = NULL; + return 0; + } + if (session->init_tstate == session->prev_tstate) { + // We did not switch interpreters. + *p_xidata = NULL; + *p_obj = session->_preserved; + session->_preserved = NULL; + return 0; + } + *p_obj = NULL; + + // We did switch interpreters. + Py_ssize_t len = PyDict_Size(session->_preserved); + if (len < 0) { + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; + } + else if (len == 0) { + *p_xidata = NULL; + } + else { + _PyXI_namespace *xidata = _create_sharedns(session->_preserved); + if (xidata == NULL) { + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; + } + if (_fill_sharedns(xidata, session->_preserved, + _PyXIDATA_FULL_FALLBACK, &failure) < 0) + { + if (failure.code != _PyXI_ERR_NOT_SHAREABLE) { + assert(failure.msg != NULL); + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + } goto error; } - _PyXI_FreeNamespace(sharedns); + *p_xidata = xidata; } + Py_CLEAR(session->_preserved); + return 0; - errcode = _PyXI_ERR_NO_ERROR; - assert(!PyErr_Occurred()); +error: + if (p_failure != NULL) { + *p_failure = failure; + } + if (xidata != NULL) { + _destroy_sharedns(xidata); + } + return -1; +} + +static int +_finish_preserved(_PyXI_namespace *xidata, PyObject **p_preserved) +{ + if (xidata == NULL) { + return 0; + } + int res = -1; + if (p_preserved != NULL) { + PyObject *ns = PyDict_New(); + if (ns == NULL) { + goto finally; + } + if (_apply_sharedns(xidata, ns, NULL) < 0) { + Py_CLEAR(ns); + goto finally; + } + *p_preserved = ns; + } + res = 0; + +finally: + _destroy_sharedns(xidata); + return res; +} + +int +_PyXI_Preserve(_PyXI_session *session, const char *name, PyObject *value, + _PyXI_failure *p_failure) +{ + _PyXI_failure failure = XI_FAILURE_INIT; + if (!_session_is_active(session)) { + PyErr_SetString(PyExc_RuntimeError, "session not active"); + return -1; + } + if (session->_preserved == NULL) { + session->_preserved = PyDict_New(); + if (session->_preserved == NULL) { + set_exc_with_cause(PyExc_RuntimeError, + "failed to initialize preserved objects"); + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; + } + } + if (PyDict_SetItemString(session->_preserved, name, value) < 0) { + set_exc_with_cause(PyExc_RuntimeError, "failed to preserve object"); + failure.code = _PyXI_ERR_PRESERVE_FAILURE; + goto error; + } return 0; error: - assert(PyErr_Occurred()); - // We want to propagate all exceptions here directly (best effort). - assert(errcode != _PyXI_ERR_UNCAUGHT_EXCEPTION); - session->error_override = &errcode; - _capture_current_exception(session); - _exit_session(session); - if (sharedns != NULL) { - _PyXI_FreeNamespace(sharedns); + if (p_failure != NULL) { + *p_failure = failure; } return -1; } +PyObject * +_PyXI_GetPreserved(_PyXI_session_result *result, const char *name) +{ + PyObject *value = NULL; + if (result->preserved != NULL) { + (void)PyDict_GetItemStringRef(result->preserved, name, &value); + } + return value; +} + void -_PyXI_Exit(_PyXI_session *session) +_PyXI_ClearResult(_PyXI_session_result *result) { - _capture_current_exception(session); - _exit_session(session); + Py_CLEAR(result->preserved); + Py_CLEAR(result->excinfo); } diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h index 231537c66d78f6..c3c76ae8d9a289 100644 --- a/Python/crossinterp_data_lookup.h +++ b/Python/crossinterp_data_lookup.h @@ -12,7 +12,8 @@ typedef _PyXIData_regitem_t dlregitem_t; // forward static void _xidregistry_init(dlregistry_t *); static void _xidregistry_fini(dlregistry_t *); -static xidatafunc _lookup_getdata_from_registry(dlcontext_t *, PyObject *); +static _PyXIData_getdata_t _lookup_getdata_from_registry( + dlcontext_t *, PyObject *); /* used in crossinterp.c */ @@ -49,7 +50,7 @@ get_lookup_context(PyThreadState *tstate, dlcontext_t *res) return 0; } -static xidatafunc +static _PyXIData_getdata_t lookup_getdata(dlcontext_t *ctx, PyObject *obj) { /* Cross-interpreter objects are looked up by exact match on the class. @@ -87,25 +88,52 @@ _PyXIData_FormatNotShareableError(PyThreadState *tstate, va_end(vargs); } +int +_PyXI_UnwrapNotShareableError(PyThreadState * tstate, _PyXI_failure *failure) +{ + PyObject *exctype = get_notshareableerror_type(tstate); + assert(exctype != NULL); + if (!_PyErr_ExceptionMatches(tstate, exctype)) { + return -1; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (failure != NULL) { + _PyXI_errcode code = _PyXI_ERR_NOT_SHAREABLE; + if (_PyXI_InitFailure(failure, code, exc) < 0) { + return -1; + } + } + PyObject *cause = PyException_GetCause(exc); + if (cause != NULL) { + Py_DECREF(exc); + exc = cause; + } + else { + assert(PyException_GetContext(exc) == NULL); + } + _PyErr_SetRaisedException(tstate, exc); + return 0; +} + -xidatafunc +_PyXIData_getdata_t _PyXIData_Lookup(PyThreadState *tstate, PyObject *obj) { dlcontext_t ctx; if (get_lookup_context(tstate, &ctx) < 0) { - return NULL; + return (_PyXIData_getdata_t){0}; } return lookup_getdata(&ctx, obj); } /***********************************************/ -/* a registry of {type -> xidatafunc} */ +/* a registry of {type -> _PyXIData_getdata_t} */ /***********************************************/ -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - xidatafunc. It would be simpler and more efficient. */ +/* For now we use a global registry of shareable classes. + An alternative would be to add a tp_* slot for a class's + _PyXIData_getdata_t. It would be simpler and more efficient. */ /* registry lifecycle */ @@ -200,7 +228,7 @@ _xidregistry_find_type(dlregistry_t *xidregistry, PyTypeObject *cls) return NULL; } -static xidatafunc +static _PyXIData_getdata_t _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) { PyTypeObject *cls = Py_TYPE(obj); @@ -209,10 +237,12 @@ _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) _xidregistry_lock(xidregistry); dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); - xidatafunc func = matched != NULL ? matched->getdata : NULL; + _PyXIData_getdata_t getdata = matched != NULL + ? matched->getdata + : (_PyXIData_getdata_t){0}; _xidregistry_unlock(xidregistry); - return func; + return getdata; } @@ -220,12 +250,13 @@ _lookup_getdata_from_registry(dlcontext_t *ctx, PyObject *obj) static int _xidregistry_add_type(dlregistry_t *xidregistry, - PyTypeObject *cls, xidatafunc getdata) + PyTypeObject *cls, _PyXIData_getdata_t getdata) { dlregitem_t *newhead = PyMem_RawMalloc(sizeof(dlregitem_t)); if (newhead == NULL) { return -1; } + assert((getdata.basic == NULL) != (getdata.fallback == NULL)); *newhead = (dlregitem_t){ // We do not keep a reference, to avoid keeping the class alive. .cls = cls, @@ -283,13 +314,13 @@ _xidregistry_clear(dlregistry_t *xidregistry) int _PyXIData_RegisterClass(PyThreadState *tstate, - PyTypeObject *cls, xidatafunc getdata) + PyTypeObject *cls, _PyXIData_getdata_t getdata) { if (!PyType_Check(cls)) { PyErr_Format(PyExc_ValueError, "only classes may be registered"); return -1; } - if (getdata == NULL) { + if (getdata.basic == NULL && getdata.fallback == NULL) { PyErr_Format(PyExc_ValueError, "missing 'getdata' func"); return -1; } @@ -304,7 +335,8 @@ _PyXIData_RegisterClass(PyThreadState *tstate, dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); if (matched != NULL) { - assert(matched->getdata == getdata); + assert(matched->getdata.basic == getdata.basic); + assert(matched->getdata.fallback == getdata.fallback); matched->refcount += 1; goto finally; } @@ -608,7 +640,8 @@ _tuple_shared_free(void* data) } static int -_tuple_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) +_tuple_shared(PyThreadState *tstate, PyObject *obj, xidata_fallback_t fallback, + _PyXIData_t *xidata) { Py_ssize_t len = PyTuple_GET_SIZE(obj); if (len < 0) { @@ -636,7 +669,7 @@ _tuple_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) int res = -1; if (!_Py_EnterRecursiveCallTstate(tstate, " while sharing a tuple")) { - res = _PyObject_GetXIData(tstate, item, xidata_i); + res = _PyObject_GetXIData(tstate, item, fallback, xidata_i); _Py_LeaveRecursiveCallTstate(tstate); } if (res < 0) { @@ -677,44 +710,126 @@ _PyCode_GetXIData(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) return 0; } +// function + +PyObject * +_PyFunction_FromXIData(_PyXIData_t *xidata) +{ + // For now "stateless" functions are the only ones we must accommodate. + + PyObject *code = _PyMarshal_ReadObjectFromXIData(xidata); + if (code == NULL) { + return NULL; + } + // Create a new function. + // For stateless functions (no globals) we use __main__ as __globals__, + // just like we do for builtins like exec(). + assert(PyCode_Check(code)); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *globals = _PyEval_GetGlobalsFromRunningMain(tstate); // borrowed + if (globals == NULL) { + if (_PyErr_Occurred(tstate)) { + Py_DECREF(code); + return NULL; + } + globals = PyDict_New(); + if (globals == NULL) { + Py_DECREF(code); + return NULL; + } + } + else { + Py_INCREF(globals); + } + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { + Py_DECREF(code); + Py_DECREF(globals); + return NULL; + } + PyObject *func = PyFunction_New(code, globals); + Py_DECREF(code); + Py_DECREF(globals); + return func; +} + +int +_PyFunction_GetXIData(PyThreadState *tstate, PyObject *func, + _PyXIData_t *xidata) +{ + if (!PyFunction_Check(func)) { + const char *msg = "expected a function, got %R"; + format_notshareableerror(tstate, NULL, 0, msg, func); + return -1; + } + if (_PyFunction_VerifyStateless(tstate, func) < 0) { + PyObject *cause = _PyErr_GetRaisedException(tstate); + assert(cause != NULL); + const char *msg = "only stateless functions are shareable"; + set_notshareableerror(tstate, cause, 0, msg); + Py_DECREF(cause); + return -1; + } + PyObject *code = PyFunction_GET_CODE(func); + + // Ideally code objects would be immortal and directly shareable. + // In the meantime, we use marshal. + if (_PyMarshal_GetXIData(tstate, code, xidata) < 0) { + return -1; + } + // Replace _PyMarshal_ReadObjectFromXIData. + // (_PyFunction_FromXIData() will call it.) + _PyXIData_SET_NEW_OBJECT(xidata, _PyFunction_FromXIData); + return 0; +} + // registration static void _register_builtins_for_crossinterpreter_data(dlregistry_t *xidregistry) { +#define REGISTER(TYPE, GETDATA) \ + _xidregistry_add_type(xidregistry, (PyTypeObject *)TYPE, \ + ((_PyXIData_getdata_t){.basic=(GETDATA)})) +#define REGISTER_FALLBACK(TYPE, GETDATA) \ + _xidregistry_add_type(xidregistry, (PyTypeObject *)TYPE, \ + ((_PyXIData_getdata_t){.fallback=(GETDATA)})) // None - if (_xidregistry_add_type(xidregistry, (PyTypeObject *)PyObject_Type(Py_None), _none_shared) != 0) { + if (REGISTER(Py_TYPE(Py_None), _none_shared) != 0) { Py_FatalError("could not register None for cross-interpreter sharing"); } // int - if (_xidregistry_add_type(xidregistry, &PyLong_Type, _long_shared) != 0) { + if (REGISTER(&PyLong_Type, _long_shared) != 0) { Py_FatalError("could not register int for cross-interpreter sharing"); } // bytes - if (_xidregistry_add_type(xidregistry, &PyBytes_Type, _PyBytes_GetXIData) != 0) { + if (REGISTER(&PyBytes_Type, _PyBytes_GetXIData) != 0) { Py_FatalError("could not register bytes for cross-interpreter sharing"); } // str - if (_xidregistry_add_type(xidregistry, &PyUnicode_Type, _str_shared) != 0) { + if (REGISTER(&PyUnicode_Type, _str_shared) != 0) { Py_FatalError("could not register str for cross-interpreter sharing"); } // bool - if (_xidregistry_add_type(xidregistry, &PyBool_Type, _bool_shared) != 0) { + if (REGISTER(&PyBool_Type, _bool_shared) != 0) { Py_FatalError("could not register bool for cross-interpreter sharing"); } // float - if (_xidregistry_add_type(xidregistry, &PyFloat_Type, _float_shared) != 0) { + if (REGISTER(&PyFloat_Type, _float_shared) != 0) { Py_FatalError("could not register float for cross-interpreter sharing"); } // tuple - if (_xidregistry_add_type(xidregistry, &PyTuple_Type, _tuple_shared) != 0) { + if (REGISTER_FALLBACK(&PyTuple_Type, _tuple_shared) != 0) { Py_FatalError("could not register tuple for cross-interpreter sharing"); } + + // For now, we do not register PyCode_Type or PyFunction_Type. +#undef REGISTER +#undef REGISTER_FALLBACK } diff --git a/Python/crossinterp_exceptions.h b/Python/crossinterp_exceptions.h index ca4ca1cf123e49..98411adc5eb3f6 100644 --- a/Python/crossinterp_exceptions.h +++ b/Python/crossinterp_exceptions.h @@ -7,13 +7,6 @@ _ensure_current_cause(PyThreadState *tstate, PyObject *cause) } PyObject *exc = _PyErr_GetRaisedException(tstate); assert(exc != NULL); - PyObject *ctx = PyException_GetContext(exc); - if (ctx == NULL) { - PyException_SetContext(exc, Py_NewRef(cause)); - } - else { - Py_DECREF(ctx); - } assert(PyException_GetCause(exc) == NULL); PyException_SetCause(exc, Py_NewRef(cause)); _PyErr_SetRaisedException(tstate, exc); @@ -24,7 +17,7 @@ _ensure_current_cause(PyThreadState *tstate, PyObject *cause) static PyTypeObject _PyExc_InterpreterError = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "interpreters.InterpreterError", + .tp_name = "concurrent.interpreters.InterpreterError", .tp_doc = PyDoc_STR("A cross-interpreter operation failed"), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, //.tp_traverse = ((PyTypeObject *)PyExc_Exception)->tp_traverse, @@ -37,7 +30,7 @@ PyObject *PyExc_InterpreterError = (PyObject *)&_PyExc_InterpreterError; static PyTypeObject _PyExc_InterpreterNotFoundError = { PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "interpreters.InterpreterNotFoundError", + .tp_name = "concurrent.interpreters.InterpreterNotFoundError", .tp_doc = PyDoc_STR("An interpreter was not found"), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, //.tp_traverse = ((PyTypeObject *)PyExc_Exception)->tp_traverse, @@ -51,7 +44,7 @@ PyObject *PyExc_InterpreterNotFoundError = (PyObject *)&_PyExc_InterpreterNotFou static int _init_notshareableerror(exceptions_t *state) { - const char *name = "interpreters.NotShareableError"; + const char *name = "concurrent.interpreters.NotShareableError"; PyObject *base = PyExc_TypeError; PyObject *ns = NULL; PyObject *exctype = PyErr_NewException(name, base, ns); diff --git a/Python/emscripten_syscalls.c b/Python/emscripten_syscalls.c new file mode 100644 index 00000000000000..b1236e6b123fc9 --- /dev/null +++ b/Python/emscripten_syscalls.c @@ -0,0 +1,319 @@ +#include "emscripten.h" +#include "stdio.h" + +// If we're running in node, report the UID of the user in the native system as +// the UID of the user. Since the nodefs will report the uid correctly, if we +// don't make getuid report it correctly too we'll see some permission errors. +// Normally __syscall_getuid32 is a stub that always returns 0 but it is +// defined with weak linkage so we can override it. +EM_JS(int, __syscall_getuid32_js, (void), { + // If we're in node and we can, report the native uid + if (ENVIRONMENT_IS_NODE) { + return process.getuid(); + } + // Fall back to the stub case of returning 0. + return 0; +}) + +int __syscall_getuid32(void) { + return __syscall_getuid32_js(); +} + +EM_JS(int, __syscall_umask_js, (int mask), { + // If we're in node and we can, call native process.umask() + if (ENVIRONMENT_IS_NODE) { + try { + return process.umask(mask); + } catch(e) { + // oops... + // NodeJS docs: "In Worker threads, process.umask(mask) will throw an exception." + // umask docs: "This system call always succeeds" + return 0; + } + } + // Fall back to the stub case of returning 0. + return 0; +}) + +int __syscall_umask(int mask) { + return __syscall_umask_js(mask); +} + +#include <wasi/api.h> +#include <errno.h> +#include <fcntl.h> + +// Variant of EM_JS that does C preprocessor substitution on the body +#define EM_JS_MACROS(ret, func_name, args, body...) \ + EM_JS(ret, func_name, args, body) + +EM_JS_MACROS(void, _emscripten_promising_main_js, (void), { + // Define FS.createAsyncInputDevice(), This is quite similar to + // FS.createDevice() defined here: + // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libfs.js?plain=1#L1642 + // but instead of returning one byte at a time, the input() function should + // return a Uint8Array. This makes the handler code simpler, the + // `createAsyncInputDevice` simpler, and everything faster. + FS.createAsyncInputDevice = function(parent, name, input) { + parent = typeof parent == 'string' ? parent : FS.getPath(parent); + var path = PATH.join2(parent, name); + var mode = FS_getMode(true, false); + FS.createDevice.major ||= 64; + var dev = FS.makedev(FS.createDevice.major++, 0); + async function getDataBuf() { + var buf; + try { + buf = await input(); + } catch (e) { + throw new FS.ErrnoError(EIO); + } + if (!buf?.byteLength) { + throw new FS.ErrnoError(EAGAIN); + } + ops._dataBuf = buf; + } + + var ops = { + _dataBuf: new Uint8Array(0), + open(stream) { + stream.seekable = false; + }, + async readAsync(stream, buffer, offset, length, pos /* ignored */) { + buffer = buffer.subarray(offset, offset + length); + if (!ops._dataBuf.byteLength) { + await getDataBuf(); + } + var toRead = Math.min(ops._dataBuf.byteLength, buffer.byteLength); + buffer.subarray(0, toRead).set(ops._dataBuf); + buffer = buffer.subarray(toRead); + ops._dataBuf = ops._dataBuf.subarray(toRead); + if (toRead) { + stream.node.atime = Date.now(); + } + return toRead; + }, + }; + FS.registerDevice(dev, ops); + return FS.mkdev(path, mode, dev); + }; + if (!WebAssembly.promising) { + // No stack switching support =( + return; + } + const origResolveGlobalSymbol = resolveGlobalSymbol; + if (ENVIRONMENT_IS_NODE && !Module.onExit) { + Module.onExit = (code) => process.exit(code); + } + // * wrap the main symbol with WebAssembly.promising, + // * call exit_with_live_runtime() to prevent emscripten from shutting down + // the runtime before the promise resolves, + // * call onExit / process.exit ourselves, since exit_with_live_runtime() + // prevented Emscripten from calling it normally. + resolveGlobalSymbol = function (name, direct = false) { + const orig = origResolveGlobalSymbol(name, direct); + if (name === "main") { + const main = WebAssembly.promising(orig.sym); + orig.sym = (...args) => { + (async () => { + const ret = await main(...args); + Module.onExit?.(ret); + })(); + _emscripten_exit_with_live_runtime(); + }; + } + return orig; + }; +}) + +__attribute__((constructor)) void _emscripten_promising_main(void) { + _emscripten_promising_main_js(); +} + + +#define IOVEC_T_BUF_OFFSET 0 +#define IOVEC_T_BUF_LEN_OFFSET 4 +#define IOVEC_T_SIZE 8 +_Static_assert(offsetof(__wasi_iovec_t, buf) == IOVEC_T_BUF_OFFSET, + "Unexpected __wasi_iovec_t layout"); +_Static_assert(offsetof(__wasi_iovec_t, buf_len) == IOVEC_T_BUF_LEN_OFFSET, + "Unexpected __wasi_iovec_t layout"); +_Static_assert(sizeof(__wasi_iovec_t) == IOVEC_T_SIZE, + "Unexpected __wasi_iovec_t layout"); + +// If the stream has a readAsync handler, read to buffer defined in iovs, write +// number of bytes read to *nread, and return a promise that resolves to the +// errno. Otherwise, return null. +EM_JS_MACROS(__externref_t, __maybe_fd_read_async, ( + __wasi_fd_t fd, + const __wasi_iovec_t *iovs, + size_t iovcnt, + __wasi_size_t *nread +), { + if (!WebAssembly.promising) { + return null; + } + var stream; + try { + stream = SYSCALLS.getStreamFromFD(fd); + } catch (e) { + // If the fd was already closed or never existed, getStreamFromFD() + // raises. We'll let fd_read_orig() handle setting errno. + return null; + } + if (!stream.stream_ops.readAsync) { + // Not an async device. Fall back to __wasi_fd_read_orig(). + return null; + } + return (async () => { + // This is the same as libwasi.js fd_read() and doReadv() except we use + // readAsync and we await it. + // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L331 + // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L197 + try { + var ret = 0; + for (var i = 0; i < iovcnt; i++) { + var ptr = HEAP32[(iovs + IOVEC_T_BUF_OFFSET)/4]; + var len = HEAP32[(iovs + IOVEC_T_BUF_LEN_OFFSET)/4]; + iovs += IOVEC_T_SIZE; + var curr = await stream.stream_ops.readAsync(stream, HEAP8, ptr, len); + if (curr < 0) return -1; + ret += curr; + if (curr < len) break; // nothing more to read + } + HEAP32[nread/4] = ret; + return 0; + } catch (e) { + if (e.name !== 'ErrnoError') { + throw e; + } + return e["errno"]; + } + })(); +}; +); + +// Bind original fd_read syscall to __wasi_fd_read_orig(). +__wasi_errno_t __wasi_fd_read_orig(__wasi_fd_t fd, const __wasi_iovec_t *iovs, + size_t iovs_len, __wasi_size_t *nread) + __attribute__((__import_module__("wasi_snapshot_preview1"), + __import_name__("fd_read"), __warn_unused_result__)); + +// Take a promise that resolves to __wasi_errno_t and suspend until it resolves, +// get the output. +EM_JS(int, __block_for_int, (__externref_t p), { + return p; +} +if (WebAssembly.Suspending) { + __block_for_int = new WebAssembly.Suspending(__block_for_int); +} +) + +// Replacement for fd_read syscall. Call __maybe_fd_read_async. If it returned +// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_int +// to get the result. +__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs, + size_t iovs_len, __wasi_size_t *nread) { + __externref_t p = __maybe_fd_read_async(fd, iovs, iovs_len, nread); + if (__builtin_wasm_ref_is_null_extern(p)) { + return __wasi_fd_read_orig(fd, iovs, iovs_len, nread); + } + return __block_for_int(p); +} + +#include <poll.h> +#define POLLFD_FD 0 +#define POLLFD_EVENTS 4 +#define POLLFD_REVENTS 6 +#define POLLFD_SIZE 8 +_Static_assert(offsetof(struct pollfd, fd) == 0, "Unepxected pollfd struct layout"); +_Static_assert(offsetof(struct pollfd, events) == 4, "Unepxected pollfd struct layout"); +_Static_assert(offsetof(struct pollfd, revents) == 6, "Unepxected pollfd struct layout"); +_Static_assert(sizeof(struct pollfd) == 8, "Unepxected pollfd struct layout"); + +EM_JS_MACROS(__externref_t, __maybe_poll_async, (intptr_t fds, int nfds, int timeout), { + if (!WebAssembly.promising) { + return null; + } + return (async function() { + try { + var nonzero = 0; + var promises = []; + for (var i = 0; i < nfds; i++) { + var pollfd = fds + POLLFD_SIZE * i; + var fd = HEAP32[(pollfd + POLLFD_FD)/4]; + var events = HEAP16[(pollfd + POLLFD_EVENTS)/2]; + var mask = POLLNVAL; + var stream = FS.getStream(fd); + if (stream) { + mask = POLLIN | POLLOUT; + if (stream.stream_ops.pollAsync) { + promises.push(stream.stream_ops.pollAsync(stream, timeout).then((mask) => { + mask &= events | POLLERR | POLLHUP; + HEAP16[(pollfd + POLLFD_REVENTS)/2] = mask; + if (mask) { + nonzero ++; + } + })); + } else if (stream.stream_ops.poll) { + var mask = stream.stream_ops.poll(stream, timeout); + mask &= events | POLLERR | POLLHUP; + HEAP16[(pollfd + POLLFD_REVENTS)/2] = mask; + if (mask) { + nonzero ++; + } + } + } + } + await Promise.all(promises); + return nonzero; + } catch(e) { + if (e?.name !== "ErrnoError") throw e; + return -e["errno"]; + } + })(); +}); + +// Bind original poll syscall to syscall_poll_orig(). +int syscall_poll_orig(intptr_t fds, int nfds, int timeout) + __attribute__((__import_module__("env"), + __import_name__("__syscall_poll"), __warn_unused_result__)); + +int __syscall_poll(intptr_t fds, int nfds, int timeout) { + __externref_t p = __maybe_poll_async(fds, nfds, timeout); + if (__builtin_wasm_ref_is_null_extern(p)) { + return syscall_poll_orig(fds, nfds, timeout); + } + return __block_for_int(p); +} + +#include <sys/ioctl.h> + +int syscall_ioctl_orig(int fd, int request, void* varargs) + __attribute__((__import_module__("env"), + __import_name__("__syscall_ioctl"), __warn_unused_result__)); + +int __syscall_ioctl(int fd, int request, void* varargs) { + if (request == FIOCLEX || request == FIONCLEX) { + return 0; + } + if (request == FIONBIO) { + // Implement FIONBIO via fcntl. + // TODO: Upstream this. + int flags = fcntl(fd, F_GETFL, 0); + int nonblock = **((int**)varargs); + if (flags < 0) { + return -errno; + } + if (nonblock) { + flags |= O_NONBLOCK; + } else { + flags &= (~O_NONBLOCK); + } + int res = fcntl(fd, F_SETFL, flags); + if (res < 0) { + return -errno; + } + return res; + } + return syscall_ioctl_orig(fd, request, varargs); +} diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c index a7bb685bf3dc6d..d61146504d0959 100644 --- a/Python/emscripten_trampoline.c +++ b/Python/emscripten_trampoline.c @@ -2,224 +2,58 @@ #include <emscripten.h> // EM_JS, EM_JS_DEPS #include <Python.h> -#include "pycore_runtime.h" // _PyRuntime -typedef int (*CountArgsFunc)(PyCFunctionWithKeywords func); - -// Offset of emscripten_count_args_function in _PyRuntimeState. There's a couple -// of alternatives: -// 1. Just make emscripten_count_args_function a real C global variable instead -// of a field of _PyRuntimeState. This would violate our rule against mutable -// globals. -// 2. #define a preprocessor constant equal to a hard coded number and make a -// _Static_assert(offsetof(_PyRuntimeState, emscripten_count_args_function) -// == OURCONSTANT) This has the disadvantage that we have to update the hard -// coded constant when _PyRuntimeState changes -// -// So putting the mutable constant in _PyRuntime and using a immutable global to -// record the offset so we can access it from JS is probably the best way. -EMSCRIPTEN_KEEPALIVE const int _PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET = offsetof(_PyRuntimeState, emscripten_count_args_function); - -EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { - return Module._PyEM_CountArgsPtr; // initialized below +EM_JS( +PyObject*, +_PyEM_TrampolineCall_inner, (int* success, + PyCFunctionWithKeywords func, + PyObject *arg1, + PyObject *arg2, + PyObject *arg3), { + // JavaScript fallback trampoline + return wasmTable.get(func)(arg1, arg2, arg3); } -// Binary module for the checks. It has to be done in web assembly because -// clang/llvm have no support yet for the reference types yet. In fact, the wasm -// binary toolkit doesn't yet support the ref.test instruction either. To -// convert the following textual wasm to a binary, you can build wabt from this -// branch: https://github.com/WebAssembly/wabt/pull/2529 and then use that -// wat2wasm binary. -// -// (module -// (type $type0 (func (param) (result i32))) -// (type $type1 (func (param i32) (result i32))) -// (type $type2 (func (param i32 i32) (result i32))) -// (type $type3 (func (param i32 i32 i32) (result i32))) -// (type $blocktype (func (param i32) (result))) -// (table $funcs (import "e" "t") 0 funcref) -// (export "f" (func $f)) -// (func $f (param $fptr i32) (result i32) -// (local $fref funcref) -// local.get $fptr -// table.get $funcs -// local.tee $fref -// ref.test $type3 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b -// i32.const 3 -// return -// ) -// local.get $fref -// ref.test $type2 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b -// i32.const 2 -// return -// ) -// local.get $fref -// ref.test $type1 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b -// i32.const 1 -// return -// ) -// local.get $fref -// ref.test $type0 -// (block $b (type $blocktype) -// i32.eqz -// br_if $b -// i32.const 0 -// return -// ) -// i32.const -1 -// ) -// ) - -function getPyEMCountArgsPtr() { - let isIOS = globalThis.navigator && /iPad|iPhone|iPod/.test(navigator.platform); +// Try to replace the JS definition of _PyEM_TrampolineCall_inner with a wasm +// version. +(function () { + // Starting with iOS 18.3.1, WebKit on iOS has an issue with the garbage + // collector that breaks the call trampoline. See #130418 and + // https://bugs.webkit.org/show_bug.cgi?id=293113 for details. + let isIOS = globalThis.navigator && ( + /iPad|iPhone|iPod/.test(navigator.userAgent) || + // Starting with iPadOS 13, iPads might send a platform string that looks like a desktop Mac. + // To differentiate, we check if the platform is 'MacIntel' (common for Macs and newer iPads) + // AND if the device has multi-touch capabilities (navigator.maxTouchPoints > 1) + (navigator.platform === 'MacIntel' && typeof navigator.maxTouchPoints !== 'undefined' && navigator.maxTouchPoints > 1) + ); if (isIOS) { - return 0; + return; } - - // Try to initialize countArgsFunc - const code = new Uint8Array([ - 0x00, 0x61, 0x73, 0x6d, // \0asm magic number - 0x01, 0x00, 0x00, 0x00, // version 1 - 0x01, 0x1b, // Type section, body is 0x1b bytes - 0x05, // 6 entries - 0x60, 0x00, 0x01, 0x7f, // (type $type0 (func (param) (result i32))) - 0x60, 0x01, 0x7f, 0x01, 0x7f, // (type $type1 (func (param i32) (result i32))) - 0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // (type $type2 (func (param i32 i32) (result i32))) - 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, // (type $type3 (func (param i32 i32 i32) (result i32))) - 0x60, 0x01, 0x7f, 0x00, // (type $blocktype (func (param i32) (result))) - 0x02, 0x09, // Import section, 0x9 byte body - 0x01, // 1 import (table $funcs (import "e" "t") 0 funcref) - 0x01, 0x65, // "e" - 0x01, 0x74, // "t" - 0x01, // importing a table - 0x70, // of entry type funcref - 0x00, 0x00, // table limits: no max, min of 0 - 0x03, 0x02, // Function section - 0x01, 0x01, // We're going to define one function of type 1 (func (param i32) (result i32)) - 0x07, 0x05, // export section - 0x01, // 1 export - 0x01, 0x66, // called "f" - 0x00, // a function - 0x00, // at index 0 - - 0x0a, 0x44, // Code section, - 0x01, 0x42, // one entry of length 50 - 0x01, 0x01, 0x70, // one local of type funcref - // Body of the function - 0x20, 0x00, // local.get $fptr - 0x25, 0x00, // table.get $funcs - 0x22, 0x01, // local.tee $fref - 0xfb, 0x14, 0x03, // ref.test $type3 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b - 0x41, 0x03, // i32.const 3 - 0x0f, // return - 0x0b, // end block - - 0x20, 0x01, // local.get $fref - 0xfb, 0x14, 0x02, // ref.test $type2 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b - 0x41, 0x02, // i32.const 2 - 0x0f, // return - 0x0b, // end block - - 0x20, 0x01, // local.get $fref - 0xfb, 0x14, 0x01, // ref.test $type1 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b - 0x41, 0x01, // i32.const 1 - 0x0f, // return - 0x0b, // end block - - 0x20, 0x01, // local.get $fref - 0xfb, 0x14, 0x00, // ref.test $type0 - 0x02, 0x04, // block $b (type $blocktype) - 0x45, // i32.eqz - 0x0d, 0x00, // br_if $b - 0x41, 0x00, // i32.const 0 - 0x0f, // return - 0x0b, // end block - - 0x41, 0x7f, // i32.const -1 - 0x0b // end function - ]); try { - const mod = new WebAssembly.Module(code); - const inst = new WebAssembly.Instance(mod, { e: { t: wasmTable } }); - return addFunction(inst.exports.f); + const trampolineModule = getWasmTrampolineModule(); + const trampolineInstance = new WebAssembly.Instance(trampolineModule, { + env: { __indirect_function_table: wasmTable, memory: wasmMemory }, + }); + _PyEM_TrampolineCall_inner = trampolineInstance.exports.trampoline_call; } catch (e) { - // If something goes wrong, we'll null out _PyEM_CountFuncParams and fall - // back to the JS trampoline. - return 0; + // Compilation error due to missing wasm-gc support, fall back to JS + // trampoline } -} - -addOnPreRun(() => { - const ptr = getPyEMCountArgsPtr(); - Module._PyEM_CountArgsPtr = ptr; - const offset = HEAP32[__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET / 4]; - HEAP32[(__PyRuntime + offset) / 4] = ptr; -}); +})(); ); -void -_Py_EmscriptenTrampoline_Init(_PyRuntimeState *runtime) -{ - runtime->emscripten_count_args_function = _PyEM_GetCountArgsPtr(); -} - -// We have to be careful to work correctly with memory snapshots. Even if we are -// loading a memory snapshot, we need to perform the JS initialization work. -// That means we can't call the initialization code from C. Instead, we export -// this function pointer to JS and then fill it in a preRun function which runs -// unconditionally. -/** - * Backwards compatible trampoline works with all JS runtimes - */ -EM_JS(PyObject*, _PyEM_TrampolineCall_JS, (PyCFunctionWithKeywords func, PyObject *arg1, PyObject *arg2, PyObject *arg3), { - return wasmTable.get(func)(arg1, arg2, arg3); -}); - -typedef PyObject* (*zero_arg)(void); -typedef PyObject* (*one_arg)(PyObject*); -typedef PyObject* (*two_arg)(PyObject*, PyObject*); -typedef PyObject* (*three_arg)(PyObject*, PyObject*, PyObject*); - PyObject* _PyEM_TrampolineCall(PyCFunctionWithKeywords func, PyObject* self, PyObject* args, PyObject* kw) { - CountArgsFunc count_args = _PyRuntime.emscripten_count_args_function; - if (count_args == 0) { - return _PyEM_TrampolineCall_JS(func, self, args, kw); - } - switch (count_args(func)) { - case 0: - return ((zero_arg)func)(); - case 1: - return ((one_arg)func)(self); - case 2: - return ((two_arg)func)(self, args); - case 3: - return ((three_arg)func)(self, args, kw); - default: - PyErr_SetString(PyExc_SystemError, "Handler takes too many arguments"); - return NULL; + int success = 1; + PyObject *result = _PyEM_TrampolineCall_inner(&success, func, self, args, kw); + if (!success) { + PyErr_SetString(PyExc_SystemError, "Handler takes too many arguments"); } + return result; } #endif diff --git a/Python/emscripten_trampoline_inner.c b/Python/emscripten_trampoline_inner.c new file mode 100644 index 00000000000000..a2bad4857ed089 --- /dev/null +++ b/Python/emscripten_trampoline_inner.c @@ -0,0 +1,38 @@ +// This file must be compiled with -mgc to enable the extra wasm-gc +// instructions. It has to be compiled separately because not enough JS runtimes +// support wasm-gc yet. If the JS runtime does not support wasm-gc (or has buggy +// support like iOS), we will use the JS trampoline fallback. + +// We can't import Python.h here because it is compiled/linked with -nostdlib. +// We don't need to know what's inside PyObject* anyways. We could just call it +// void* everywhere. There are two reasons to do this: +// 1. to improve readability +// 2. eventually when we are comfortable requiring wasm-gc, we can merge this +// into emscripten_trampoline.c without worrying about it. +typedef void PyObject; + +typedef PyObject* (*three_arg)(PyObject*, PyObject*, PyObject*); +typedef PyObject* (*two_arg)(PyObject*, PyObject*); +typedef PyObject* (*one_arg)(PyObject*); +typedef PyObject* (*zero_arg)(void); + +#define TRY_RETURN_CALL(ty, args...) \ + if (__builtin_wasm_test_function_pointer_signature((ty)func)) { \ + return ((ty)func)(args); \ + } + +__attribute__((export_name("trampoline_call"))) PyObject* +trampoline_call(int* success, + void* func, + PyObject* self, + PyObject* args, + PyObject* kw) +{ + *success = 1; + TRY_RETURN_CALL(three_arg, self, args, kw); + TRY_RETURN_CALL(two_arg, self, args); + TRY_RETURN_CALL(one_arg, self); + TRY_RETURN_CALL(zero_arg); + *success = 0; + return 0; +} diff --git a/Python/errors.c b/Python/errors.c index 81f267b043afaf..89ea79f8c914fb 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -1938,8 +1938,8 @@ int _PyErr_EmitSyntaxWarning(PyObject *msg, PyObject *filename, int lineno, int col_offset, int end_lineno, int end_col_offset) { - if (_PyErr_WarnExplicitObjectWithContext(PyExc_SyntaxWarning, msg, - filename, lineno) < 0) + if (PyErr_WarnExplicitObject(PyExc_SyntaxWarning, msg, + filename, lineno, NULL, NULL) < 0) { if (PyErr_ExceptionMatches(PyExc_SyntaxWarning)) { /* Replace the SyntaxWarning exception with a SyntaxError diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3e51ac41fa3a2e..e3f4e5bdb65fd5 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1576,7 +1576,7 @@ new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; - frame->return_offset = 6 ; + frame->return_offset = 6u ; stack_pointer[-3].bits = (uintptr_t)new_frame; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); @@ -1944,8 +1944,8 @@ gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - assert( 2 + oparg <= UINT16_MAX); - frame->return_offset = (uint16_t)( 2 + oparg); + assert( 2u + oparg <= UINT16_MAX); + frame->return_offset = (uint16_t)( 2u + oparg); gen_frame->previous = frame; stack_pointer[-1].bits = (uintptr_t)gen_frame; break; @@ -4273,8 +4273,17 @@ _PyStackRef next; iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + stack_pointer = _PyFrame_GetStackPointer(frame); + JUMP_TO_ERROR(); + } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + PyObject *next_o = func(iter_o); stack_pointer = _PyFrame_GetStackPointer(frame); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { @@ -4535,7 +4544,7 @@ gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; gen_frame->previous = frame; - frame->return_offset = (uint16_t)( 2 + oparg); + frame->return_offset = (uint16_t)( 2u + oparg); stack_pointer[0].bits = (uintptr_t)gen_frame; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 78ef02a911a72b..04cd7a003da963 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1884,6 +1884,10 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg) result = PyNumber_Negative(operand); break; case UNARY_INVERT: + // XXX: This should be removed once the ~bool depreciation expires. + if (PyBool_Check(operand)) { + return NULL; + } result = PyNumber_Invert(operand); break; case UNARY_NOT: { @@ -2862,8 +2866,10 @@ optimize_load_fast(cfg_builder *g) // how many inputs should be left on the stack. // Opcodes that consume no inputs + case FORMAT_SIMPLE: case GET_ANEXT: case GET_LEN: + case GET_YIELD_FROM_ITER: case IMPORT_FROM: case MATCH_KEYS: case MATCH_MAPPING: @@ -2898,6 +2904,16 @@ optimize_load_fast(cfg_builder *g) break; } + case END_SEND: + case SET_FUNCTION_ATTRIBUTE: { + assert(_PyOpcode_num_popped(opcode, oparg) == 2); + assert(_PyOpcode_num_pushed(opcode, oparg) == 1); + ref tos = ref_stack_pop(&refs); + ref_stack_pop(&refs); + PUSH_REF(tos.instr, tos.local); + break; + } + // Opcodes that consume some inputs and push new values case CHECK_EXC_MATCH: { ref_stack_pop(&refs); @@ -2927,6 +2943,14 @@ optimize_load_fast(cfg_builder *g) break; } + case LOAD_SPECIAL: + case PUSH_EXC_INFO: { + ref tos = ref_stack_pop(&refs); + PUSH_REF(i, NOT_LOCAL); + PUSH_REF(tos.instr, tos.local); + break; + } + case SEND: { load_fast_push_block(&sp, instr->i_target, refs.size); ref_stack_pop(&refs); @@ -3439,11 +3463,13 @@ convert_pseudo_conditional_jumps(cfg_builder *g) instr->i_opcode = instr->i_opcode == JUMP_IF_FALSE ? POP_JUMP_IF_FALSE : POP_JUMP_IF_TRUE; location loc = instr->i_loc; + basicblock *except = instr->i_except; cfg_instr copy = { .i_opcode = COPY, .i_oparg = 1, .i_loc = loc, .i_target = NULL, + .i_except = except, }; RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &copy)); cfg_instr to_bool = { @@ -3451,6 +3477,7 @@ convert_pseudo_conditional_jumps(cfg_builder *g) .i_oparg = 0, .i_loc = loc, .i_target = NULL, + .i_except = except, }; RETURN_IF_ERROR(basicblock_insert_instruction(b, i++, &to_bool)); } @@ -3693,6 +3720,7 @@ insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entrybl .i_oparg = 0, .i_loc = loc, .i_target = NULL, + .i_except = NULL, }; RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen)); cfg_instr pop_top = { @@ -3700,6 +3728,7 @@ insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entrybl .i_oparg = 0, .i_loc = loc, .i_target = NULL, + .i_except = NULL, }; RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top)); } @@ -3730,6 +3759,7 @@ insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entrybl .i_oparg = oldindex, .i_loc = NO_LOCATION, .i_target = NULL, + .i_except = NULL, }; if (basicblock_insert_instruction(entryblock, ncellsused, &make_cell) < 0) { PyMem_RawFree(sorted); @@ -3746,6 +3776,7 @@ insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entrybl .i_oparg = nfreevars, .i_loc = NO_LOCATION, .i_target = NULL, + .i_except = NULL, }; RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &copy_frees)); } diff --git a/Python/gc.c b/Python/gc.c index 7b0e6d6e803504..c87d714ce4cfb1 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1590,7 +1590,7 @@ assess_work_to_do(GCState *gcstate) scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*3/2; + intptr_t max_heap_fraction = new_objects*2; intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; if (heap_fraction > max_heap_fraction) { heap_fraction = max_heap_fraction; @@ -1605,6 +1605,9 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; gcstate->work_to_do += assess_work_to_do(gcstate); + if (gcstate->work_to_do < 0) { + return; + } untrack_tuples(&gcstate->young.head); if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); @@ -1647,7 +1650,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1763,7 +1765,7 @@ gc_collect_region(PyThreadState *tstate, Py_ssize_t n = 0; for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { n++; - if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } stats->uncollectable = n; @@ -2022,8 +2024,10 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(gcstate, "start", generation, &stats); } + PyTime_t t1; if (gcstate->debug & _PyGC_DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); + (void)PyTime_PerfCounterRaw(&t1); show_stats_each_generations(gcstate); } if (PyDTrace_GC_START_ENABLED()) { @@ -2060,6 +2064,17 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) #endif validate_spaces(gcstate); _Py_atomic_store_int(&gcstate->collecting, 0); + + if (gcstate->debug & _PyGC_DEBUG_STATS) { + PyTime_t t2; + (void)PyTime_PerfCounterRaw(&t2); + double d = PyTime_AsSecondsDouble(t2 - t1); + PySys_WriteStderr( + "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", + stats.collected + stats.uncollectable, stats.uncollectable, d + ); + } + return stats.uncollectable + stats.collected; } @@ -2231,21 +2246,11 @@ _Py_ScheduleGC(PyThreadState *tstate) } void -_PyObject_GC_Link(PyObject *op) +_Py_TriggerGC(struct _gc_runtime_state *gcstate) { - PyGC_Head *gc = AS_GC(op); - // gc must be correctly aligned - _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0); - PyThreadState *tstate = _PyThreadState_GET(); - GCState *gcstate = &tstate->interp->gc; - gc->_gc_next = 0; - gc->_gc_prev = 0; - gcstate->young.count++; /* number of allocated GC objects */ - gcstate->heap_size++; - if (gcstate->young.count > gcstate->young.threshold && - gcstate->enabled && - gcstate->young.threshold && + if (gcstate->enabled && + gcstate->young.threshold != 0 && !_Py_atomic_load_int_relaxed(&gcstate->collecting) && !_PyErr_Occurred(tstate)) { @@ -2253,6 +2258,17 @@ _PyObject_GC_Link(PyObject *op) } } +void +_PyObject_GC_Link(PyObject *op) +{ + PyGC_Head *gc = AS_GC(op); + // gc must be correctly aligned + _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0); + gc->_gc_next = 0; + gc->_gc_prev = 0; + +} + void _Py_RunGC(PyThreadState *tstate) { @@ -2359,6 +2375,11 @@ PyObject_GC_Del(void *op) PyGC_Head *g = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op)) { gc_list_remove(g); + GCState *gcstate = get_gc_state(); + if (gcstate->young.count > 0) { + gcstate->young.count--; + } + gcstate->heap_size--; #ifdef Py_DEBUG PyObject *exc = PyErr_GetRaisedException(); if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, @@ -2372,11 +2393,6 @@ PyObject_GC_Del(void *op) PyErr_SetRaisedException(exc); #endif } - GCState *gcstate = get_gc_state(); - if (gcstate->young.count > 0) { - gcstate->young.count--; - } - gcstate->heap_size--; PyObject_Free(((char *)op)-presize); } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 757e9cb3227e26..d096accb4371c1 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -675,10 +675,11 @@ gc_mark_span_push(gc_span_stack_t *ss, PyObject **start, PyObject **end) else { ss->capacity *= 2; } - ss->stack = (gc_span_t *)PyMem_Realloc(ss->stack, ss->capacity * sizeof(gc_span_t)); - if (ss->stack == NULL) { + gc_span_t *new_stack = (gc_span_t *)PyMem_Realloc(ss->stack, ss->capacity * sizeof(gc_span_t)); + if (new_stack == NULL) { return -1; } + ss->stack = new_stack; } assert(end > start); ss->stack[ss->size].start = start; @@ -1927,8 +1928,7 @@ get_process_mem_usage(void) } #elif __linux__ - // Linux, use smaps_rollup (Kernel >= 4.4) for RSS + Swap - FILE* fp = fopen("/proc/self/smaps_rollup", "r"); + FILE* fp = fopen("/proc/self/status", "r"); if (fp == NULL) { return -1; } @@ -1938,11 +1938,11 @@ get_process_mem_usage(void) long long swap_kb = -1; while (fgets(line_buffer, sizeof(line_buffer), fp) != NULL) { - if (rss_kb == -1 && strncmp(line_buffer, "Rss:", 4) == 0) { - sscanf(line_buffer + 4, "%lld", &rss_kb); + if (rss_kb == -1 && strncmp(line_buffer, "VmRSS:", 6) == 0) { + sscanf(line_buffer + 6, "%lld", &rss_kb); } - else if (swap_kb == -1 && strncmp(line_buffer, "Swap:", 5) == 0) { - sscanf(line_buffer + 5, "%lld", &swap_kb); + else if (swap_kb == -1 && strncmp(line_buffer, "VmSwap:", 7) == 0) { + sscanf(line_buffer + 7, "%lld", &swap_kb); } if (rss_kb != -1 && swap_kb != -1) { break; // Found both @@ -2063,7 +2063,7 @@ gc_should_collect_mem_usage(GCState *gcstate) // 70,000 new container objects. return true; } - Py_ssize_t last_mem = gcstate->last_mem; + Py_ssize_t last_mem = _Py_atomic_load_ssize_relaxed(&gcstate->last_mem); Py_ssize_t mem_threshold = Py_MAX(last_mem / 10, 128); if ((mem - last_mem) > mem_threshold) { // The process memory usage has increased too much, do a collection. @@ -2134,7 +2134,19 @@ record_deallocation(PyThreadState *tstate) gc->alloc_count--; if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) { GCState *gcstate = &tstate->interp->gc; - _Py_atomic_add_int(&gcstate->young.count, (int)gc->alloc_count); + int count = _Py_atomic_load_int_relaxed(&gcstate->young.count); + int new_count; + do { + if (count == 0) { + break; + } + new_count = count + (int)gc->alloc_count; + if (new_count < 0) { + new_count = 0; + } + } while (!_Py_atomic_compare_exchange_int(&gcstate->young.count, + &count, + new_count)); gc->alloc_count = 0; } } @@ -2246,7 +2258,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Store the current memory usage, can be smaller now if breaking cycles // freed some memory. - state->gcstate->last_mem = get_process_mem_usage(); + Py_ssize_t last_mem = get_process_mem_usage(); + _Py_atomic_store_ssize_relaxed(&state->gcstate->last_mem, last_mem); // Append objects with legacy finalizers to the "gc.garbage" list. handle_legacy_finalizers(state); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7e8b05b747ed8f..f2b99365772f4d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -653,7 +653,7 @@ new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; - frame->return_offset = 6 ; + frame->return_offset = 6u ; } // _PUSH_FRAME { @@ -1586,7 +1586,7 @@ if (new_frame == NULL) { JUMP_TO_LABEL(error); } - frame->return_offset = 4 ; + frame->return_offset = 4u ; DISPATCH_INLINED(new_frame); } STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); @@ -2641,7 +2641,7 @@ if (new_frame == NULL) { JUMP_TO_LABEL(error); } - assert( 1 == 1); + assert( 1u == 1); frame->return_offset = 1; DISPATCH_INLINED(new_frame); } @@ -2922,8 +2922,8 @@ if (new_frame == NULL) { JUMP_TO_LABEL(error); } - assert( 4 == 1 + INLINE_CACHE_ENTRIES_CALL_KW); - frame->return_offset = 4 ; + assert( 4u == 1 + INLINE_CACHE_ENTRIES_CALL_KW); + frame->return_offset = 4u ; DISPATCH_INLINED(new_frame); } STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); @@ -3324,6 +3324,14 @@ JUMP_TO_PREDICTED(CALL_KW); } } + // _CHECK_RECURSION_REMAINING + { + if (tstate->py_recursion_remaining <= 1) { + UPDATE_MISS_STATS(CALL_KW); + assert(_PyOpcode_Deopt[opcode] == (CALL_KW)); + JUMP_TO_PREDICTED(CALL_KW); + } + } // _PY_FRAME_KW { kwnames = stack_pointer[-1]; @@ -5724,8 +5732,17 @@ // _FOR_ITER { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + stack_pointer = _PyFrame_GetStackPointer(frame); + JUMP_TO_LABEL(error); + } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + PyObject *next_o = func(iter_o); stack_pointer = _PyFrame_GetStackPointer(frame); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { @@ -5804,7 +5821,7 @@ gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; gen_frame->previous = frame; - frame->return_offset = (uint16_t)( 2 + oparg); + frame->return_offset = (uint16_t)( 2u + oparg); } // _PUSH_FRAME { @@ -6443,7 +6460,7 @@ if (new_frame == NULL) { JUMP_TO_LABEL(error); } - frame->return_offset = 4 ; + frame->return_offset = 4u ; DISPATCH_INLINED(new_frame); } STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); @@ -6664,7 +6681,7 @@ if (new_frame == NULL) { JUMP_TO_LABEL(error); } - assert( 1 == 1); + assert( 1u == 1); frame->return_offset = 1; DISPATCH_INLINED(new_frame); } @@ -6815,8 +6832,8 @@ if (new_frame == NULL) { JUMP_TO_LABEL(error); } - assert( 4 == 1 + INLINE_CACHE_ENTRIES_CALL_KW); - frame->return_offset = 4 ; + assert( 4u == 1 + INLINE_CACHE_ENTRIES_CALL_KW); + frame->return_offset = 4u ; DISPATCH_INLINED(new_frame); } STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); @@ -7037,8 +7054,17 @@ /* Skip 1 cache entry */ iter = stack_pointer[-1]; PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + stack_pointer = _PyFrame_GetStackPointer(frame); + JUMP_TO_LABEL(error); + } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + PyObject *next_o = func(iter_o); stack_pointer = _PyFrame_GetStackPointer(frame); if (next_o != NULL) { next = PyStackRef_FromPyObjectSteal(next_o); @@ -7478,7 +7504,13 @@ } // _MAYBE_INSTRUMENT { - if (tstate->tracing == 0) { + #ifdef Py_GIL_DISABLED + + int check_instrumentation = 1; + #else + int check_instrumentation = (tstate->tracing == 0); + #endif + if (check_instrumentation) { uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version) { @@ -8205,7 +8237,7 @@ stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); new_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name); - frame->return_offset = 10 ; + frame->return_offset = 10u ; DISPATCH_INLINED(new_frame); } @@ -10454,7 +10486,13 @@ } // _MAYBE_INSTRUMENT { - if (tstate->tracing == 0) { + #ifdef Py_GIL_DISABLED + + int check_instrumentation = 1; + #else + int check_instrumentation = (tstate->tracing == 0); + #endif + if (check_instrumentation) { uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); if (code_version != global_version) { @@ -10652,8 +10690,8 @@ gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - assert( 2 + oparg <= UINT16_MAX); - frame->return_offset = (uint16_t)( 2 + oparg); + assert( 2u + oparg <= UINT16_MAX); + frame->return_offset = (uint16_t)( 2u + oparg); assert(gen_frame->previous == NULL); gen_frame->previous = frame; DISPATCH_INLINED(gen_frame); @@ -10753,8 +10791,8 @@ gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - assert( 2 + oparg <= UINT16_MAX); - frame->return_offset = (uint16_t)( 2 + oparg); + assert( 2u + oparg <= UINT16_MAX); + frame->return_offset = (uint16_t)( 2u + oparg); gen_frame->previous = frame; } // _PUSH_FRAME diff --git a/Python/getargs.c b/Python/getargs.c index 0cf596285cc7b5..9d3dea0cb4364b 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1,8 +1,6 @@ /* New getargs implementation */ -#include <stdbool.h> - #define PY_CXX_CONST const #include "Python.h" #include "pycore_abstract.h" // _PyNumber_Index() @@ -468,12 +466,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; int i; Py_ssize_t len; - bool nullable = false; int istuple = PyTuple_Check(arg); int mustbetuple = istuple; - assert(*format == '('); - format++; for (;;) { int c = *format++; if (c == '(') { @@ -482,12 +477,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, level++; } else if (c == ')') { - if (level == 0) { - if (*format == '?') { - nullable = true; - } + if (level == 0) break; - } level--; } else if (c == ':' || c == ';' || c == '\0') @@ -524,13 +515,6 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } } - if (arg == Py_None && nullable) { - const char *msg = skipitem(p_format, p_va, flags); - if (msg != NULL) { - levels[0] = 0; - } - return msg; - } if (istuple) { /* fallthrough */ } @@ -539,10 +523,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be %d-item tuple%s, not %.50s", - n, - nullable ? " or None" : "", - arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); + "must be %d-item tuple, not %.50s", + n, + arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } else { @@ -579,7 +562,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return msgbuf; } - format = *p_format + 1; + format = *p_format; for (i = 0; i < n; i++) { const char *msg; PyObject *item = PyTuple_GET_ITEM(arg, i); @@ -594,10 +577,6 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } } - format++; - if (*format == '?') { - format++; - } *p_format = format; if (!istuple) { Py_DECREF(arg); @@ -616,8 +595,11 @@ convertitem(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; if (*format == '(' /* ')' */) { + format++; msg = converttuple(arg, &format, p_va, flags, levels, msgbuf, bufsize, freelist); + if (msg == NULL) + format++; } else { msg = convertsimple(arg, &format, p_va, flags, @@ -647,7 +629,7 @@ _PyArg_BadArgument(const char *fname, const char *displayname, } static const char * -converterr(bool nullable, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) +converterr(const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) { assert(expected != NULL); assert(arg != NULL); @@ -657,23 +639,20 @@ converterr(bool nullable, const char *expected, PyObject *arg, char *msgbuf, siz } else { PyOS_snprintf(msgbuf, bufsize, - "must be %.50s%s, not %.50s", expected, - nullable ? " or None" : "", + "must be %.50s, not %.50s", expected, arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); } return msgbuf; } static const char * -convertcharerr(bool nullable, const char *expected, const char *what, Py_ssize_t size, +convertcharerr(const char *expected, const char *what, Py_ssize_t size, char *msgbuf, size_t bufsize) { assert(expected != NULL); PyOS_snprintf(msgbuf, bufsize, - "must be %.50s%s, not %.50s of length %zd", - expected, - nullable ? " or None" : "", - what, size); + "must be %.50s, not %.50s of length %zd", + expected, what, size); return msgbuf; } @@ -693,26 +672,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, char *msgbuf, size_t bufsize, freelist_t *freelist) { #define RETURN_ERR_OCCURRED return msgbuf -#define HANDLE_NULLABLE \ - if (*format == '?') { \ - format++; \ - if (arg == Py_None) { \ - break; \ - } \ - nullable = true; \ - } - const char *format = *p_format; char c = *format++; const char *sarg; - bool nullable = false; switch (c) { case 'b': { /* unsigned byte -- very short int */ unsigned char *p = va_arg(*p_va, unsigned char *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -726,6 +694,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, "unsigned byte integer is greater than maximum"); RETURN_ERR_OCCURRED; } + else *p = (unsigned char) ival; break; } @@ -733,7 +702,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'B': {/* byte sized bitfield - both signed and unsigned values allowed */ unsigned char *p = va_arg(*p_va, unsigned char *); - HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -744,7 +712,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'h': {/* signed short int */ short *p = va_arg(*p_va, short *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -766,7 +733,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'H': { /* short int sized bitfield, both signed and unsigned allowed */ unsigned short *p = va_arg(*p_va, unsigned short *); - HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -777,7 +743,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'i': {/* signed int */ int *p = va_arg(*p_va, int *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -799,7 +764,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'I': { /* int sized bitfield, both signed and unsigned allowed */ unsigned int *p = va_arg(*p_va, unsigned int *); - HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -812,7 +776,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, { PyObject *iobj; Py_ssize_t *p = va_arg(*p_va, Py_ssize_t *); - HANDLE_NULLABLE; Py_ssize_t ival = -1; iobj = _PyNumber_Index(arg); if (iobj != NULL) { @@ -826,7 +789,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } case 'l': {/* long int */ long *p = va_arg(*p_va, long *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -837,10 +799,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'k': { /* long sized bitfield */ unsigned long *p = va_arg(*p_va, unsigned long *); - HANDLE_NULLABLE; unsigned long ival; if (!PyIndex_Check(arg)) { - return converterr(nullable, "int", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); } ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)(long)-1 && PyErr_Occurred()) { @@ -852,7 +813,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'L': {/* long long */ long long *p = va_arg( *p_va, long long * ); - HANDLE_NULLABLE; long long ival = PyLong_AsLongLong(arg); if (ival == (long long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -863,10 +823,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'K': { /* long long sized bitfield */ unsigned long long *p = va_arg(*p_va, unsigned long long *); - HANDLE_NULLABLE; unsigned long long ival; if (!PyIndex_Check(arg)) { - return converterr(nullable, "int", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); } ival = PyLong_AsUnsignedLongLongMask(arg); if (ival == (unsigned long long)(long long)-1 && PyErr_Occurred()) { @@ -878,7 +837,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'f': {/* float */ float *p = va_arg(*p_va, float *); - HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -889,7 +847,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'd': {/* double */ double *p = va_arg(*p_va, double *); - HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -900,7 +857,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'D': {/* complex double */ Py_complex *p = va_arg(*p_va, Py_complex *); - HANDLE_NULLABLE; Py_complex cval; cval = PyComplex_AsCComplex(arg); if (PyErr_Occurred()) @@ -912,10 +868,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'c': {/* char */ char *p = va_arg(*p_va, char *); - HANDLE_NULLABLE; if (PyBytes_Check(arg)) { if (PyBytes_GET_SIZE(arg) != 1) { - return convertcharerr(nullable, "a byte string of length 1", + return convertcharerr("a byte string of length 1", "a bytes object", PyBytes_GET_SIZE(arg), msgbuf, bufsize); } @@ -923,28 +878,27 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (PyByteArray_Check(arg)) { if (PyByteArray_GET_SIZE(arg) != 1) { - return convertcharerr(nullable, "a byte string of length 1", + return convertcharerr("a byte string of length 1", "a bytearray object", PyByteArray_GET_SIZE(arg), msgbuf, bufsize); } *p = PyByteArray_AS_STRING(arg)[0]; } else - return converterr(nullable, "a byte string of length 1", arg, msgbuf, bufsize); + return converterr("a byte string of length 1", arg, msgbuf, bufsize); break; } case 'C': {/* unicode char */ int *p = va_arg(*p_va, int *); - HANDLE_NULLABLE; int kind; const void *data; if (!PyUnicode_Check(arg)) - return converterr(nullable, "a unicode character", arg, msgbuf, bufsize); + return converterr("a unicode character", arg, msgbuf, bufsize); if (PyUnicode_GET_LENGTH(arg) != 1) { - return convertcharerr(nullable, "a unicode character", + return convertcharerr("a unicode character", "a string", PyUnicode_GET_LENGTH(arg), msgbuf, bufsize); } @@ -957,7 +911,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'p': {/* boolean *p*redicate */ int *p = va_arg(*p_va, int *); - HANDLE_NULLABLE; int val = PyObject_IsTrue(arg); if (val > 0) *p = 1; @@ -976,31 +929,24 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count; if (*format == '*') { - format++; - HANDLE_NULLABLE; if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + return converterr(buf, arg, msgbuf, bufsize); + format++; if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } break; } - else if (*format == '#') { + count = convertbuffer(arg, (const void **)p, &buf); + if (count < 0) + return converterr(buf, arg, msgbuf, bufsize); + if (*format == '#') { Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - format++; - HANDLE_NULLABLE; - count = convertbuffer(arg, (const void **)p, &buf); - if (count < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); *psize = count; - } - else { - HANDLE_NULLABLE; - count = convertbuffer(arg, (const void **)p, &buf); - if (count < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + format++; + } else { if (strlen(*p) != (size_t)count) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); RETURN_ERR_OCCURRED; @@ -1016,35 +962,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, /* "s*" or "z*" */ Py_buffer *p = (Py_buffer *)va_arg(*p_va, Py_buffer *); - format++; - HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0); else if (PyUnicode_Check(arg)) { Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(nullable, CONV_UNICODE, + return converterr(CONV_UNICODE, arg, msgbuf, bufsize); PyBuffer_FillInfo(p, arg, (void *)sarg, len, 1, 0); } else { /* any bytes-like object */ const char *buf; if (getbuffer(arg, p, &buf) < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + return converterr(buf, arg, msgbuf, bufsize); } if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } + format++; } else if (*format == '#') { /* a string or read-only bytes-like object */ /* "s#" or "z#" */ const void **p = (const void **)va_arg(*p_va, const char **); Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - format++; - HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) { *p = NULL; *psize = 0; @@ -1053,7 +996,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(nullable, CONV_UNICODE, + return converterr(CONV_UNICODE, arg, msgbuf, bufsize); *p = sarg; *psize = len; @@ -1063,22 +1006,22 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); if (count < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + return converterr(buf, arg, msgbuf, bufsize); *psize = count; } + format++; } else { /* "s" or "z" */ const char **p = va_arg(*p_va, const char **); Py_ssize_t len; sarg = NULL; - HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(nullable, CONV_UNICODE, + return converterr(CONV_UNICODE, arg, msgbuf, bufsize); if (strlen(sarg) != (size_t)len) { PyErr_SetString(PyExc_ValueError, "embedded null character"); @@ -1087,7 +1030,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = sarg; } else - return converterr(c == 'z' || nullable, "str", + return converterr(c == 'z' ? "str or None" : "str", arg, msgbuf, bufsize); } break; @@ -1116,46 +1059,13 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, recode_strings = 0; else return converterr( - nullable, "(unknown parser marker combination)", + "(unknown parser marker combination)", arg, msgbuf, bufsize); buffer = (char **)va_arg(*p_va, char **); format++; if (buffer == NULL) - return converterr(nullable, "(buffer is NULL)", + return converterr("(buffer is NULL)", arg, msgbuf, bufsize); - Py_ssize_t *psize = NULL; - if (*format == '#') { - /* Using buffer length parameter '#': - - - if *buffer is NULL, a new buffer of the - needed size is allocated and the data - copied into it; *buffer is updated to point - to the new buffer; the caller is - responsible for PyMem_Free()ing it after - usage - - - if *buffer is not NULL, the data is - copied to *buffer; *buffer_len has to be - set to the size of the buffer on input; - buffer overflow is signalled with an error; - buffer has to provide enough room for the - encoded string plus the trailing 0-byte - - - in both cases, *buffer_len is updated to - the size of the buffer /excluding/ the - trailing 0-byte - - */ - psize = va_arg(*p_va, Py_ssize_t*); - - format++; - if (psize == NULL) { - return converterr( - nullable, "(buffer_len is NULL)", - arg, msgbuf, bufsize); - } - } - HANDLE_NULLABLE; /* Encode object */ if (!recode_strings && @@ -1176,7 +1086,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, encoding, NULL); if (s == NULL) - return converterr(nullable, "(encoding failed)", + return converterr("(encoding failed)", arg, msgbuf, bufsize); assert(PyBytes_Check(s)); size = PyBytes_GET_SIZE(s); @@ -1186,15 +1096,42 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - nullable, - recode_strings ? "str" - : nullable ? "str, bytes, bytearray" - : "str, bytes or bytearray", + recode_strings ? "str" : "str, bytes or bytearray", arg, msgbuf, bufsize); } /* Write output; output is guaranteed to be 0-terminated */ - if (psize != NULL) { + if (*format == '#') { + /* Using buffer length parameter '#': + + - if *buffer is NULL, a new buffer of the + needed size is allocated and the data + copied into it; *buffer is updated to point + to the new buffer; the caller is + responsible for PyMem_Free()ing it after + usage + + - if *buffer is not NULL, the data is + copied to *buffer; *buffer_len has to be + set to the size of the buffer on input; + buffer overflow is signalled with an error; + buffer has to provide enough room for the + encoded string plus the trailing 0-byte + + - in both cases, *buffer_len is updated to + the size of the buffer /excluding/ the + trailing 0-byte + + */ + Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); + + format++; + if (psize == NULL) { + Py_DECREF(s); + return converterr( + "(buffer_len is NULL)", + arg, msgbuf, bufsize); + } if (*buffer == NULL) { *buffer = PyMem_NEW(char, size + 1); if (*buffer == NULL) { @@ -1205,7 +1142,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } } else { @@ -1239,7 +1176,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - nullable, "encoded string without null bytes", + "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1250,7 +1187,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); - return converterr(nullable, "(cleanup problem)", + return converterr("(cleanup problem)", arg, msgbuf, bufsize); } memcpy(*buffer, ptr, size+1); @@ -1261,32 +1198,29 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'S': { /* PyBytes object */ PyObject **p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; if (PyBytes_Check(arg)) *p = arg; else - return converterr(nullable, "bytes", arg, msgbuf, bufsize); + return converterr("bytes", arg, msgbuf, bufsize); break; } case 'Y': { /* PyByteArray object */ PyObject **p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; if (PyByteArray_Check(arg)) *p = arg; else - return converterr(nullable, "bytearray", arg, msgbuf, bufsize); + return converterr("bytearray", arg, msgbuf, bufsize); break; } case 'U': { /* PyUnicode object */ PyObject **p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; if (PyUnicode_Check(arg)) { *p = arg; } else - return converterr(nullable, "str", arg, msgbuf, bufsize); + return converterr("str", arg, msgbuf, bufsize); break; } @@ -1297,11 +1231,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, type = va_arg(*p_va, PyTypeObject*); p = va_arg(*p_va, PyObject **); format++; - HANDLE_NULLABLE; if (PyType_IsSubtype(Py_TYPE(arg), type)) *p = arg; else - return converterr(nullable, type->tp_name, arg, msgbuf, bufsize); + return converterr(type->tp_name, arg, msgbuf, bufsize); } else if (*format == '&') { @@ -1310,18 +1243,16 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, void *addr = va_arg(*p_va, void *); int res; format++; - HANDLE_NULLABLE; if (! (res = (*convert)(arg, addr))) - return converterr(nullable, "(unspecified)", + return converterr("(unspecified)", arg, msgbuf, bufsize); if (res == Py_CLEANUP_SUPPORTED && addcleanup(addr, freelist, convert) == -1) - return converterr(nullable, "(cleanup problem)", + return converterr("(cleanup problem)", arg, msgbuf, bufsize); } else { p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; *p = arg; } break; @@ -1333,30 +1264,29 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (*format != '*') return converterr( - nullable, "(invalid use of 'w' format character)", + "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; - HANDLE_NULLABLE; /* Caller is interested in Py_buffer, and the object supports it directly. The request implicitly asks for PyBUF_SIMPLE, so the result is C-contiguous with format 'B'. */ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr(nullable, "read-write bytes-like object", + return converterr("read-write bytes-like object", arg, msgbuf, bufsize); } assert(PyBuffer_IsContiguous((Py_buffer *)p, 'C')); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } break; } default: - return converterr(nullable, "(impossible<bad format char>)", arg, msgbuf, bufsize); + return converterr("(impossible<bad format char>)", arg, msgbuf, bufsize); } @@ -2751,9 +2681,6 @@ skipitem(const char **p_format, va_list *p_va, int flags) return "impossible<bad format char>"; } - if (*format == '?') { - format++; - } *p_format = format; return NULL; diff --git a/Python/getversion.c b/Python/getversion.c index 226b2f999a6bfd..8d8bc6ea70048c 100644 --- a/Python/getversion.c +++ b/Python/getversion.c @@ -15,7 +15,7 @@ void _Py_InitVersion(void) } initialized = 1; #ifdef Py_GIL_DISABLED - const char *buildinfo_format = "%.80s experimental free-threading build (%.80s) %.80s"; + const char *buildinfo_format = "%.80s free-threading build (%.80s) %.80s"; #else const char *buildinfo_format = "%.80s (%.80s) %.80s"; #endif diff --git a/Python/hamt.c b/Python/hamt.c index f9bbf63961d8de..e372b1a1b4c18b 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -256,9 +256,9 @@ Debug ===== The HAMT datatype is accessible for testing purposes under the -`_testcapi` module: +`_testinternalcapi` module: - >>> from _testcapi import hamt + >>> from _testinternalcapi import hamt >>> h = hamt() >>> h2 = h.set('a', 2) >>> h3 = h2.set('b', 3) @@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } if (key_or_null == NULL) { - if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) { goto error; } @@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } @@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node, goto error; } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } diff --git a/Python/import.c b/Python/import.c index afdc28eda31b9b..0158709ad91947 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_audit.h" // _PySys_Audit() #include "pycore_ceval.h" +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() #include "pycore_hashtable.h" // _Py_hashtable_new_full() #include "pycore_import.h" // _PyImport_BootstrapImp() #include "pycore_initconfig.h" // _PyStatus_OK() @@ -309,13 +310,8 @@ PyImport_GetModule(PyObject *name) if not, create a new one and insert it in the modules dictionary. */ static PyObject * -import_add_module(PyThreadState *tstate, PyObject *name) +import_add_module_lock_held(PyObject *modules, PyObject *name) { - PyObject *modules = get_modules_dict(tstate, false); - if (modules == NULL) { - return NULL; - } - PyObject *m; if (PyMapping_GetOptionalItem(modules, name, &m) < 0) { return NULL; @@ -335,6 +331,21 @@ import_add_module(PyThreadState *tstate, PyObject *name) return m; } +static PyObject * +import_add_module(PyThreadState *tstate, PyObject *name) +{ + PyObject *modules = get_modules_dict(tstate, false); + if (modules == NULL) { + return NULL; + } + + PyObject *m; + Py_BEGIN_CRITICAL_SECTION(modules); + m = import_add_module_lock_held(modules, name); + Py_END_CRITICAL_SECTION(); + return m; +} + PyObject * PyImport_AddModuleRef(const char *name) { @@ -3852,15 +3863,17 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, } final_mod = import_get_module(tstate, to_return); - Py_DECREF(to_return); if (final_mod == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_Format(tstate, PyExc_KeyError, "%R not in sys.modules as expected", to_return); } + Py_DECREF(to_return); goto error; } + + Py_DECREF(to_return); } } else { @@ -3956,23 +3969,28 @@ PyImport_Import(PyObject *module_name) } /* Get the builtins from current globals */ - globals = PyEval_GetGlobals(); + globals = PyEval_GetGlobals(); // borrowed if (globals != NULL) { Py_INCREF(globals); + // XXX Use _PyEval_EnsureBuiltins()? builtins = PyObject_GetItem(globals, &_Py_ID(__builtins__)); - if (builtins == NULL) + if (builtins == NULL) { + // XXX Fall back to interp->builtins or sys.modules['builtins']? goto err; + } + } + else if (_PyErr_Occurred(tstate)) { + goto err; } else { /* No globals -- use standard builtins, and fake globals */ - builtins = PyImport_ImportModuleLevel("builtins", - NULL, NULL, NULL, 0); - if (builtins == NULL) { + globals = PyDict_New(); + if (globals == NULL) { goto err; } - globals = Py_BuildValue("{OO}", &_Py_ID(__builtins__), builtins); - if (globals == NULL) + if (_PyEval_EnsureBuiltinsWithModule(tstate, globals, &builtins) < 0) { goto err; + } } /* Get the __import__ function from the builtins */ diff --git a/Python/importdl.c b/Python/importdl.c index 802843fe7b9dce..98e71e643ef331 100644 --- a/Python/importdl.c +++ b/Python/importdl.c @@ -175,7 +175,6 @@ _Py_ext_module_loader_info_init_for_builtin( PyObject *name) { assert(PyUnicode_Check(name)); - assert(PyUnicode_FindChar(name, '.', 0, PyUnicode_GetLength(name), -1) == -1); assert(PyUnicode_GetLength(name) > 0); PyObject *name_encoded = PyUnicode_AsEncodedString(name, "ascii", NULL); diff --git a/Python/index_pool.c b/Python/index_pool.c index 007c81a0fc16ec..520a65938ec6c7 100644 --- a/Python/index_pool.c +++ b/Python/index_pool.c @@ -172,6 +172,9 @@ _PyIndexPool_AllocIndex(_PyIndexPool *pool) else { index = heap_pop(free_indices); } + + pool->tlbc_generation++; + UNLOCK_POOL(pool); return index; } @@ -180,6 +183,7 @@ void _PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index) { LOCK_POOL(pool); + pool->tlbc_generation++; heap_add(&pool->free_indices, index); UNLOCK_POOL(pool); } diff --git a/Python/initconfig.c b/Python/initconfig.c index e827091172162e..bdd223213f5dd6 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -239,9 +239,11 @@ static const PyConfigSpec PYPRECONFIG_SPEC[] = { // Forward declarations -static PyObject* -config_get(const PyConfig *config, const PyConfigSpec *spec, - int use_sys); +static PyObject* config_get(const PyConfig *config, const PyConfigSpec *spec, + int use_sys); +static void initconfig_free_wstr(wchar_t *member); +static void initconfig_free_wstr_list(PyWideStringList *list); +static void initconfig_free_config(const PyConfig *config); /* --- Command line options --------------------------------------- */ @@ -312,7 +314,7 @@ The following implementation-specific options are available:\n\ "-X gil=[0|1]: enable (1) or disable (0) the GIL; also PYTHON_GIL\n" #endif "\ --X importtime[=2]: show how long each import takes; use -X importtime=2 to\ +-X importtime[=2]: show how long each import takes; use -X importtime=2 to\n\ log imports of already-loaded modules; also PYTHONPROFILEIMPORTTIME\n\ -X int_max_str_digits=N: limit the size of int<->str conversions;\n\ 0 disables the limit; also PYTHONINTMAXSTRDIGITS\n\ @@ -2962,8 +2964,6 @@ config_parse_cmdline(PyConfig *config, PyWideStringList *warnoptions, /* option handled by _PyPreCmdline_Read() */ break; - /* case 'J': reserved for Jython */ - case 'O': config->optimization_level++; break; @@ -3727,6 +3727,9 @@ PyInitConfig_Free(PyInitConfig *config) if (config == NULL) { return; } + + initconfig_free_config(&config->config); + PyMem_RawFree(config->inittab); free(config->err_msg); free(config); } @@ -4095,13 +4098,51 @@ PyInitConfig_SetStr(PyInitConfig *config, const char *name, const char* value) } +static void +initconfig_free_wstr(wchar_t *member) +{ + if (member) { + free(member); + } +} + + +static void +initconfig_free_wstr_list(PyWideStringList *list) +{ + for (Py_ssize_t i = 0; i < list->length; i++) { + free(list->items[i]); + } + free(list->items); +} + + +static void +initconfig_free_config(const PyConfig *config) +{ + const PyConfigSpec *spec = PYCONFIG_SPEC; + for (; spec->name != NULL; spec++) { + void *member = config_get_spec_member(config, spec); + if (spec->type == PyConfig_MEMBER_WSTR + || spec->type == PyConfig_MEMBER_WSTR_OPT) + { + wchar_t *wstr = *(wchar_t **)member; + initconfig_free_wstr(wstr); + } + else if (spec->type == PyConfig_MEMBER_WSTR_LIST) { + initconfig_free_wstr_list(member); + } + } +} + + static int -_PyWideStringList_FromUTF8(PyInitConfig *config, PyWideStringList *list, - Py_ssize_t length, char * const *items) +initconfig_set_str_list(PyInitConfig *config, PyWideStringList *list, + Py_ssize_t length, char * const *items) { PyWideStringList wlist = _PyWideStringList_INIT; size_t size = sizeof(wchar_t*) * length; - wlist.items = (wchar_t **)PyMem_RawMalloc(size); + wlist.items = (wchar_t **)malloc(size); if (wlist.items == NULL) { config->status = _PyStatus_NO_MEMORY(); return -1; @@ -4110,14 +4151,14 @@ _PyWideStringList_FromUTF8(PyInitConfig *config, PyWideStringList *list, for (Py_ssize_t i = 0; i < length; i++) { wchar_t *arg = utf8_to_wstr(config, items[i]); if (arg == NULL) { - _PyWideStringList_Clear(&wlist); + initconfig_free_wstr_list(&wlist); return -1; } wlist.items[i] = arg; wlist.length++; } - _PyWideStringList_Clear(list); + initconfig_free_wstr_list(list); *list = wlist; return 0; } @@ -4138,7 +4179,7 @@ PyInitConfig_SetStrList(PyInitConfig *config, const char *name, return -1; } PyWideStringList *list = raw_member; - if (_PyWideStringList_FromUTF8(config, list, length, items) < 0) { + if (initconfig_set_str_list(config, list, length, items) < 0) { return -1; } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 13bdd041becd69..32d6a204182892 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1040,6 +1040,8 @@ set_version_raw(uintptr_t *ptr, uint32_t version) static void set_global_version(PyThreadState *tstate, uint32_t version) { + ASSERT_WORLD_STOPPED(); + assert((version & _PY_EVAL_EVENTS_MASK) == 0); PyInterpreterState *interp = tstate->interp; set_version_raw(&interp->ceval.instrumentation_version, version); @@ -1190,9 +1192,10 @@ call_instrumentation_vector( break; } else { - LOCK_CODE(code); + PyInterpreterState *interp = tstate->interp; + _PyEval_StopTheWorld(interp); remove_tools(code, offset, event, 1 << tool); - UNLOCK_CODE(); + _PyEval_StartTheWorld(interp); } } } @@ -1381,9 +1384,10 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, } else { /* DISABLE */ - LOCK_CODE(code); + PyInterpreterState *interp = tstate->interp; + _PyEval_StopTheWorld(interp); remove_line_tools(code, i, 1 << tool); - UNLOCK_CODE(); + _PyEval_StartTheWorld(interp); } } while (tools); Py_DECREF(line_obj); @@ -1438,9 +1442,10 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* } else { /* DISABLE */ - LOCK_CODE(code); + PyInterpreterState *interp = tstate->interp; + _PyEval_StopTheWorld(interp); remove_per_instruction_tools(code, offset, 1 << tool); - UNLOCK_CODE(); + _PyEval_StartTheWorld(interp); } } Py_DECREF(offset_obj); @@ -1936,28 +1941,26 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) static int -instrument_all_executing_code_objects(PyInterpreterState *interp) { +instrument_all_executing_code_objects(PyInterpreterState *interp) +{ ASSERT_WORLD_STOPPED(); - _PyRuntimeState *runtime = &_PyRuntime; - HEAD_LOCK(runtime); - PyThreadState* ts = PyInterpreterState_ThreadHead(interp); - HEAD_UNLOCK(runtime); - while (ts) { + int err = 0; + _Py_FOR_EACH_TSTATE_BEGIN(interp, ts) { _PyInterpreterFrame *frame = ts->current_frame; while (frame) { if (frame->owner < FRAME_OWNED_BY_INTERPRETER) { - if (instrument_lock_held(_PyFrame_GetCode(frame), interp)) { - return -1; + err = instrument_lock_held(_PyFrame_GetCode(frame), interp); + if (err) { + goto done; } } frame = frame->previous; } - HEAD_LOCK(runtime); - ts = PyThreadState_Next(ts); - HEAD_UNLOCK(runtime); } - return 0; +done: + _Py_FOR_EACH_TSTATE_END(interp); + return err; } static void @@ -2003,6 +2006,7 @@ check_tool(PyInterpreterState *interp, int tool_id) int _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) { + ASSERT_WORLD_STOPPED(); assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); PyThreadState *tstate = _PyThreadState_GET(); PyInterpreterState *interp = tstate->interp; @@ -2011,33 +2015,28 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) return -1; } - int res; - _PyEval_StopTheWorld(interp); uint32_t existing_events = get_events(&interp->monitors, tool_id); if (existing_events == events) { - res = 0; - goto done; + return 0; } - set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); - res = -1; - goto done; + return -1; } + set_events(&interp->monitors, tool_id, events); set_global_version(tstate, new_version); #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 1); #endif - res = instrument_all_executing_code_objects(interp); -done: - _PyEval_StartTheWorld(interp); - return res; + return instrument_all_executing_code_objects(interp); } int _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEventSet events) { + ASSERT_WORLD_STOPPED(); + assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); PyInterpreterState *interp = _PyInterpreterState_GET(); assert(events < (1 << _PY_MONITORING_LOCAL_EVENTS)); @@ -2049,11 +2048,8 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent return -1; } - int res; - _PyEval_StopTheWorld(interp); if (allocate_instrumentation_data(code)) { - res = -1; - goto done; + return -1; } code->_co_monitoring->tool_versions[tool_id] = interp->monitoring_tool_versions[tool_id]; @@ -2061,16 +2057,11 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent _Py_LocalMonitors *local = &code->_co_monitoring->local_monitors; uint32_t existing_events = get_local_events(local, tool_id); if (existing_events == events) { - res = 0; - goto done; + return 0; } set_local_events(local, tool_id, events); - res = force_instrument_lock_held(code, interp); - -done: - _PyEval_StartTheWorld(interp); - return res; + return force_instrument_lock_held(code, interp); } int @@ -2102,11 +2093,12 @@ int _PyMonitoring_ClearToolId(int tool_id) } } + _PyEval_StopTheWorld(interp); if (_PyMonitoring_SetEvents(tool_id, 0) < 0) { + _PyEval_StartTheWorld(interp); return -1; } - _PyEval_StopTheWorld(interp); uint32_t version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); @@ -2343,7 +2335,11 @@ monitoring_set_events_impl(PyObject *module, int tool_id, int event_set) event_set &= ~(1 << PY_MONITORING_EVENT_BRANCH); event_set |= (1 << PY_MONITORING_EVENT_BRANCH_RIGHT) | (1 << PY_MONITORING_EVENT_BRANCH_LEFT); } - if (_PyMonitoring_SetEvents(tool_id, event_set)) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + int err = _PyMonitoring_SetEvents(tool_id, event_set); + _PyEval_StartTheWorld(interp); + if (err) { return NULL; } Py_RETURN_NONE; @@ -2424,7 +2420,11 @@ monitoring_set_local_events_impl(PyObject *module, int tool_id, return NULL; } - if (_PyMonitoring_SetLocalEvents((PyCodeObject*)code, tool_id, event_set)) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + int err = _PyMonitoring_SetLocalEvents((PyCodeObject*)code, tool_id, event_set); + _PyEval_StartTheWorld(interp); + if (err) { return NULL; } Py_RETURN_NONE; @@ -2558,18 +2558,22 @@ PyObject *_Py_CreateMonitoringObject(void) err = PyObject_SetAttrString(events, "NO_EVENTS", _PyLong_GetZero()); if (err) goto error; PyObject *val = PyLong_FromLong(PY_MONITORING_DEBUGGER_ID); + assert(val != NULL); /* Can't return NULL because the int is small. */ err = PyObject_SetAttrString(mod, "DEBUGGER_ID", val); Py_DECREF(val); if (err) goto error; val = PyLong_FromLong(PY_MONITORING_COVERAGE_ID); + assert(val != NULL); err = PyObject_SetAttrString(mod, "COVERAGE_ID", val); Py_DECREF(val); if (err) goto error; val = PyLong_FromLong(PY_MONITORING_PROFILER_ID); + assert(val != NULL); err = PyObject_SetAttrString(mod, "PROFILER_ID", val); Py_DECREF(val); if (err) goto error; val = PyLong_FromLong(PY_MONITORING_OPTIMIZER_ID); + assert(val != NULL); err = PyObject_SetAttrString(mod, "OPTIMIZER_ID", val); Py_DECREF(val); if (err) goto error; @@ -2991,9 +2995,10 @@ branch_handler_vectorcall( // Orphaned NOT_TAKEN -- Jump removed by the compiler return res; } - LOCK_CODE(code); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); remove_tools(code, offset, other_event, 1 << self->tool_id); - UNLOCK_CODE(); + _PyEval_StartTheWorld(interp); } return res; } diff --git a/Python/jit.c b/Python/jit.c index e232cc1f7d9250..9fbd8a18590411 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -69,10 +69,6 @@ jit_alloc(size_t size) #else int flags = MAP_ANONYMOUS | MAP_PRIVATE; int prot = PROT_READ | PROT_WRITE; -# ifdef MAP_JIT - flags |= MAP_JIT; - prot |= PROT_EXEC; -# endif unsigned char *memory = mmap(NULL, size, prot, flags, -1, 0); int failed = memory == MAP_FAILED; #endif @@ -118,11 +114,8 @@ mark_executable(unsigned char *memory, size_t size) int old; int failed = !VirtualProtect(memory, size, PAGE_EXECUTE_READ, &old); #else - int failed = 0; __builtin___clear_cache((char *)memory, (char *)memory + size); -#ifndef MAP_JIT - failed = mprotect(memory, size, PROT_EXEC | PROT_READ); -#endif + int failed = mprotect(memory, size, PROT_EXEC | PROT_READ); #endif if (failed) { jit_error("unable to protect executable memory"); @@ -528,9 +521,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz if (memory == NULL) { return -1; } -#ifdef MAP_JIT - pthread_jit_write_protect_np(0); -#endif // Collect memory stats OPT_STAT_ADD(jit_total_memory_size, total_size); OPT_STAT_ADD(jit_code_size, code_size); @@ -568,9 +558,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz data += group->data_size; assert(code == memory + code_size); assert(data == memory + code_size + state.trampolines.size + data_size); -#ifdef MAP_JIT - pthread_jit_write_protect_np(1); -#endif if (mark_executable(memory, total_size)) { jit_free(memory, total_size); return -1; diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index dbd19d7755c237..9c72fab298201e 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -133,16 +133,10 @@ sys_profile_call_or_return( Py_RETURN_NONE; } -int -_PyEval_SetOpcodeTrace( - PyFrameObject *frame, - bool enable -) { - assert(frame != NULL); - - PyCodeObject *code = _PyFrame_GetCode(frame->f_frame); +static int +set_opcode_trace_world_stopped(PyCodeObject *code, bool enable) +{ _PyMonitoringEventSet events = 0; - if (_PyMonitoring_GetLocalEvents(code, PY_MONITORING_SYS_TRACE_ID, &events) < 0) { return -1; } @@ -161,6 +155,32 @@ _PyEval_SetOpcodeTrace( return _PyMonitoring_SetLocalEvents(code, PY_MONITORING_SYS_TRACE_ID, events); } +int +_PyEval_SetOpcodeTrace(PyFrameObject *frame, bool enable) +{ + assert(frame != NULL); + + PyCodeObject *code = _PyFrame_GetCode(frame->f_frame); + +#ifdef Py_GIL_DISABLED + // First check if a change is necessary outside of the stop-the-world pause + _PyMonitoringEventSet events = 0; + if (_PyMonitoring_GetLocalEvents(code, PY_MONITORING_SYS_TRACE_ID, &events) < 0) { + return -1; + } + int is_enabled = (events & (1 << PY_MONITORING_EVENT_INSTRUCTION)) != 0; + if (is_enabled == enable) { + return 0; // No change needed + } +#endif + + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + int res = set_opcode_trace_world_stopped(code, enable); + _PyEval_StartTheWorld(interp); + return res; +} + static PyObject * call_trace_func(_PyLegacyEventHandler *self, PyObject *arg) { @@ -438,59 +458,74 @@ is_tstate_valid(PyThreadState *tstate) } #endif -static Py_ssize_t -setup_profile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_profileobj) +static int +setup_profile_callbacks(void *Py_UNUSED(arg)) { - *old_profileobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ - if (!tstate->interp->sys_profile_initialized) { - tstate->interp->sys_profile_initialized = true; - if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, - sys_profile_start, PyTrace_CALL, - PY_MONITORING_EVENT_PY_START, - PY_MONITORING_EVENT_PY_RESUME)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, - sys_profile_throw, PyTrace_CALL, - PY_MONITORING_EVENT_PY_THROW, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, - sys_profile_return, PyTrace_RETURN, - PY_MONITORING_EVENT_PY_RETURN, - PY_MONITORING_EVENT_PY_YIELD)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, - sys_profile_unwind, PyTrace_RETURN, - PY_MONITORING_EVENT_PY_UNWIND, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, - sys_profile_call_or_return, PyTrace_C_CALL, - PY_MONITORING_EVENT_CALL, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, - sys_profile_call_or_return, PyTrace_C_RETURN, - PY_MONITORING_EVENT_C_RETURN, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, - sys_profile_call_or_return, PyTrace_C_EXCEPTION, - PY_MONITORING_EVENT_C_RAISE, -1)) { - return -1; - } + if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, + sys_profile_start, PyTrace_CALL, + PY_MONITORING_EVENT_PY_START, + PY_MONITORING_EVENT_PY_RESUME)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, + sys_profile_throw, PyTrace_CALL, + PY_MONITORING_EVENT_PY_THROW, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, + sys_profile_return, PyTrace_RETURN, + PY_MONITORING_EVENT_PY_RETURN, + PY_MONITORING_EVENT_PY_YIELD)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, + sys_profile_unwind, PyTrace_RETURN, + PY_MONITORING_EVENT_PY_UNWIND, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, + sys_profile_call_or_return, PyTrace_C_CALL, + PY_MONITORING_EVENT_CALL, -1)) { + return -1; } + if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, + sys_profile_call_or_return, PyTrace_C_RETURN, + PY_MONITORING_EVENT_C_RETURN, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_PROFILE_ID, + sys_profile_call_or_return, PyTrace_C_EXCEPTION, + PY_MONITORING_EVENT_C_RAISE, -1)) { + return -1; + } + return 0; +} +static PyObject * +swap_profile_func_arg(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +{ int delta = (func != NULL) - (tstate->c_profilefunc != NULL); tstate->c_profilefunc = func; - *old_profileobj = tstate->c_profileobj; + PyObject *old_profileobj = tstate->c_profileobj; tstate->c_profileobj = Py_XNewRef(arg); tstate->interp->sys_profiling_threads += delta; assert(tstate->interp->sys_profiling_threads >= 0); - return tstate->interp->sys_profiling_threads; + return old_profileobj; +} + +static int +set_monitoring_profile_events(PyInterpreterState *interp) +{ + uint32_t events = 0; + if (interp->sys_profiling_threads) { + events = + (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | + (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | + (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | + (1 << PY_MONITORING_EVENT_PY_THROW); + } + return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); } int @@ -507,87 +542,155 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) return -1; } - // needs to be decref'd outside of the lock - PyObject *old_profileobj; - LOCK_SETUP(); - Py_ssize_t profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); - UNLOCK_SETUP(); - Py_XDECREF(old_profileobj); + PyInterpreterState *interp = tstate->interp; + if (_PyOnceFlag_CallOnce(&interp->sys_profile_once_flag, + setup_profile_callbacks, NULL) < 0) { + return -1; + } + + _PyEval_StopTheWorld(interp); + PyObject *old_profileobj = swap_profile_func_arg(tstate, func, arg); + int ret = set_monitoring_profile_events(interp); + _PyEval_StartTheWorld(interp); + Py_XDECREF(old_profileobj); // needs to be decref'd outside of stop-the-world + return ret; +} + +int +_PyEval_SetProfileAllThreads(PyInterpreterState *interp, Py_tracefunc func, PyObject *arg) +{ + PyThreadState *current_tstate = _PyThreadState_GET(); + assert(is_tstate_valid(current_tstate)); + assert(current_tstate->interp == interp); + + if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) { + return -1; + } + + if (_PyOnceFlag_CallOnce(&interp->sys_profile_once_flag, + setup_profile_callbacks, NULL) < 0) { + return -1; + } - uint32_t events = 0; - if (profiling_threads) { - events = - (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | - (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | - (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | - (1 << PY_MONITORING_EVENT_PY_THROW); + PyObject *old_profileobjs = NULL; + _PyEval_StopTheWorld(interp); + HEAD_LOCK(&_PyRuntime); + Py_ssize_t num_thread_states = 0; + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) { + num_thread_states++; } - return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); + old_profileobjs = PyTuple_New(num_thread_states); + if (old_profileobjs == NULL) { + HEAD_UNLOCK(&_PyRuntime); + _PyEval_StartTheWorld(interp); + return -1; + } + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { + PyObject *old = swap_profile_func_arg(tstate, func, arg); + PyTuple_SET_ITEM(old_profileobjs, --num_thread_states, old); + } + HEAD_UNLOCK(&_PyRuntime); + int ret = set_monitoring_profile_events(interp); + _PyEval_StartTheWorld(interp); + Py_XDECREF(old_profileobjs); // needs to be decref'd outside of stop-the-world + return ret; } -static Py_ssize_t -setup_tracing(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_traceobj) +static int +setup_trace_callbacks(void *Py_UNUSED(arg)) { - *old_traceobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ - if (!tstate->interp->sys_trace_initialized) { - tstate->interp->sys_trace_initialized = true; - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_start, PyTrace_CALL, - PY_MONITORING_EVENT_PY_START, - PY_MONITORING_EVENT_PY_RESUME)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_throw, PyTrace_CALL, - PY_MONITORING_EVENT_PY_THROW, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_return, PyTrace_RETURN, - PY_MONITORING_EVENT_PY_RETURN, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_yield, PyTrace_RETURN, - PY_MONITORING_EVENT_PY_YIELD, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_exception_func, PyTrace_EXCEPTION, - PY_MONITORING_EVENT_RAISE, - PY_MONITORING_EVENT_STOP_ITERATION)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_line_func, PyTrace_LINE, - PY_MONITORING_EVENT_LINE, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_unwind, PyTrace_RETURN, - PY_MONITORING_EVENT_PY_UNWIND, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_jump_func, PyTrace_LINE, - PY_MONITORING_EVENT_JUMP, -1)) { - return -1; - } - if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, - sys_trace_instruction_func, PyTrace_OPCODE, - PY_MONITORING_EVENT_INSTRUCTION, -1)) { - return -1; - } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_start, PyTrace_CALL, + PY_MONITORING_EVENT_PY_START, + PY_MONITORING_EVENT_PY_RESUME)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_throw, PyTrace_CALL, + PY_MONITORING_EVENT_PY_THROW, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_return, PyTrace_RETURN, + PY_MONITORING_EVENT_PY_RETURN, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_yield, PyTrace_RETURN, + PY_MONITORING_EVENT_PY_YIELD, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_exception_func, PyTrace_EXCEPTION, + PY_MONITORING_EVENT_RAISE, + PY_MONITORING_EVENT_STOP_ITERATION)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_line_func, PyTrace_LINE, + PY_MONITORING_EVENT_LINE, -1)) { + return -1; } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_unwind, PyTrace_RETURN, + PY_MONITORING_EVENT_PY_UNWIND, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_jump_func, PyTrace_LINE, + PY_MONITORING_EVENT_JUMP, -1)) { + return -1; + } + if (set_callbacks(PY_MONITORING_SYS_TRACE_ID, + sys_trace_instruction_func, PyTrace_OPCODE, + PY_MONITORING_EVENT_INSTRUCTION, -1)) { + return -1; + } + return 0; +} +static PyObject * +swap_trace_func_arg(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +{ int delta = (func != NULL) - (tstate->c_tracefunc != NULL); tstate->c_tracefunc = func; - *old_traceobj = tstate->c_traceobj; + PyObject *old_traceobj = tstate->c_traceobj; tstate->c_traceobj = Py_XNewRef(arg); tstate->interp->sys_tracing_threads += delta; assert(tstate->interp->sys_tracing_threads >= 0); - return tstate->interp->sys_tracing_threads; + return old_traceobj; +} + +static int +set_monitoring_trace_events(PyInterpreterState *interp) +{ + uint32_t events = 0; + if (interp->sys_tracing_threads) { + events = + (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | + (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | + (1 << PY_MONITORING_EVENT_RAISE) | (1 << PY_MONITORING_EVENT_LINE) | + (1 << PY_MONITORING_EVENT_JUMP) | + (1 << PY_MONITORING_EVENT_PY_UNWIND) | (1 << PY_MONITORING_EVENT_PY_THROW) | + (1 << PY_MONITORING_EVENT_STOP_ITERATION); + } + return _PyMonitoring_SetEvents(PY_MONITORING_SYS_TRACE_ID, events); +} + +// Enable opcode tracing for the thread's current frame if needed. +static int +maybe_set_opcode_trace(PyThreadState *tstate) +{ + _PyInterpreterFrame *iframe = tstate->current_frame; + if (iframe == NULL) { + return 0; + } + PyFrameObject *frame = iframe->frame_obj; + if (frame == NULL || !frame->f_trace_opcodes) { + return 0; + } + return set_opcode_trace_world_stopped(_PyFrame_GetCode(iframe), true); } int @@ -603,35 +706,76 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { return -1; } - // needs to be decref'd outside of the lock - PyObject *old_traceobj; - LOCK_SETUP(); - assert(tstate->interp->sys_tracing_threads >= 0); - Py_ssize_t tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); - UNLOCK_SETUP(); - Py_XDECREF(old_traceobj); - if (tracing_threads < 0) { + + PyInterpreterState *interp = tstate->interp; + if (_PyOnceFlag_CallOnce(&interp->sys_trace_once_flag, + setup_trace_callbacks, NULL) < 0) { return -1; } - uint32_t events = 0; - if (tracing_threads) { - events = - (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | - (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | - (1 << PY_MONITORING_EVENT_RAISE) | (1 << PY_MONITORING_EVENT_LINE) | - (1 << PY_MONITORING_EVENT_JUMP) | - (1 << PY_MONITORING_EVENT_PY_UNWIND) | (1 << PY_MONITORING_EVENT_PY_THROW) | - (1 << PY_MONITORING_EVENT_STOP_ITERATION); + int err = 0; + _PyEval_StopTheWorld(interp); + PyObject *old_traceobj = swap_trace_func_arg(tstate, func, arg); + err = set_monitoring_trace_events(interp); + if (err != 0) { + goto done; + } + if (interp->sys_tracing_threads) { + err = maybe_set_opcode_trace(tstate); + } +done: + _PyEval_StartTheWorld(interp); + Py_XDECREF(old_traceobj); // needs to be decref'd outside stop-the-world + return err; +} + +int +_PyEval_SetTraceAllThreads(PyInterpreterState *interp, Py_tracefunc func, PyObject *arg) +{ + PyThreadState *current_tstate = _PyThreadState_GET(); + assert(is_tstate_valid(current_tstate)); + assert(current_tstate->interp == interp); + + if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { + return -1; + } + + if (_PyOnceFlag_CallOnce(&interp->sys_trace_once_flag, + setup_trace_callbacks, NULL) < 0) { + return -1; + } - PyFrameObject* frame = PyEval_GetFrame(); - if (frame && frame->f_trace_opcodes) { - int ret = _PyEval_SetOpcodeTrace(frame, true); - if (ret != 0) { - return ret; + PyObject *old_trace_objs = NULL; + _PyEval_StopTheWorld(interp); + HEAD_LOCK(&_PyRuntime); + Py_ssize_t num_thread_states = 0; + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) { + num_thread_states++; + } + old_trace_objs = PyTuple_New(num_thread_states); + if (old_trace_objs == NULL) { + HEAD_UNLOCK(&_PyRuntime); + _PyEval_StartTheWorld(interp); + return -1; + } + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { + PyObject *old = swap_trace_func_arg(tstate, func, arg); + PyTuple_SET_ITEM(old_trace_objs, --num_thread_states, old); + } + if (interp->sys_tracing_threads) { + _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { + int err = maybe_set_opcode_trace(tstate); + if (err != 0) { + HEAD_UNLOCK(&_PyRuntime); + _PyEval_StartTheWorld(interp); + Py_XDECREF(old_trace_objs); + return -1; } } } - - return _PyMonitoring_SetEvents(PY_MONITORING_SYS_TRACE_ID, events); + HEAD_UNLOCK(&_PyRuntime); + int err = set_monitoring_trace_events(interp); + _PyEval_StartTheWorld(interp); + Py_XDECREF(old_trace_objs); // needs to be decref'd outside of stop-the-world + return err; } diff --git a/Python/lock.c b/Python/lock.c index 28a12ad18352d1..ca269bd2cb4b51 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -58,7 +58,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) return PY_LOCK_ACQUIRED; } } - else if (timeout == 0) { + if (timeout == 0) { return PY_LOCK_FAILURE; } @@ -619,3 +619,11 @@ PyMutex_Unlock(PyMutex *m) Py_FatalError("unlocking mutex that is not locked"); } } + + +#undef PyMutex_IsLocked +int +PyMutex_IsLocked(PyMutex *m) +{ + return _PyMutex_IsLocked(m); +} diff --git a/Python/marshal.c b/Python/marshal.c index b39c1a5b1ade50..50001497f6250b 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -11,6 +11,7 @@ #include "pycore_code.h" // _PyCode_New() #include "pycore_hashtable.h" // _Py_hashtable_t #include "pycore_long.h" // _PyLong_IsZero() +#include "pycore_object.h" // _PyObject_IsUniquelyReferenced #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_setobject.h" // _PySet_NextEntryRef() #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal() @@ -388,7 +389,7 @@ w_ref(PyObject *v, char *flag, WFILE *p) * But we use TYPE_REF always for interned string, to PYC file stable * as possible. */ - if (Py_REFCNT(v) == 1 && + if (_PyObject_IsUniquelyReferenced(v) && !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) { return 0; } @@ -1656,6 +1657,9 @@ r_object(RFILE *p) case TYPE_SLICE: { Py_ssize_t idx = r_ref_reserve(flag, p); + if (idx < 0) { + break; + } PyObject *stop = NULL; PyObject *step = NULL; PyObject *start = r_object(p); diff --git a/Python/optimizer.c b/Python/optimizer.c index dde3dd8ebe745a..3a66aeb4f827b3 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -109,13 +109,21 @@ uop_optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *instr, /* Returns 1 if optimized, 0 if not optimized, and -1 for an error. * If optimized, *executor_ptr contains a new reference to the executor */ -int +// gh-137573: inlining this function causes stack overflows +Py_NO_INLINE int _PyOptimizer_Optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyExecutorObject **executor_ptr, int chain_depth) { _PyStackRef *stack_pointer = frame->stackpointer; - assert(_PyInterpreterState_GET()->jit); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!interp->jit) { + // gh-140936: It is possible that interp->jit will become false during + // interpreter finalization. However, the specialized JUMP_BACKWARD_JIT + // instruction may still be present. In this case, we should + // return immediately without optimization. + return 0; + } // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long // side-exit chains. We can only insert the executor into the bytecode if @@ -1205,6 +1213,10 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil assert(next_exit == -1); assert(dest == executor->trace); assert(dest->opcode == _START_EXECUTOR); + // Note: we MUST track it here before any Py_DECREF(executor) or + // linking of executor. Otherwise, the GC tries to untrack a + // still untracked object during dealloc. + _PyObject_GC_TRACK(executor); _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -1234,7 +1246,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil return NULL; } #endif - _PyObject_GC_TRACK(executor); return executor; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b0bd1e9518c6e..2bcd99b8bcdc6d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -103,6 +103,10 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj, bool pop) if ((int)index >= dict->ma_keys->dk_nentries) { return NULL; } + PyDictKeysObject *keys = dict->ma_keys; + if (keys->dk_version != inst->operand0) { + return NULL; + } PyObject *res = entries[index].me_value; if (res == NULL) { return NULL; @@ -445,13 +449,10 @@ optimize_uops( _Py_uop_abstractcontext_init(ctx); _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0); if (frame == NULL) { - return -1; + return 0; } ctx->curr_frame_depth++; ctx->frame = frame; - ctx->done = false; - ctx->out_of_space = false; - ctx->contradiction = false; _PyUOpInstruction *this_instr = NULL; for (int i = 0; !ctx->done; i++) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e99421a3aff4ba..e3927d6c690a8b 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -580,7 +580,13 @@ dummy_func(void) { PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); PyObject *res = convert_global_to_const(this_instr, dict, true); - attr = sym_new_const(ctx, res); + if (res == NULL) { + attr = sym_new_not_null(ctx); + } + else { + attr = sym_new_const(ctx, res); + } + } } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 56b4b9983fbaaa..53ebdc3b562fe0 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1235,7 +1235,12 @@ PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); PyObject *res = convert_global_to_const(this_instr, dict, true); - attr = sym_new_const(ctx, res); + if (res == NULL) { + attr = sym_new_not_null(ctx); + } + else { + attr = sym_new_const(ctx, res); + } } } } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e8a4f87031b76a..2387f215178624 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -691,6 +691,13 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx) // Frame setup ctx->curr_frame_depth = 0; + + // Ctx signals. + // Note: this must happen before frame_new, as it might override + // the result should frame_new set things to bottom. + ctx->done = false; + ctx->out_of_space = false; + ctx->contradiction = false; } int diff --git a/Python/parking_lot.c b/Python/parking_lot.c index 8edf43235942ab..0ffea97b079943 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -112,17 +112,27 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout) } } - // NOTE: we wait on the sigint event even in non-main threads to match the - // behavior of the other platforms. Non-main threads will ignore the - // Py_PARK_INTR result. - HANDLE sigint_event = _PyOS_SigintEvent(); - HANDLE handles[2] = { sema->platform_sem, sigint_event }; - DWORD count = sigint_event != NULL ? 2 : 1; + HANDLE handles[2] = { sema->platform_sem, NULL }; + HANDLE sigint_event = NULL; + DWORD count = 1; + if (_Py_IsMainThread()) { + // gh-135099: Wait on the SIGINT event only in the main thread. Other + // threads would ignore the result anyways, and accessing + // `_PyOS_SigintEvent()` from non-main threads may race with + // interpreter shutdown, which closes the event handle. Note that + // non-main interpreters will ignore the result. + sigint_event = _PyOS_SigintEvent(); + if (sigint_event != NULL) { + handles[1] = sigint_event; + count = 2; + } + } wait = WaitForMultipleObjects(count, handles, FALSE, millis); if (wait == WAIT_OBJECT_0) { res = Py_PARK_OK; } else if (wait == WAIT_OBJECT_0 + 1) { + assert(sigint_event != NULL); ResetEvent(sigint_event); res = Py_PARK_INTR; } @@ -332,7 +342,19 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size, enqueue(bucket, addr, &wait); _PyRawMutex_Unlock(&bucket->mutex); - int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach); + PyThreadState *tstate = NULL; + if (detach) { + tstate = _PyThreadState_GET(); + if (tstate && _PyThreadState_IsAttached(tstate)) { + // Only detach if we are attached + PyEval_ReleaseThread(tstate); + } + else { + tstate = NULL; + } + } + + int res = _PySemaphore_Wait(&wait.sema, timeout_ns, 0); if (res == Py_PARK_OK) { goto done; } @@ -344,7 +366,7 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size, // Another thread has started to unpark us. Wait until we process the // wakeup signal. do { - res = _PySemaphore_Wait(&wait.sema, -1, detach); + res = _PySemaphore_Wait(&wait.sema, -1, 0); } while (res != Py_PARK_OK); goto done; } @@ -356,6 +378,9 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size, done: _PySemaphore_Destroy(&wait.sema); + if (tstate) { + PyEval_AcquireThread(tstate); + } return res; } diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 1211e0e9f112b7..fafe393065bf6b 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -1,241 +1,353 @@ +/* + * Python Perf Trampoline Support - JIT Dump Implementation + * + * This file implements the perf jitdump API for Python's performance profiling + * integration. It allows perf (Linux performance analysis tool) to understand + * and profile dynamically generated Python bytecode by creating JIT dump files + * that perf can inject into its analysis. + * + * + * IMPORTANT: This file exports specific callback functions that are part of + * Python's internal API. Do not modify the function signatures or behavior + * of exported functions without coordinating with the Python core team. + * + * Usually the binary and libraries are mapped in separate region like below: + * + * address -> + * --+---------------------+--//--+---------------------+-- + * | .text | .data | ... | | .text | .data | ... | + * --+---------------------+--//--+---------------------+-- + * myprog libc.so + * + * So it'd be easy and straight-forward to find a mapped binary or library from an + * address. + * + * But for JIT code, the code arena only cares about the code section. But the + * resulting DSOs (which is generated by perf inject -j) contain ELF headers and + * unwind info too. Then it'd generate following address space with synthesized + * MMAP events. Let's say it has a sample between address B and C. + * + * sample + * | + * address -> A B v C + * --------------------------------------------------------------------------------------------------- + * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-1.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-2.so | (headers) | .text | unwind info | + * ... + * --------------------------------------------------------------------------------------------------- + * + * If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see + * the unwind info. If it maps both .text section and unwind sections, the sample + * could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing + * which one is right. So to make perf happy we have non-overlapping ranges for each + * DSO: + * + * address -> + * ------------------------------------------------------------------------------------------------------- + * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-1.so | (headers) | .text | unwind info | + * /tmp/jitted-PID-2.so | (headers) | .text | unwind info | + * ... + * ------------------------------------------------------------------------------------------------------- + * + * As the trampolines are constant, we add a constant padding but in general the padding needs to have the + * size of the unwind info rounded to 16 bytes. In general, for our trampolines this is 0x50 + */ + + + #include "Python.h" #include "pycore_ceval.h" // _PyPerf_Callbacks #include "pycore_frame.h" #include "pycore_interp.h" #include "pycore_runtime.h" // _PyRuntime - #ifdef PY_HAVE_PERF_TRAMPOLINE -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> // mmap() -#include <sys/types.h> -#include <unistd.h> // sysconf() -#include <sys/time.h> // gettimeofday() -#include <sys/syscall.h> - -// ---------------------------------- -// Perf jitdump API -// ---------------------------------- - -typedef struct { - FILE* perf_map; - PyThread_type_lock map_lock; - void* mapped_buffer; - size_t mapped_size; - int code_id; -} PerfMapJitState; - -static PerfMapJitState perf_jit_map_state; +/* Standard library includes for perf jitdump implementation */ +#include <elf.h> // ELF architecture constants +#include <fcntl.h> // File control operations +#include <stdio.h> // Standard I/O operations +#include <stdlib.h> // Standard library functions +#include <sys/mman.h> // Memory mapping functions (mmap) +#include <sys/types.h> // System data types +#include <unistd.h> // System calls (sysconf, getpid) +#include <sys/time.h> // Time functions (gettimeofday) +#include <sys/syscall.h> // System call interface + +// ============================================================================= +// CONSTANTS AND CONFIGURATION +// ============================================================================= /* -Usually the binary and libraries are mapped in separate region like below: - - address -> - --+---------------------+--//--+---------------------+-- - | .text | .data | ... | | .text | .data | ... | - --+---------------------+--//--+---------------------+-- - myprog libc.so - -So it'd be easy and straight-forward to find a mapped binary or library from an -address. - -But for JIT code, the code arena only cares about the code section. But the -resulting DSOs (which is generated by perf inject -j) contain ELF headers and -unwind info too. Then it'd generate following address space with synthesized -MMAP events. Let's say it has a sample between address B and C. - - sample - | - address -> A B v C - --------------------------------------------------------------------------------------------------- - /tmp/jitted-PID-0.so | (headers) | .text | unwind info | - /tmp/jitted-PID-1.so | (headers) | .text | unwind info | - /tmp/jitted-PID-2.so | (headers) | .text | unwind info | - ... - --------------------------------------------------------------------------------------------------- - -If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see -the unwind info. If it maps both .text section and unwind sections, the sample -could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing -which one is right. So to make perf happy we have non-overlapping ranges for each -DSO: - - address -> - ------------------------------------------------------------------------------------------------------- - /tmp/jitted-PID-0.so | (headers) | .text | unwind info | - /tmp/jitted-PID-1.so | (headers) | .text | unwind info | - /tmp/jitted-PID-2.so | (headers) | .text | unwind info | - ... - ------------------------------------------------------------------------------------------------------- - -As the trampolines are constant, we add a constant padding but in general the padding needs to have the -size of the unwind info rounded to 16 bytes. In general, for our trampolines this is 0x50 + * Memory layout considerations for perf jitdump: + * + * Perf expects non-overlapping memory regions for each JIT-compiled function. + * When perf processes the jitdump file, it creates synthetic DSO (Dynamic + * Shared Object) files that contain: + * - ELF headers + * - .text section (actual machine code) + * - Unwind information (for stack traces) + * + * To ensure proper address space layout, we add padding between code regions. + * This prevents address conflicts when perf maps the synthesized DSOs. + * + * Memory layout example: + * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding] + * /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding] + * + * The padding size is now calculated automatically during initialization + * based on the actual unwind information requirements. */ -#define PERF_JIT_CODE_PADDING 0x100 +/* Convenient access to the global trampoline API state */ #define trampoline_api _PyRuntime.ceval.perf.trampoline_api -typedef uint64_t uword; -typedef const char* CodeComments; +/* Type aliases for clarity and portability */ +typedef uint64_t uword; // Word-sized unsigned integer +typedef const char* CodeComments; // Code comment strings -#define Pd "d" -#define MB (1024 * 1024) +/* Memory size constants */ +#define MB (1024 * 1024) // 1 Megabyte for buffer sizing -#define EM_386 3 -#define EM_X86_64 62 -#define EM_ARM 40 -#define EM_AARCH64 183 -#define EM_RISCV 243 +// ============================================================================= +// ARCHITECTURE-SPECIFIC DEFINITIONS +// ============================================================================= -#define TARGET_ARCH_IA32 0 -#define TARGET_ARCH_X64 0 -#define TARGET_ARCH_ARM 0 -#define TARGET_ARCH_ARM64 0 -#define TARGET_ARCH_RISCV32 0 -#define TARGET_ARCH_RISCV64 0 - -#define FLAG_generate_perf_jitdump 0 -#define FLAG_write_protect_code 0 -#define FLAG_write_protect_vm_isolate 0 -#define FLAG_code_comments 0 - -#define UNREACHABLE() - -static uword GetElfMachineArchitecture(void) { -#if TARGET_ARCH_IA32 - return EM_386; -#elif TARGET_ARCH_X64 +/* + * Returns the ELF machine architecture constant for the current platform. + * This is required for the jitdump header to correctly identify the target + * architecture for perf processing. + * + */ +static uint64_t GetElfMachineArchitecture(void) { +#if defined(__x86_64__) || defined(_M_X64) return EM_X86_64; -#elif TARGET_ARCH_ARM - return EM_ARM; -#elif TARGET_ARCH_ARM64 +#elif defined(__i386__) || defined(_M_IX86) + return EM_386; +#elif defined(__aarch64__) return EM_AARCH64; -#elif TARGET_ARCH_RISCV32 || TARGET_ARCH_RISCV64 +#elif defined(__arm__) || defined(_M_ARM) + return EM_ARM; +#elif defined(__riscv) return EM_RISCV; #else - UNREACHABLE(); + Py_UNREACHABLE(); // Unsupported architecture - should never reach here return 0; #endif } +// ============================================================================= +// PERF JITDUMP DATA STRUCTURES +// ============================================================================= + +/* + * Perf jitdump file format structures + * + * These structures define the binary format that perf expects for JIT dump files. + * The format is documented in the Linux perf tools source code and must match + * exactly for proper perf integration. + */ + +/* + * Jitdump file header - written once at the beginning of each jitdump file + * Contains metadata about the process and jitdump format version + */ typedef struct { - uint32_t magic; - uint32_t version; - uint32_t size; - uint32_t elf_mach_target; - uint32_t reserved; - uint32_t process_id; - uint64_t time_stamp; - uint64_t flags; + uint32_t magic; // Magic number (0x4A695444 = "JiTD") + uint32_t version; // Jitdump format version (currently 1) + uint32_t size; // Size of this header structure + uint32_t elf_mach_target; // Target architecture (from GetElfMachineArchitecture) + uint32_t reserved; // Reserved field (must be 0) + uint32_t process_id; // Process ID of the JIT compiler + uint64_t time_stamp; // Timestamp when jitdump was created + uint64_t flags; // Feature flags (currently unused) } Header; - enum PerfEvent { - PerfLoad = 0, - PerfMove = 1, - PerfDebugInfo = 2, - PerfClose = 3, - PerfUnwindingInfo = 4 +/* + * Perf event types supported by the jitdump format + * Each event type has a corresponding structure format + */ +enum PerfEvent { + PerfLoad = 0, // Code load event (new JIT function) + PerfMove = 1, // Code move event (function relocated) + PerfDebugInfo = 2, // Debug information event + PerfClose = 3, // JIT session close event + PerfUnwindingInfo = 4 // Stack unwinding information event }; +/* + * Base event structure - common header for all perf events + * Every event in the jitdump file starts with this structure + */ struct BaseEvent { - uint32_t event; - uint32_t size; - uint64_t time_stamp; - }; + uint32_t event; // Event type (from PerfEvent enum) + uint32_t size; // Total size of this event including payload + uint64_t time_stamp; // Timestamp when event occurred +}; +/* + * Code load event - indicates a new JIT-compiled function is available + * This is the most important event type for Python profiling + */ typedef struct { - struct BaseEvent base; - uint32_t process_id; - uint32_t thread_id; - uint64_t vma; - uint64_t code_address; - uint64_t code_size; - uint64_t code_id; + struct BaseEvent base; // Common event header + uint32_t process_id; // Process ID where code was generated + uint32_t thread_id; // Thread ID where code was generated + uint64_t vma; // Virtual memory address where code is loaded + uint64_t code_address; // Address of the actual machine code + uint64_t code_size; // Size of the machine code in bytes + uint64_t code_id; // Unique identifier for this code region + /* Followed by: + * - null-terminated function name string + * - raw machine code bytes + */ } CodeLoadEvent; +/* + * Code unwinding information event - provides DWARF data for stack traces + * Essential for proper stack unwinding during profiling + */ typedef struct { - struct BaseEvent base; - uint64_t unwind_data_size; - uint64_t eh_frame_hdr_size; - uint64_t mapped_size; + struct BaseEvent base; // Common event header + uint64_t unwind_data_size; // Size of the unwinding data + uint64_t eh_frame_hdr_size; // Size of the EH frame header + uint64_t mapped_size; // Total mapped size (with padding) + /* Followed by: + * - EH frame header + * - DWARF unwinding information + * - Padding to alignment boundary + */ } CodeUnwindingInfoEvent; -static const intptr_t nanoseconds_per_second = 1000000000; - -// Dwarf encoding constants +// ============================================================================= +// GLOBAL STATE MANAGEMENT +// ============================================================================= -static const uint8_t DwarfUData4 = 0x03; -static const uint8_t DwarfSData4 = 0x0b; -static const uint8_t DwarfPcRel = 0x10; -static const uint8_t DwarfDataRel = 0x30; -// static uint8_t DwarfOmit = 0xff; +/* + * Global state for the perf jitdump implementation + * + * This structure maintains all the state needed for generating jitdump files. + * It's designed as a singleton since there's typically only one jitdump file + * per Python process. + */ typedef struct { - unsigned char version; - unsigned char eh_frame_ptr_enc; - unsigned char fde_count_enc; - unsigned char table_enc; - int32_t eh_frame_ptr; - int32_t eh_fde_count; - int32_t from; - int32_t to; -} EhFrameHeader; + FILE* perf_map; // File handle for the jitdump file + PyThread_type_lock map_lock; // Thread synchronization lock + void* mapped_buffer; // Memory-mapped region (signals perf we're active) + size_t mapped_size; // Size of the mapped region + int code_id; // Counter for unique code region identifiers +} PerfMapJitState; + +/* Global singleton instance */ +static PerfMapJitState perf_jit_map_state; + +// ============================================================================= +// TIME UTILITIES +// ============================================================================= + +/* Time conversion constant */ +static const intptr_t nanoseconds_per_second = 1000000000; +/* + * Get current monotonic time in nanoseconds + * + * Monotonic time is preferred for event timestamps because it's not affected + * by system clock adjustments. This ensures consistent timing relationships + * between events even if the system clock is changed. + * + * Returns: Current monotonic time in nanoseconds since an arbitrary epoch + */ static int64_t get_current_monotonic_ticks(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { - UNREACHABLE(); + Py_UNREACHABLE(); // Should never fail on supported systems return 0; } - // Convert to nanoseconds. + + /* Convert to nanoseconds for maximum precision */ int64_t result = ts.tv_sec; result *= nanoseconds_per_second; result += ts.tv_nsec; return result; } +/* + * Get current wall clock time in microseconds + * + * Used for the jitdump file header timestamp. Unlike monotonic time, + * this represents actual wall clock time that can be correlated with + * other system events. + * + * Returns: Current time in microseconds since Unix epoch + */ static int64_t get_current_time_microseconds(void) { - // gettimeofday has microsecond resolution. - struct timeval tv; - if (gettimeofday(&tv, NULL) < 0) { - UNREACHABLE(); - return 0; - } - return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; + struct timeval tv; + if (gettimeofday(&tv, NULL) < 0) { + Py_UNREACHABLE(); // Should never fail on supported systems + return 0; + } + return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; } +// ============================================================================= +// UTILITY FUNCTIONS +// ============================================================================= +/* + * Round up a value to the next multiple of a given number + * + * This is essential for maintaining proper alignment requirements in the + * jitdump format. Many structures need to be aligned to specific boundaries + * (typically 8 or 16 bytes) for efficient processing by perf. + * + * Args: + * value: The value to round up + * multiple: The multiple to round up to + * + * Returns: The smallest value >= input that is a multiple of 'multiple' + */ static size_t round_up(int64_t value, int64_t multiple) { if (multiple == 0) { - // Avoid division by zero - return value; + return value; // Avoid division by zero } int64_t remainder = value % multiple; if (remainder == 0) { - // Value is already a multiple of 'multiple' - return value; + return value; // Already aligned } - // Calculate the difference to the next multiple + /* Calculate how much to add to reach the next multiple */ int64_t difference = multiple - remainder; - - // Add the difference to the value int64_t rounded_up_value = value + difference; return rounded_up_value; } +// ============================================================================= +// FILE I/O UTILITIES +// ============================================================================= +/* + * Write data to the jitdump file with error handling + * + * This function ensures that all data is written to the file, handling + * partial writes that can occur with large buffers or when the system + * is under load. + * + * Args: + * buffer: Pointer to data to write + * size: Number of bytes to write + */ static void perf_map_jit_write_fully(const void* buffer, size_t size) { FILE* out_file = perf_jit_map_state.perf_map; const char* ptr = (const char*)(buffer); + while (size > 0) { const size_t written = fwrite(ptr, 1, size, out_file); if (written == 0) { - UNREACHABLE(); + Py_UNREACHABLE(); // Write failure - should be very rare break; } size -= written; @@ -243,284 +355,820 @@ static void perf_map_jit_write_fully(const void* buffer, size_t size) { } } +/* + * Write the jitdump file header + * + * The header must be written exactly once at the beginning of each jitdump + * file. It provides metadata that perf uses to parse the rest of the file. + * + * Args: + * pid: Process ID to include in the header + * out_file: File handle to write to (currently unused, uses global state) + */ static void perf_map_jit_write_header(int pid, FILE* out_file) { Header header; - header.magic = 0x4A695444; - header.version = 1; - header.size = sizeof(Header); - header.elf_mach_target = GetElfMachineArchitecture(); - header.process_id = pid; - header.time_stamp = get_current_time_microseconds(); - header.flags = 0; - perf_map_jit_write_fully(&header, sizeof(header)); -} - -static void* perf_map_jit_init(void) { - char filename[100]; - int pid = getpid(); - snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); - const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); - if (fd == -1) { - return NULL; - } - - const long page_size = sysconf(_SC_PAGESIZE); // NOLINT(runtime/int) - if (page_size == -1) { - close(fd); - return NULL; - } - // The perf jit interface forces us to map the first page of the file - // to signal that we are using the interface. - perf_jit_map_state.mapped_buffer = mmap(NULL, page_size, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); - if (perf_jit_map_state.mapped_buffer == NULL) { - close(fd); - return NULL; - } - perf_jit_map_state.mapped_size = page_size; - perf_jit_map_state.perf_map = fdopen(fd, "w+"); - if (perf_jit_map_state.perf_map == NULL) { - close(fd); - return NULL; - } - setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); - perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); - - perf_jit_map_state.map_lock = PyThread_allocate_lock(); - if (perf_jit_map_state.map_lock == NULL) { - fclose(perf_jit_map_state.perf_map); - return NULL; - } - perf_jit_map_state.code_id = 0; + /* Initialize header with required values */ + header.magic = 0x4A695444; // "JiTD" magic number + header.version = 1; // Current jitdump version + header.size = sizeof(Header); // Header size for validation + header.elf_mach_target = GetElfMachineArchitecture(); // Target architecture + header.process_id = pid; // Process identifier + header.time_stamp = get_current_time_microseconds(); // Creation time + header.flags = 0; // No special flags currently used - trampoline_api.code_padding = PERF_JIT_CODE_PADDING; - return &perf_jit_map_state; + perf_map_jit_write_fully(&header, sizeof(header)); } -/* DWARF definitions. */ +// ============================================================================= +// DWARF CONSTANTS AND UTILITIES +// ============================================================================= +/* + * DWARF (Debug With Arbitrary Record Formats) constants + * + * DWARF is a debugging data format used to provide stack unwinding information. + * These constants define the various encoding types and opcodes used in + * DWARF Call Frame Information (CFI) records. + */ + +/* DWARF Call Frame Information version */ #define DWRF_CIE_VERSION 1 +/* DWARF CFA (Call Frame Address) opcodes */ enum { - DWRF_CFA_nop = 0x0, - DWRF_CFA_offset_extended = 0x5, - DWRF_CFA_def_cfa = 0xc, - DWRF_CFA_def_cfa_offset = 0xe, - DWRF_CFA_offset_extended_sf = 0x11, - DWRF_CFA_advance_loc = 0x40, - DWRF_CFA_offset = 0x80 + DWRF_CFA_nop = 0x0, // No operation + DWRF_CFA_offset_extended = 0x5, // Extended offset instruction + DWRF_CFA_def_cfa = 0xc, // Define CFA rule + DWRF_CFA_def_cfa_register = 0xd, // Define CFA register + DWRF_CFA_def_cfa_offset = 0xe, // Define CFA offset + DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset + DWRF_CFA_advance_loc = 0x40, // Advance location counter + DWRF_CFA_offset = 0x80, // Simple offset instruction + DWRF_CFA_restore = 0xc0 // Restore register }; -enum - { - DWRF_EH_PE_absptr = 0x00, - DWRF_EH_PE_omit = 0xff, - - /* FDE data encoding. */ - DWRF_EH_PE_uleb128 = 0x01, - DWRF_EH_PE_udata2 = 0x02, - DWRF_EH_PE_udata4 = 0x03, - DWRF_EH_PE_udata8 = 0x04, - DWRF_EH_PE_sleb128 = 0x09, - DWRF_EH_PE_sdata2 = 0x0a, - DWRF_EH_PE_sdata4 = 0x0b, - DWRF_EH_PE_sdata8 = 0x0c, - DWRF_EH_PE_signed = 0x08, - - /* FDE flags. */ - DWRF_EH_PE_pcrel = 0x10, - DWRF_EH_PE_textrel = 0x20, - DWRF_EH_PE_datarel = 0x30, - DWRF_EH_PE_funcrel = 0x40, - DWRF_EH_PE_aligned = 0x50, - - DWRF_EH_PE_indirect = 0x80 - }; +/* DWARF Exception Handling pointer encodings */ +enum { + DWRF_EH_PE_absptr = 0x00, // Absolute pointer + DWRF_EH_PE_omit = 0xff, // Omitted value + + /* Data type encodings */ + DWRF_EH_PE_uleb128 = 0x01, // Unsigned LEB128 + DWRF_EH_PE_udata2 = 0x02, // Unsigned 2-byte + DWRF_EH_PE_udata4 = 0x03, // Unsigned 4-byte + DWRF_EH_PE_udata8 = 0x04, // Unsigned 8-byte + DWRF_EH_PE_sleb128 = 0x09, // Signed LEB128 + DWRF_EH_PE_sdata2 = 0x0a, // Signed 2-byte + DWRF_EH_PE_sdata4 = 0x0b, // Signed 4-byte + DWRF_EH_PE_sdata8 = 0x0c, // Signed 8-byte + DWRF_EH_PE_signed = 0x08, // Signed flag + + /* Reference type encodings */ + DWRF_EH_PE_pcrel = 0x10, // PC-relative + DWRF_EH_PE_textrel = 0x20, // Text-relative + DWRF_EH_PE_datarel = 0x30, // Data-relative + DWRF_EH_PE_funcrel = 0x40, // Function-relative + DWRF_EH_PE_aligned = 0x50, // Aligned + DWRF_EH_PE_indirect = 0x80 // Indirect +}; +/* Additional DWARF constants for debug information */ enum { DWRF_TAG_compile_unit = 0x11 }; - enum { DWRF_children_no = 0, DWRF_children_yes = 1 }; +enum { + DWRF_AT_name = 0x03, // Name attribute + DWRF_AT_stmt_list = 0x10, // Statement list + DWRF_AT_low_pc = 0x11, // Low PC address + DWRF_AT_high_pc = 0x12 // High PC address +}; +enum { + DWRF_FORM_addr = 0x01, // Address form + DWRF_FORM_data4 = 0x06, // 4-byte data + DWRF_FORM_string = 0x08 // String form +}; -enum { DWRF_AT_name = 0x03, DWRF_AT_stmt_list = 0x10, DWRF_AT_low_pc = 0x11, DWRF_AT_high_pc = 0x12 }; - -enum { DWRF_FORM_addr = 0x01, DWRF_FORM_data4 = 0x06, DWRF_FORM_string = 0x08 }; - -enum { DWRF_LNS_extended_op = 0, DWRF_LNS_copy = 1, DWRF_LNS_advance_pc = 2, DWRF_LNS_advance_line = 3 }; +/* Line number program opcodes */ +enum { + DWRF_LNS_extended_op = 0, // Extended opcode + DWRF_LNS_copy = 1, // Copy operation + DWRF_LNS_advance_pc = 2, // Advance program counter + DWRF_LNS_advance_line = 3 // Advance line number +}; -enum { DWRF_LNE_end_sequence = 1, DWRF_LNE_set_address = 2 }; +/* Line number extended opcodes */ +enum { + DWRF_LNE_end_sequence = 1, // End of sequence + DWRF_LNE_set_address = 2 // Set address +}; +/* + * Architecture-specific DWARF register numbers + * + * These constants define the register numbering scheme used by DWARF + * for each supported architecture. The numbers must match the ABI + * specification for proper stack unwinding. + */ enum { #ifdef __x86_64__ - /* Yes, the order is strange, but correct. */ - DWRF_REG_AX, - DWRF_REG_DX, - DWRF_REG_CX, - DWRF_REG_BX, - DWRF_REG_SI, - DWRF_REG_DI, - DWRF_REG_BP, - DWRF_REG_SP, - DWRF_REG_8, - DWRF_REG_9, - DWRF_REG_10, - DWRF_REG_11, - DWRF_REG_12, - DWRF_REG_13, - DWRF_REG_14, - DWRF_REG_15, - DWRF_REG_RA, + /* x86_64 register numbering (note: order is defined by x86_64 ABI) */ + DWRF_REG_AX, // RAX + DWRF_REG_DX, // RDX + DWRF_REG_CX, // RCX + DWRF_REG_BX, // RBX + DWRF_REG_SI, // RSI + DWRF_REG_DI, // RDI + DWRF_REG_BP, // RBP + DWRF_REG_SP, // RSP + DWRF_REG_8, // R8 + DWRF_REG_9, // R9 + DWRF_REG_10, // R10 + DWRF_REG_11, // R11 + DWRF_REG_12, // R12 + DWRF_REG_13, // R13 + DWRF_REG_14, // R14 + DWRF_REG_15, // R15 + DWRF_REG_RA, // Return address (RIP) #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) - DWRF_REG_SP = 31, - DWRF_REG_RA = 30, + /* AArch64 register numbering */ + DWRF_REG_FP = 29, // Frame Pointer + DWRF_REG_RA = 30, // Link register (return address) + DWRF_REG_SP = 31, // Stack pointer #else # error "Unsupported target architecture" #endif }; -typedef struct ELFObjectContext -{ - uint8_t* p; /* Pointer to next address in obj.space. */ - uint8_t* startp; /* Pointer to start address in obj.space. */ - uint8_t* eh_frame_p; /* Pointer to start address in obj.space. */ - uint32_t code_size; /* Size of machine code. */ +/* DWARF encoding constants used in EH frame headers */ +static const uint8_t DwarfUData4 = 0x03; // Unsigned 4-byte data +static const uint8_t DwarfSData4 = 0x0b; // Signed 4-byte data +static const uint8_t DwarfPcRel = 0x10; // PC-relative encoding +static const uint8_t DwarfDataRel = 0x30; // Data-relative encoding + +// ============================================================================= +// ELF OBJECT CONTEXT +// ============================================================================= + +/* + * Context for building ELF/DWARF structures + * + * This structure maintains state while constructing DWARF unwind information. + * It acts as a simple buffer manager with pointers to track current position + * and important landmarks within the buffer. + */ +typedef struct ELFObjectContext { + uint8_t* p; // Current write position in buffer + uint8_t* startp; // Start of buffer (for offset calculations) + uint8_t* eh_frame_p; // Start of EH frame data (for relative offsets) + uint8_t* fde_p; // Start of FDE data (for PC-relative calculations) + uint32_t code_size; // Size of the code being described } ELFObjectContext; -/* Append a null-terminated string. */ -static uint32_t -elfctx_append_string(ELFObjectContext* ctx, const char* str) -{ +/* + * EH Frame Header structure for DWARF unwinding + * + * This structure provides metadata about the DWARF unwinding information + * that follows. It's required by the perf jitdump format to enable proper + * stack unwinding during profiling. + */ +typedef struct { + unsigned char version; // EH frame version (always 1) + unsigned char eh_frame_ptr_enc; // Encoding of EH frame pointer + unsigned char fde_count_enc; // Encoding of FDE count + unsigned char table_enc; // Encoding of table entries + int32_t eh_frame_ptr; // Pointer to EH frame data + int32_t eh_fde_count; // Number of FDEs (Frame Description Entries) + int32_t from; // Start address of code range + int32_t to; // End address of code range +} EhFrameHeader; + +// ============================================================================= +// DWARF GENERATION UTILITIES +// ============================================================================= + +/* + * Append a null-terminated string to the ELF context buffer + * + * Args: + * ctx: ELF object context + * str: String to append (must be null-terminated) + * + * Returns: Offset from start of buffer where string was written + */ +static uint32_t elfctx_append_string(ELFObjectContext* ctx, const char* str) { uint8_t* p = ctx->p; uint32_t ofs = (uint32_t)(p - ctx->startp); + + /* Copy string including null terminator */ do { *p++ = (uint8_t)*str; } while (*str++); + ctx->p = p; return ofs; } -/* Append a SLEB128 value. */ -static void -elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) -{ +/* + * Append a SLEB128 (Signed Little Endian Base 128) value + * + * SLEB128 is a variable-length encoding used extensively in DWARF. + * It efficiently encodes small numbers in fewer bytes. + * + * Args: + * ctx: ELF object context + * v: Signed value to encode + */ +static void elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) { uint8_t* p = ctx->p; + + /* Encode 7 bits at a time, with continuation bit in MSB */ for (; (uint32_t)(v + 0x40) >= 0x80; v >>= 7) { - *p++ = (uint8_t)((v & 0x7f) | 0x80); + *p++ = (uint8_t)((v & 0x7f) | 0x80); // Set continuation bit } - *p++ = (uint8_t)(v & 0x7f); + *p++ = (uint8_t)(v & 0x7f); // Final byte without continuation bit + ctx->p = p; } -/* Append a ULEB128 to buffer. */ -static void -elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) -{ +/* + * Append a ULEB128 (Unsigned Little Endian Base 128) value + * + * Similar to SLEB128 but for unsigned values. + * + * Args: + * ctx: ELF object context + * v: Unsigned value to encode + */ +static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) { uint8_t* p = ctx->p; + + /* Encode 7 bits at a time, with continuation bit in MSB */ for (; v >= 0x80; v >>= 7) { - *p++ = (char)((v & 0x7f) | 0x80); + *p++ = (char)((v & 0x7f) | 0x80); // Set continuation bit } - *p++ = (char)v; + *p++ = (char)v; // Final byte without continuation bit + ctx->p = p; } -/* Shortcuts to generate DWARF structures. */ -#define DWRF_U8(x) (*p++ = (x)) -#define DWRF_I8(x) (*(int8_t*)p = (x), p++) -#define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2) -#define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4) -#define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) -#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) -#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) -#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) -#define DWRF_ALIGNNOP(s) \ - while ((uintptr_t)p & ((s)-1)) { \ - *p++ = DWRF_CFA_nop; \ +/* + * Macros for generating DWARF structures + * + * These macros provide a convenient way to write various data types + * to the DWARF buffer while automatically advancing the pointer. + */ +#define DWRF_U8(x) (*p++ = (x)) // Write unsigned 8-bit +#define DWRF_I8(x) (*(int8_t*)p = (x), p++) // Write signed 8-bit +#define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2) // Write unsigned 16-bit +#define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4) // Write unsigned 32-bit +#define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) // Write address +#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) // Write ULEB128 +#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) // Write SLEB128 +#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) // Write string + +/* Align to specified boundary with NOP instructions */ +#define DWRF_ALIGNNOP(s) \ + while ((uintptr_t)p & ((s)-1)) { \ + *p++ = DWRF_CFA_nop; \ } -#define DWRF_SECTION(name, stmt) \ - { \ - uint32_t* szp_##name = (uint32_t*)p; \ - p += 4; \ - stmt; \ - *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \ + +/* Write a DWARF section with automatic size calculation */ +#define DWRF_SECTION(name, stmt) \ + { \ + uint32_t* szp_##name = (uint32_t*)p; \ + p += 4; \ + stmt; \ + *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \ } -/* Initialize .eh_frame section. */ -static void -elf_init_ehframe(ELFObjectContext* ctx) -{ +// ============================================================================= +// DWARF EH FRAME GENERATION +// ============================================================================= + +static void elf_init_ehframe(ELFObjectContext* ctx); + +/* + * Initialize DWARF .eh_frame section for a code region + * + * The .eh_frame section contains Call Frame Information (CFI) that describes + * how to unwind the stack at any point in the code. This is essential for + * proper profiling as it allows perf to generate accurate call graphs. + * + * The function generates two main components: + * 1. CIE (Common Information Entry) - describes calling conventions + * 2. FDE (Frame Description Entry) - describes specific function unwinding + * + * Args: + * ctx: ELF object context containing code size and buffer pointers + */ +static size_t calculate_eh_frame_size(void) { + /* Calculate the EH frame size for the trampoline function */ + extern void *_Py_trampoline_func_start; + extern void *_Py_trampoline_func_end; + + size_t code_size = (char*)&_Py_trampoline_func_end - (char*)&_Py_trampoline_func_start; + + ELFObjectContext ctx; + char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient) + ctx.code_size = code_size; + ctx.startp = ctx.p = (uint8_t*)buffer; + ctx.fde_p = NULL; + + elf_init_ehframe(&ctx); + return ctx.p - ctx.startp; +} + +static void elf_init_ehframe(ELFObjectContext* ctx) { uint8_t* p = ctx->p; - uint8_t* framep = p; - - /* Emit DWARF EH CIE. */ - DWRF_SECTION(CIE, DWRF_U32(0); /* Offset to CIE itself. */ - DWRF_U8(DWRF_CIE_VERSION); - DWRF_STR("zR"); /* Augmentation. */ - DWRF_UV(1); /* Code alignment factor. */ - DWRF_SV(-(int64_t)sizeof(uintptr_t)); /* Data alignment factor. */ - DWRF_U8(DWRF_REG_RA); /* Return address register. */ - DWRF_UV(1); - DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); /* Augmentation data. */ - DWRF_U8(DWRF_CFA_def_cfa); DWRF_UV(DWRF_REG_SP); DWRF_UV(sizeof(uintptr_t)); - DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); DWRF_UV(1); - DWRF_ALIGNNOP(sizeof(uintptr_t)); - ) + uint8_t* framep = p; // Remember start of frame data + + /* + * DWARF Unwind Table for Trampoline Function + * + * This section defines DWARF Call Frame Information (CFI) using encoded macros + * like `DWRF_U8`, `DWRF_UV`, and `DWRF_SECTION` to describe how the trampoline function + * preserves and restores registers. This is used by profiling tools (e.g., `perf`) + * and debuggers for stack unwinding in JIT-compiled code. + * + * ------------------------------------------------- + * TO REGENERATE THIS TABLE FROM GCC OBJECTS: + * ------------------------------------------------- + * + * 1. Create a trampoline source file (e.g., `trampoline.c`): + * + * #include <Python.h> + * typedef PyObject* (*py_evaluator)(void*, void*, int); + * PyObject* trampoline(void *ts, void *f, int throwflag, py_evaluator evaluator) { + * return evaluator(ts, f, throwflag); + * } + * + * 2. Compile to an object file with frame pointer preservation: + * + * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c + * + * 3. Extract DWARF unwind info from the object file: + * + * readelf -w trampoline.o + * + * Example output from `.eh_frame`: + * + * 00000000 CIE + * Version: 1 + * Augmentation: "zR" + * Code alignment factor: 4 + * Data alignment factor: -8 + * Return address column: 30 + * DW_CFA_def_cfa: r31 (sp) ofs 0 + * + * 00000014 FDE cie=00000000 pc=0..14 + * DW_CFA_advance_loc: 4 + * DW_CFA_def_cfa_offset: 16 + * DW_CFA_offset: r29 at cfa-16 + * DW_CFA_offset: r30 at cfa-8 + * DW_CFA_advance_loc: 12 + * DW_CFA_restore: r30 + * DW_CFA_restore: r29 + * DW_CFA_def_cfa_offset: 0 + * + * -- These values can be verified by comparing with `readelf -w` or `llvm-dwarfdump --eh-frame`. + * + * ---------------------------------- + * HOW TO TRANSLATE TO DWRF_* MACROS: + * ---------------------------------- + * + * After compiling your trampoline with: + * + * gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c + * + * run: + * + * readelf -w trampoline.o + * + * to inspect the generated `.eh_frame` data. You will see two main components: + * + * 1. A CIE (Common Information Entry): shared configuration used by all FDEs. + * 2. An FDE (Frame Description Entry): function-specific unwind instructions. + * + * --------------------- + * Translating the CIE: + * --------------------- + * From `readelf -w`, you might see: + * + * 00000000 0000000000000010 00000000 CIE + * Version: 1 + * Augmentation: "zR" + * Code alignment factor: 4 + * Data alignment factor: -8 + * Return address column: 30 + * Augmentation data: 1b + * DW_CFA_def_cfa: r31 (sp) ofs 0 + * + * Map this to: + * + * DWRF_SECTION(CIE, + * DWRF_U32(0); // CIE ID (always 0 for CIEs) + * DWRF_U8(DWRF_CIE_VERSION); // Version: 1 + * DWRF_STR("zR"); // Augmentation string "zR" + * DWRF_UV(4); // Code alignment factor = 4 + * DWRF_SV(-8); // Data alignment factor = -8 + * DWRF_U8(DWRF_REG_RA); // Return address register (e.g., x30 = 30) + * DWRF_UV(1); // Augmentation data length = 1 + * DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // Encoding for FDE pointers + * + * DWRF_U8(DWRF_CFA_def_cfa); // DW_CFA_def_cfa + * DWRF_UV(DWRF_REG_SP); // Register: SP (r31) + * DWRF_UV(0); // Offset = 0 + * + * DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer size boundary + * ) + * + * Notes: + * - Use `DWRF_UV` for unsigned LEB128, `DWRF_SV` for signed LEB128. + * - `DWRF_REG_RA` and `DWRF_REG_SP` are architecture-defined constants. + * + * --------------------- + * Translating the FDE: + * --------------------- + * From `readelf -w`: + * + * 00000014 0000000000000020 00000018 FDE cie=00000000 pc=0000000000000000..0000000000000014 + * DW_CFA_advance_loc: 4 + * DW_CFA_def_cfa_offset: 16 + * DW_CFA_offset: r29 at cfa-16 + * DW_CFA_offset: r30 at cfa-8 + * DW_CFA_advance_loc: 12 + * DW_CFA_restore: r30 + * DW_CFA_restore: r29 + * DW_CFA_def_cfa_offset: 0 + * + * Map the FDE header and instructions to: + * + * DWRF_SECTION(FDE, + * DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (relative from here) + * DWRF_U32(pc_relative_offset); // PC-relative location of the code (calculated dynamically) + * DWRF_U32(ctx->code_size); // Code range covered by this FDE + * DWRF_U8(0); // Augmentation data length (none) + * + * DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 unit (1 * 4 = 4 bytes) + * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16 + * DWRF_UV(16); + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Save x29 (frame pointer) + * DWRF_UV(2); // At offset 2 * 8 = 16 bytes + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Save x30 (return address) + * DWRF_UV(1); // At offset 1 * 8 = 8 bytes + * + * DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance location by 3 units (3 * 4 = 12 bytes) + * + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Restore x30 + * DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Restore x29 + * + * DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + * DWRF_UV(0); + * ) + * + * To regenerate: + * 1. Get the `code alignment factor`, `data alignment factor`, and `RA column` from the CIE. + * 2. Note the range of the function from the FDE's `pc=...` line and map it to the JIT code as + * the code is in a different address space every time. + * 3. For each `DW_CFA_*` entry, use the corresponding `DWRF_*` macro: + * - `DW_CFA_def_cfa_offset` → DWRF_U8(DWRF_CFA_def_cfa_offset), DWRF_UV(value) + * - `DW_CFA_offset: rX` → DWRF_U8(DWRF_CFA_offset | reg), DWRF_UV(offset) + * - `DW_CFA_restore: rX` → DWRF_U8(DWRF_CFA_offset | reg) // restore is same as reusing offset + * - `DW_CFA_advance_loc: N` → DWRF_U8(DWRF_CFA_advance_loc | (N / code_alignment_factor)) + * 4. Use `DWRF_REG_FP`, `DWRF_REG_RA`, etc., for register numbers. + * 5. Use `sizeof(uintptr_t)` (typically 8) for pointer size calculations and alignment. + */ + + /* + * Emit DWARF EH CIE (Common Information Entry) + * + * The CIE describes the calling conventions and basic unwinding rules + * that apply to all functions in this compilation unit. + */ + DWRF_SECTION(CIE, + DWRF_U32(0); // CIE ID (0 indicates this is a CIE) + DWRF_U8(DWRF_CIE_VERSION); // CIE version (1) + DWRF_STR("zR"); // Augmentation string ("zR" = has LSDA) +#ifdef __x86_64__ + DWRF_UV(1); // Code alignment factor (x86_64: 1 byte) +#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) + DWRF_UV(4); // Code alignment factor (AArch64: 4 bytes per instruction) +#endif + DWRF_SV(-(int64_t)sizeof(uintptr_t)); // Data alignment factor (negative) + DWRF_U8(DWRF_REG_RA); // Return address register number + DWRF_UV(1); // Augmentation data length + DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // FDE pointer encoding - ctx->eh_frame_p = p; + /* Initial CFI instructions - describe default calling convention */ +#ifdef __x86_64__ + /* x86_64 initial CFI state */ + DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Call Frame Address) + DWRF_UV(DWRF_REG_SP); // CFA = SP register + DWRF_UV(sizeof(uintptr_t)); // CFA = SP + pointer_size + DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); // Return address is saved + DWRF_UV(1); // At offset 1 from CFA +#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) + /* AArch64 initial CFI state */ + DWRF_U8(DWRF_CFA_def_cfa); // Define CFA (Call Frame Address) + DWRF_UV(DWRF_REG_SP); // CFA = SP register + DWRF_UV(0); // CFA = SP + 0 (AArch64 starts with offset 0) + // No initial register saves in AArch64 CIE +#endif + DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary + ) - /* Emit DWARF EH FDE. */ - DWRF_SECTION(FDE, DWRF_U32((uint32_t)(p - framep)); /* Offset to CIE. */ - DWRF_U32(-0x30); /* Machine code offset relative to .text. */ - DWRF_U32(ctx->code_size); /* Machine code length. */ - DWRF_U8(0); /* Augmentation data. */ - /* Registers saved in CFRAME. */ + ctx->eh_frame_p = p; // Remember start of FDE data + + /* + * Emit DWARF EH FDE (Frame Description Entry) + * + * The FDE describes unwinding information specific to this function. + * It references the CIE and provides function-specific CFI instructions. + * + * The PC-relative offset is calculated after the entire EH frame is built + * to ensure accurate positioning relative to the synthesized DSO layout. + */ + DWRF_SECTION(FDE, + DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference) + ctx->fde_p = p; // Remember where PC offset field is located for later calculation + DWRF_U32(0); // Placeholder for PC-relative offset (calculated at end of elf_init_ehframe) + DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length) + DWRF_U8(0); // Augmentation data length (none) + + /* + * Architecture-specific CFI instructions + * + * These instructions describe how registers are saved and restored + * during function calls. Each architecture has different calling + * conventions and register usage patterns. + */ #ifdef __x86_64__ - DWRF_U8(DWRF_CFA_advance_loc | 4); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); - DWRF_U8(DWRF_CFA_advance_loc | 6); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(8); - /* Extra registers saved for JIT-compiled code. */ + /* x86_64 calling convention unwinding rules with frame pointer */ +# if defined(__CET__) && (__CET__ & 1) + DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance past endbr64 (4 bytes) +# endif + DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past push %rbp (1 byte) + DWRF_U8(DWRF_CFA_def_cfa_offset); // def_cfa_offset 16 + DWRF_UV(16); // New offset: SP + 16 + DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP); // offset r6 at cfa-16 + DWRF_UV(2); // Offset factor: 2 * 8 = 16 bytes + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes) + DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6 + DWRF_UV(DWRF_REG_BP); // Use base pointer register + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3 + DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8 + DWRF_UV(DWRF_REG_SP); // Use stack pointer register + DWRF_UV(8); // New offset: SP + 8 #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__) - DWRF_U8(DWRF_CFA_advance_loc | 1); - DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); - DWRF_U8(DWRF_CFA_offset | 29); DWRF_UV(2); - DWRF_U8(DWRF_CFA_offset | 30); DWRF_UV(1); - DWRF_U8(DWRF_CFA_advance_loc | 3); - DWRF_U8(DWRF_CFA_offset | -(64 - 29)); - DWRF_U8(DWRF_CFA_offset | -(64 - 30)); - DWRF_U8(DWRF_CFA_def_cfa_offset); - DWRF_UV(0); + /* AArch64 calling convention unwinding rules */ + DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance by 1 instruction (4 bytes) + DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16 + DWRF_UV(16); // Stack pointer moved by 16 bytes + DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // x29 (frame pointer) saved + DWRF_UV(2); // At CFA-16 (2 * 8 = 16 bytes from CFA) + DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // x30 (link register) saved + DWRF_UV(1); // At CFA-8 (1 * 8 = 8 bytes from CFA) + DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (12 bytes) + DWRF_U8(DWRF_CFA_restore | DWRF_REG_RA); // Restore x30 - NO DWRF_UV() after this! + DWRF_U8(DWRF_CFA_restore | DWRF_REG_FP); // Restore x29 - NO DWRF_UV() after this! + DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 0 (stack restored) + DWRF_UV(0); // Back to original stack position #else # error "Unsupported target architecture" #endif - DWRF_ALIGNNOP(sizeof(uintptr_t));) - ctx->p = p; + DWRF_ALIGNNOP(sizeof(uintptr_t)); // Align to pointer boundary + ) + + ctx->p = p; // Update context pointer to end of generated data + + /* Calculate and update the PC-relative offset in the FDE + * + * When perf processes the jitdump, it creates a synthesized DSO with this layout: + * + * Synthesized DSO Memory Layout: + * ┌─────────────────────────────────────────────────────────────┐ < code_start + * │ Code Section │ + * │ (round_up(code_size, 8) bytes) │ + * ├─────────────────────────────────────────────────────────────┤ < start of EH frame data + * │ EH Frame Data │ + * │ ┌─────────────────────────────────────────────────────┐ │ + * │ │ CIE data │ │ + * │ └─────────────────────────────────────────────────────┘ │ + * │ ┌─────────────────────────────────────────────────────┐ │ + * │ │ FDE Header: │ │ + * │ │ - CIE offset (4 bytes) │ │ + * │ │ - PC offset (4 bytes) <─ fde_offset_in_frame ─────┼────┼─> points to code_start + * │ │ - address range (4 bytes) │ │ (this specific field) + * │ │ CFI Instructions... │ │ + * │ └─────────────────────────────────────────────────────┘ │ + * ├─────────────────────────────────────────────────────────────┤ < reference_point + * │ EhFrameHeader │ + * │ (navigation metadata) │ + * └─────────────────────────────────────────────────────────────┘ + * + * The PC offset field in the FDE must contain the distance from itself to code_start: + * + * distance = code_start - fde_pc_field + * + * Where: + * fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame + * code_start_location = reference_point - eh_frame_size - round_up(code_size, 8) + * + * Therefore: + * distance = code_start_location - fde_pc_field_location + * = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame) + * = -rounded_code_size - fde_offset_in_frame + * = -(round_up(code_size, 8) + fde_offset_in_frame) + * + * Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field, + * + */ + if (ctx->fde_p != NULL) { + int32_t fde_offset_in_frame = (ctx->fde_p - ctx->startp); + int32_t rounded_code_size = round_up(ctx->code_size, 8); + int32_t pc_relative_offset = -(rounded_code_size + fde_offset_in_frame); + + + // Update the PC-relative offset in the FDE + *(int32_t*)ctx->fde_p = pc_relative_offset; + } +} + +// ============================================================================= +// JITDUMP INITIALIZATION +// ============================================================================= + +/* + * Initialize the perf jitdump interface + * + * This function sets up everything needed to generate jitdump files: + * 1. Creates the jitdump file with a unique name + * 2. Maps the first page to signal perf that we're using the interface + * 3. Writes the jitdump header + * 4. Initializes synchronization primitives + * + * The memory mapping is crucial - perf detects jitdump files by scanning + * for processes that have mapped files matching the pattern /tmp/jit-*.dump + * + * Returns: Pointer to initialized state, or NULL on failure + */ +static void* perf_map_jit_init(void) { + char filename[100]; + int pid = getpid(); + + /* Create unique filename based on process ID */ + snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); + + /* Create/open the jitdump file with appropriate permissions */ + const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); + if (fd == -1) { + return NULL; // Failed to create file + } + + /* Get system page size for memory mapping */ + const long page_size = sysconf(_SC_PAGESIZE); + if (page_size == -1) { + close(fd); + return NULL; // Failed to get page size + } + + /* + * Map the first page of the jitdump file + * + * This memory mapping serves as a signal to perf that this process + * is generating JIT code. Perf scans /proc/.../maps looking for mapped + * files that match the jitdump naming pattern. + * + * The mapping must be PROT_READ | PROT_EXEC to be detected by perf. + */ + perf_jit_map_state.mapped_buffer = mmap( + NULL, // Let kernel choose address + page_size, // Map one page + PROT_READ | PROT_EXEC, // Read and execute permissions (required by perf) + MAP_PRIVATE, // Private mapping + fd, // File descriptor + 0 // Offset 0 (first page) + ); + + if (perf_jit_map_state.mapped_buffer == NULL) { + close(fd); + return NULL; // Memory mapping failed + } + + perf_jit_map_state.mapped_size = page_size; + + /* Convert file descriptor to FILE* for easier I/O operations */ + perf_jit_map_state.perf_map = fdopen(fd, "w+"); + if (perf_jit_map_state.perf_map == NULL) { + close(fd); + return NULL; // Failed to create FILE* + } + + /* + * Set up file buffering for better performance + * + * We use a large buffer (2MB) because jitdump files can be written + * frequently during program execution. Buffering reduces system call + * overhead and improves overall performance. + */ + setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); + + /* Write the jitdump file header */ + perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); + + /* + * Initialize thread synchronization lock + * + * Multiple threads may attempt to write to the jitdump file + * simultaneously. This lock ensures thread-safe access to the + * global jitdump state. + */ + perf_jit_map_state.map_lock = PyThread_allocate_lock(); + if (perf_jit_map_state.map_lock == NULL) { + fclose(perf_jit_map_state.perf_map); + return NULL; // Failed to create lock + } + + /* Initialize code ID counter */ + perf_jit_map_state.code_id = 0; + + /* Calculate padding size based on actual unwind info requirements */ + size_t eh_frame_size = calculate_eh_frame_size(); + size_t unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; + trampoline_api.code_padding = round_up(unwind_data_size, 16); + + return &perf_jit_map_state; } +// ============================================================================= +// MAIN JITDUMP ENTRY WRITING +// ============================================================================= + +/* + * Write a complete jitdump entry for a Python function + * + * This is the main function called by Python's trampoline system whenever + * a new piece of JIT-compiled code needs to be recorded. It writes both + * the unwinding information and the code load event to the jitdump file. + * + * The function performs these steps: + * 1. Initialize jitdump system if not already done + * 2. Extract function name and filename from Python code object + * 3. Generate DWARF unwinding information + * 4. Write unwinding info event to jitdump file + * 5. Write code load event to jitdump file + * + * Args: + * state: Jitdump state (currently unused, uses global state) + * code_addr: Address where the compiled code resides + * code_size: Size of the compiled code in bytes + * co: Python code object containing metadata + * + * IMPORTANT: This function signature is part of Python's internal API + * and must not be changed without coordinating with core Python development. + */ static void perf_map_jit_write_entry(void *state, const void *code_addr, - unsigned int code_size, PyCodeObject *co) + unsigned int code_size, PyCodeObject *co) { - + /* Initialize jitdump system on first use */ if (perf_jit_map_state.perf_map == NULL) { void* ret = perf_map_jit_init(); if(ret == NULL){ - return; + return; // Initialization failed, silently abort } } + /* + * Extract function information from Python code object + * + * We create a human-readable function name by combining the qualified + * name (includes class/module context) with the filename. This helps + * developers identify functions in perf reports. + */ const char *entry = ""; if (co->co_qualname != NULL) { entry = PyUnicode_AsUTF8(co->co_qualname); } + const char *filename = ""; if (co->co_filename != NULL) { filename = PyUnicode_AsUTF8(co->co_filename); } - + /* + * Create formatted function name for perf display + * + * Format: "py::<function_name>:<filename>" + * The "py::" prefix helps identify Python functions in mixed-language + * profiles (e.g., when profiling C extensions alongside Python code). + */ size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); if (perf_map_entry == NULL) { - return; + return; // Memory allocation failed } snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); @@ -528,90 +1176,186 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr, uword base = (uword)code_addr; uword size = code_size; - // Write the code unwinding info event. - - // Create unwinding information (eh frame) + /* + * Generate DWARF unwinding information + * + * DWARF data is essential for proper stack unwinding during profiling. + * Without it, perf cannot generate accurate call graphs, especially + * in optimized code where frame pointers may be omitted. + */ ELFObjectContext ctx; - char buffer[1024]; + char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient) ctx.code_size = code_size; ctx.startp = ctx.p = (uint8_t*)buffer; + ctx.fde_p = NULL; // Initialize to NULL, will be set when FDE is written + + /* Generate EH frame (Exception Handling frame) data */ elf_init_ehframe(&ctx); int eh_frame_size = ctx.p - ctx.startp; - // Populate the unwind info event for perf + /* + * Write Code Unwinding Information Event + * + * This event must be written before the code load event to ensure + * perf has the unwinding information available when it processes + * the code region. + */ CodeUnwindingInfoEvent ev2; ev2.base.event = PerfUnwindingInfo; ev2.base.time_stamp = get_current_monotonic_ticks(); ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; - // Ensure we have enough space between DSOs when perf maps them - assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING); + + /* Verify we don't exceed our padding budget */ + assert(ev2.unwind_data_size <= (uint64_t)trampoline_api.code_padding); + ev2.eh_frame_hdr_size = sizeof(EhFrameHeader); - ev2.mapped_size = round_up(ev2.unwind_data_size, 16); + ev2.mapped_size = round_up(ev2.unwind_data_size, 16); // 16-byte alignment + + /* Calculate total event size with padding */ int content_size = sizeof(ev2) + sizeof(EhFrameHeader) + eh_frame_size; - int padding_size = round_up(content_size, 8) - content_size; + int padding_size = round_up(content_size, 8) - content_size; // 8-byte align ev2.base.size = content_size + padding_size; - perf_map_jit_write_fully(&ev2, sizeof(ev2)); + /* Write the unwinding info event header */ + perf_map_jit_write_fully(&ev2, sizeof(ev2)); - // Populate the eh Frame header + /* + * Write EH Frame Header + * + * The EH frame header provides metadata about the DWARF unwinding + * information that follows. It includes pointers and counts that + * help perf navigate the unwinding data efficiently. + */ EhFrameHeader f; f.version = 1; - f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel; - f.fde_count_enc = DwarfUData4; - f.table_enc = DwarfSData4 | DwarfDataRel; + f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel; // PC-relative signed 4-byte + f.fde_count_enc = DwarfUData4; // Unsigned 4-byte count + f.table_enc = DwarfSData4 | DwarfDataRel; // Data-relative signed 4-byte + + /* Calculate relative offsets for EH frame navigation */ f.eh_frame_ptr = -(eh_frame_size + 4 * sizeof(unsigned char)); - f.eh_fde_count = 1; + f.eh_fde_count = 1; // We generate exactly one FDE per function f.from = -(round_up(code_size, 8) + eh_frame_size); + int cie_size = ctx.eh_frame_p - ctx.startp; f.to = -(eh_frame_size - cie_size); + /* Write EH frame data and header */ perf_map_jit_write_fully(ctx.startp, eh_frame_size); perf_map_jit_write_fully(&f, sizeof(f)); + /* Write padding to maintain alignment */ char padding_bytes[] = "\0\0\0\0\0\0\0\0"; perf_map_jit_write_fully(&padding_bytes, padding_size); - // Write the code load event. + /* + * Write Code Load Event + * + * This event tells perf about the new code region. It includes: + * - Memory addresses and sizes + * - Process and thread identification + * - Function name for symbol resolution + * - The actual machine code bytes + */ CodeLoadEvent ev; ev.base.event = PerfLoad; ev.base.size = sizeof(ev) + (name_length+1) + size; ev.base.time_stamp = get_current_monotonic_ticks(); ev.process_id = getpid(); - ev.thread_id = syscall(SYS_gettid); - ev.vma = base; - ev.code_address = base; + ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call + ev.vma = base; // Virtual memory address + ev.code_address = base; // Same as VMA for our use case ev.code_size = size; + + /* Assign unique code ID and increment counter */ perf_jit_map_state.code_id += 1; ev.code_id = perf_jit_map_state.code_id; + /* Write code load event and associated data */ perf_map_jit_write_fully(&ev, sizeof(ev)); - perf_map_jit_write_fully(perf_map_entry, name_length+1); - perf_map_jit_write_fully((void*)(base), size); - return; + perf_map_jit_write_fully(perf_map_entry, name_length+1); // Include null terminator + perf_map_jit_write_fully((void*)(base), size); // Copy actual machine code + + /* Clean up allocated memory */ + PyMem_RawFree(perf_map_entry); } +// ============================================================================= +// CLEANUP AND FINALIZATION +// ============================================================================= + +/* + * Finalize and cleanup the perf jitdump system + * + * This function is called when Python is shutting down or when the + * perf trampoline system is being disabled. It ensures all resources + * are properly released and all buffered data is flushed to disk. + * + * Args: + * state: Jitdump state (currently unused, uses global state) + * + * Returns: 0 on success + * + * IMPORTANT: This function signature is part of Python's internal API + * and must not be changed without coordinating with core Python development. + */ static int perf_map_jit_fini(void* state) { + /* + * Close jitdump file with proper synchronization + * + * We need to acquire the lock to ensure no other threads are + * writing to the file when we close it. This prevents corruption + * and ensures all data is properly flushed. + */ if (perf_jit_map_state.perf_map != NULL) { - // close the file PyThread_acquire_lock(perf_jit_map_state.map_lock, 1); - fclose(perf_jit_map_state.perf_map); + fclose(perf_jit_map_state.perf_map); // This also flushes buffers PyThread_release_lock(perf_jit_map_state.map_lock); - // clean up the lock and state + /* Clean up synchronization primitive */ PyThread_free_lock(perf_jit_map_state.map_lock); perf_jit_map_state.perf_map = NULL; } + + /* + * Unmap the memory region + * + * This removes the signal to perf that we were generating JIT code. + * After this point, perf will no longer detect this process as + * having JIT capabilities. + */ if (perf_jit_map_state.mapped_buffer != NULL) { munmap(perf_jit_map_state.mapped_buffer, perf_jit_map_state.mapped_size); + perf_jit_map_state.mapped_buffer = NULL; } + + /* Clear global state reference */ trampoline_api.state = NULL; - return 0; + + return 0; // Success } +// ============================================================================= +// PUBLIC API EXPORT +// ============================================================================= + +/* + * Python Perf Callbacks Structure + * + * This structure defines the callback interface that Python's trampoline + * system uses to integrate with perf profiling. It contains function + * pointers for initialization, event writing, and cleanup. + * + * CRITICAL: This structure and its contents are part of Python's internal + * API. The function signatures and behavior must remain stable to maintain + * compatibility with the Python interpreter's perf integration system. + * + * Used by: Python's _PyPerf_Callbacks system in pycore_ceval.h + */ _PyPerf_Callbacks _Py_perfmap_jit_callbacks = { - &perf_map_jit_init, - &perf_map_jit_write_entry, - &perf_map_jit_fini, + &perf_map_jit_init, // Initialization function + &perf_map_jit_write_entry, // Event writing function + &perf_map_jit_fini, // Cleanup function }; -#endif +#endif /* PY_HAVE_PERF_TRAMPOLINE */ diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 996e54b82b61e8..4690e7b718f3c8 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -162,6 +162,8 @@ static void invalidate_icache(char* begin, char*end) { } #endif +#define CODE_ALIGNMENT 32 + /* The function pointer is passed as last argument. The other three arguments * are passed in the same order as the function requires. This results in * shorter, more efficient ASM code for trampoline. @@ -291,7 +293,9 @@ new_code_arena(void) void *start = &_Py_trampoline_func_start; void *end = &_Py_trampoline_func_end; size_t code_size = end - start; - size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16); + size_t unaligned_size = code_size + trampoline_api.code_padding; + size_t chunk_size = round_up(unaligned_size, CODE_ALIGNMENT); + assert(chunk_size % CODE_ALIGNMENT == 0); // TODO: Check the effect of alignment of the code chunks. Initial investigation // showed that this has no effect on performance in x86-64 or aarch64 and the current // version has the advantage that the unwinder in GDB can unwind across JIT-ed code. @@ -356,7 +360,9 @@ static inline py_trampoline code_arena_new_code(code_arena_t *code_arena) { py_trampoline trampoline = (py_trampoline)code_arena->current_addr; - size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16); + size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, + CODE_ALIGNMENT); + assert(total_code_size % CODE_ALIGNMENT == 0); code_arena->size_left -= total_code_size; code_arena->current_addr += total_code_size; return trampoline; @@ -489,9 +495,6 @@ _PyPerfTrampoline_Init(int activate) } else { _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator); - if (new_code_arena() < 0) { - return -1; - } extra_code_index = _PyEval_RequestCodeExtraIndex(NULL); if (extra_code_index == -1) { return -1; @@ -499,6 +502,9 @@ _PyPerfTrampoline_Init(int activate) if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) { trampoline_api.state = trampoline_api.init_state(); } + if (new_code_arena() < 0) { + return -1; + } perf_status = PERF_STATUS_OK; } #endif diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index c4c1d9fd9e1380..fad69f82b60796 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -503,6 +503,7 @@ pycore_init_runtime(_PyRuntimeState *runtime, _PyRuntimeState_SetFinalizing(runtime, NULL); _Py_InitVersion(); + _Py_DumpTraceback_Init(); status = _Py_HashRandomization_Init(config); if (_PyStatus_EXCEPTION(status)) { @@ -760,6 +761,11 @@ pycore_init_types(PyInterpreterState *interp) return status; } + status = _PyDateTime_InitTypes(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + return _PyStatus_OK(); } @@ -1702,8 +1708,10 @@ finalize_modules(PyThreadState *tstate) #endif // Stop watching __builtin__ modifications - PyDict_Unwatch(0, interp->builtins); - + if (PyDict_Unwatch(0, interp->builtins) < 0) { + // might happen if interp is cleared before watching the __builtin__ + PyErr_Clear(); + } PyObject *modules = _PyImport_GetModules(interp); if (modules == NULL) { // Already done @@ -1992,6 +2000,7 @@ resolve_final_tstate(_PyRuntimeState *runtime) } else { /* Fall back to the current tstate. It's better than nothing. */ + // XXX No it's not main_tstate = tstate; } } @@ -2037,6 +2046,16 @@ _Py_Finalize(_PyRuntimeState *runtime) _PyAtExit_Call(tstate->interp); + /* Clean up any lingering subinterpreters. + + Two preconditions need to be met here: + + - This has to happen before _PyRuntimeState_SetFinalizing is + called, or else threads might get prematurely blocked. + - The world must not be stopped, as finalizers can run. + */ + finalize_subinterpreters(); + assert(_PyThreadState_GET() == tstate); /* Copy the core config, PyInterpreterState_Delete() free @@ -2124,9 +2143,6 @@ _Py_Finalize(_PyRuntimeState *runtime) _PyImport_FiniExternal(tstate->interp); finalize_modules(tstate); - /* Clean up any lingering subinterpreters. */ - finalize_subinterpreters(); - /* Print debug stats if any */ _PyEval_Fini(); @@ -2291,18 +2307,17 @@ new_interpreter(PyThreadState **tstate_p, interpreters: disable PyGILState_Check(). */ runtime->gilstate.check_enabled = 0; - PyInterpreterState *interp = PyInterpreterState_New(); + // XXX Might new_interpreter() have been called without the GIL held? + PyThreadState *save_tstate = _PyThreadState_GET(); + PyThreadState *tstate = NULL; + PyInterpreterState *interp; + status = _PyInterpreterState_New(save_tstate, &interp); if (interp == NULL) { - *tstate_p = NULL; - return _PyStatus_OK(); + goto error; } _PyInterpreterState_SetWhence(interp, whence); interp->_ready = 1; - // XXX Might new_interpreter() have been called without the GIL held? - PyThreadState *save_tstate = _PyThreadState_GET(); - PyThreadState *tstate = NULL; - /* From this point until the init_interp_create_gil() call, we must not do anything that requires that the GIL be held (or otherwise exist). That applies whether or not the new @@ -2377,15 +2392,13 @@ new_interpreter(PyThreadState **tstate_p, error: *tstate_p = NULL; if (tstate != NULL) { - PyThreadState_Clear(tstate); - _PyThreadState_Detach(tstate); - PyThreadState_Delete(tstate); + Py_EndInterpreter(tstate); + } else if (interp != NULL) { + PyInterpreterState_Delete(interp); } if (save_tstate != NULL) { _PyThreadState_Attach(save_tstate); } - PyInterpreterState_Delete(interp); - return status; } @@ -2410,9 +2423,8 @@ Py_NewInterpreter(void) return tstate; } -/* Delete an interpreter and its last thread. This requires that the - given thread state is current, that the thread has no remaining - frames, and that it is its interpreter's only remaining thread. +/* Delete an interpreter. This requires that the given thread state + is current, and that the thread has no remaining frames. It is a fatal error to violate these constraints. (Py_FinalizeEx() doesn't have these constraints -- it zaps @@ -2442,15 +2454,20 @@ Py_EndInterpreter(PyThreadState *tstate) _Py_FinishPendingCalls(tstate); _PyAtExit_Call(tstate->interp); - - if (tstate != interp->threads.head || tstate->next != NULL) { - Py_FatalError("not the last thread"); - } - + _PyRuntimeState *runtime = interp->runtime; + _PyEval_StopTheWorldAll(runtime); /* Remaining daemon threads will automatically exit when they attempt to take the GIL (ex: PyEval_RestoreThread()). */ _PyInterpreterState_SetFinalizing(interp, tstate); + PyThreadState *list = _PyThreadState_RemoveExcept(tstate); + for (PyThreadState *p = list; p != NULL; p = p->next) { + _PyThreadState_SetShuttingDown(p); + } + + _PyEval_StartTheWorldAll(runtime); + _PyThreadState_DeleteList(list, /*is_after_fork=*/0); + // XXX Call something like _PyImport_Disable() here? _PyImport_FiniExternal(tstate->interp); @@ -2480,6 +2497,8 @@ finalize_subinterpreters(void) PyInterpreterState *main_interp = _PyInterpreterState_Main(); assert(final_tstate->interp == main_interp); _PyRuntimeState *runtime = main_interp->runtime; + assert(!runtime->stoptheworld.world_stopped); + assert(_PyRuntimeState_GetFinalizing(runtime) == NULL); struct pyinterpreters *interpreters = &runtime->interpreters; /* Get the first interpreter in the list. */ @@ -2499,7 +2518,7 @@ finalize_subinterpreters(void) (void)PyErr_WarnEx( PyExc_RuntimeWarning, "remaining subinterpreters; " - "destroy them with _interpreters.destroy()", + "close them with Interpreter.close()", 0); /* Swap out the current tstate, which we know must belong @@ -2508,27 +2527,17 @@ finalize_subinterpreters(void) /* Clean up all remaining subinterpreters. */ while (interp != NULL) { - assert(!_PyInterpreterState_IsRunningMain(interp)); - - /* Find the tstate to use for fini. We assume the interpreter - will have at most one tstate at this point. */ - PyThreadState *tstate = interp->threads.head; - if (tstate != NULL) { - /* Ideally we would be able to use tstate as-is, and rely - on it being in a ready state: no exception set, not - running anything (tstate->current_frame), matching the - current thread ID (tstate->thread_id). To play it safe, - we always delete it and use a fresh tstate instead. */ - assert(tstate != final_tstate); - _PyThreadState_Attach(tstate); - PyThreadState_Clear(tstate); - _PyThreadState_Detach(tstate); - PyThreadState_Delete(tstate); + /* Make a tstate for finalization. */ + PyThreadState *tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); + if (tstate == NULL) { + // XXX Some graceful way to always get a thread state? + Py_FatalError("thread state allocation failed"); } - tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); - /* Destroy the subinterpreter. */ + /* Enter the subinterpreter. */ _PyThreadState_Attach(tstate); + + /* Destroy the subinterpreter. */ Py_EndInterpreter(tstate); assert(_PyThreadState_GET() == NULL); @@ -3589,7 +3598,7 @@ PyOS_getsig(int sig) /* * All of the code in this function must only use async-signal-safe functions, - * listed at `man 7 signal` or + * listed at `man 7 signal-safety` or * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. */ PyOS_sighandler_t diff --git a/Python/pystate.c b/Python/pystate.c index 1ac134400856d4..7188e5bf361fa5 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -8,7 +8,6 @@ #include "pycore_codecs.h" // _PyCodec_Fini() #include "pycore_critical_section.h" // _PyCriticalSection_Resume() #include "pycore_dtoa.h" // _dtoa_state_INIT() -#include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init() #include "pycore_freelist.h" // _PyObject_ClearFreeLists() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interpframe.h" // _PyThreadState_HasStackSpace() @@ -404,7 +403,6 @@ _Py_COMP_DIAG_POP &(runtime)->unicode_state.ids.mutex, \ &(runtime)->imports.extensions.mutex, \ &(runtime)->ceval.pending_mainthread.mutex, \ - &(runtime)->ceval.sys_trace_profile_mutex, \ &(runtime)->atexit.mutex, \ &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \ @@ -434,11 +432,6 @@ init_runtime(_PyRuntimeState *runtime, runtime->main_thread = PyThread_get_thread_ident(); runtime->unicode_state.ids.next_index = unicode_next_index; - -#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) - _Py_EmscriptenTrampoline_Init(runtime); -#endif - runtime->_initialized = 1; } @@ -572,16 +565,19 @@ _PyInterpreterState_Enable(_PyRuntimeState *runtime) static PyInterpreterState * alloc_interpreter(void) { + // Aligned allocation for PyInterpreterState. + // the first word of the memory block is used to store + // the original pointer to be used later to free the memory. size_t alignment = _Alignof(PyInterpreterState); - size_t allocsize = sizeof(PyInterpreterState) + alignment - 1; + size_t allocsize = sizeof(PyInterpreterState) + sizeof(void *) + alignment - 1; void *mem = PyMem_RawCalloc(1, allocsize); if (mem == NULL) { return NULL; } - PyInterpreterState *interp = _Py_ALIGN_UP(mem, alignment); - assert(_Py_IS_ALIGNED(interp, alignment)); - interp->_malloced = mem; - return interp; + void *ptr = _Py_ALIGN_UP((char *)mem + sizeof(void *), alignment); + ((void **)ptr)[-1] = mem; + assert(_Py_IS_ALIGNED(ptr, alignment)); + return ptr; } static void @@ -596,7 +592,7 @@ free_interpreter(PyInterpreterState *interp) interp->obmalloc = NULL; } assert(_Py_IS_ALIGNED(interp, _Alignof(PyInterpreterState))); - PyMem_RawFree(interp->_malloced); + PyMem_RawFree(((void **)interp)[-1]); } } @@ -674,8 +670,7 @@ init_interpreter(PyInterpreterState *interp, } interp->monitoring_tool_versions[t] = 0; } - interp->sys_profile_initialized = false; - interp->sys_trace_initialized = false; + interp->_code_object_generation = 0; interp->jit = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; @@ -868,10 +863,11 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->audit_hooks); - // At this time, all the threads should be cleared so we don't need atomic - // operations for instrumentation_version or eval_breaker. + // gh-140257: Threads have already been cleared, but daemon threads may + // still access eval_breaker atomically via take_gil() right before they + // hang. Use an atomic store to prevent data races during finalization. interp->ceval.instrumentation_version = 0; - tstate->eval_breaker = 0; + _Py_atomic_store_uintptr(&tstate->eval_breaker, 0); for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { interp->monitors.tools[i] = 0; @@ -881,11 +877,13 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->monitoring_callables[t][e]); } } - interp->sys_profile_initialized = false; - interp->sys_trace_initialized = false; for (int t = 0; t < PY_MONITORING_TOOL_IDS; t++) { Py_CLEAR(interp->monitoring_tool_names[t]); } + interp->_code_object_generation = 0; +#ifdef Py_GIL_DISABLED + interp->tlbc_indices.tlbc_generation = 0; +#endif PyConfig_Clear(&interp->config); _PyCodec_Fini(interp); @@ -909,7 +907,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) _Py_ClearExecutorDeletionList(interp); #endif _PyAST_Fini(interp); - _PyWarnings_Fini(interp); _PyAtExit_Fini(interp); // All Python types must be destroyed before the last GC collection. Python @@ -920,6 +917,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) _PyGC_CollectNoFail(tstate); _PyGC_Fini(interp); + // Finalize warnings after last gc so that any finalizers can + // access warnings state + _PyWarnings_Fini(interp); /* We don't clear sysdict and builtins until the end of this function. Because clearing other attributes can execute arbitrary Python code which requires sysdict and builtins. */ @@ -1049,6 +1049,8 @@ PyInterpreterState_Delete(PyInterpreterState *interp) _PyObject_FiniState(interp); + PyConfig_Clear(&interp->config); + free_interpreter(interp); } @@ -1246,6 +1248,7 @@ _Py_CheckMainModule(PyObject *module) PyObject *msg = PyUnicode_FromString("invalid __main__ module"); if (msg != NULL) { (void)PyErr_SetImportError(msg, &_Py_ID(__main__), NULL); + Py_DECREF(msg); } return -1; } @@ -1457,9 +1460,6 @@ tstate_is_alive(PyThreadState *tstate) // lifecycle //---------- -/* Minimum size of data stack chunk */ -#define DATA_STACK_CHUNK_SIZE (16*1024) - static _PyStackChunk* allocate_chunk(int size_in_bytes, _PyStackChunk* previous) { @@ -1583,6 +1583,9 @@ init_threadstate(_PyThreadStateImpl *_tstate, _tstate->c_stack_top = 0; _tstate->c_stack_hard_limit = 0; + _tstate->c_stack_init_base = 0; + _tstate->c_stack_init_top = 0; + _tstate->asyncio_running_loop = NULL; _tstate->asyncio_running_task = NULL; @@ -1791,13 +1794,14 @@ PyThreadState_Clear(PyThreadState *tstate) } if (tstate->c_profilefunc != NULL) { - tstate->interp->sys_profiling_threads--; + FT_ATOMIC_ADD_SSIZE(tstate->interp->sys_profiling_threads, -1); tstate->c_profilefunc = NULL; } if (tstate->c_tracefunc != NULL) { - tstate->interp->sys_tracing_threads--; + FT_ATOMIC_ADD_SSIZE(tstate->interp->sys_tracing_threads, -1); tstate->c_tracefunc = NULL; } + Py_CLEAR(tstate->c_profileobj); Py_CLEAR(tstate->c_traceobj); @@ -1811,16 +1815,23 @@ PyThreadState_Clear(PyThreadState *tstate) struct _Py_freelists *freelists = _Py_freelists_GET(); _PyObject_ClearFreeLists(freelists, 1); + // Flush the thread's local GC allocation count to the global count + // before the thread state is cleared, otherwise the count is lost. + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + _Py_atomic_add_int(&tstate->interp->gc.young.count, + (int)tstate_impl->gc.alloc_count); + tstate_impl->gc.alloc_count = 0; + // Merge our thread-local refcounts into the type's own refcount and // free our local refcount array. - _PyObject_FinalizePerThreadRefcounts((_PyThreadStateImpl *)tstate); + _PyObject_FinalizePerThreadRefcounts(tstate_impl); // Remove ourself from the biased reference counting table of threads. _Py_brc_remove_thread(tstate); // Release our thread-local copies of the bytecode for reuse by another // thread - _Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate); + _Py_ClearTLBCIndex(tstate_impl); #endif // Merge our queue of pointers to be freed into the interpreter queue. @@ -1908,9 +1919,14 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) static void zapthreads(PyInterpreterState *interp) { + PyThreadState *tstate; /* No need to lock the mutex here because this should only happen - when the threads are all really dead (XXX famous last words). */ - _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { + when the threads are all really dead (XXX famous last words). + + Cannot use _Py_FOR_EACH_TSTATE_UNLOCKED because we are freeing + the thread states here. + */ + while ((tstate = interp->threads.head) != NULL) { tstate_verify_not_active(tstate); tstate_delete_common(tstate, 0); free_threadstate((_PyThreadStateImpl *)tstate); @@ -2345,13 +2361,15 @@ stop_the_world(struct _stoptheworld_state *stw) { _PyRuntimeState *runtime = &_PyRuntime; - PyMutex_Lock(&stw->mutex); + // gh-137433: Acquire the rwmutex first to avoid deadlocks with daemon + // threads that may hang when blocked on lock acquisition. if (stw->is_global) { _PyRWMutex_Lock(&runtime->stoptheworld_mutex); } else { _PyRWMutex_RLock(&runtime->stoptheworld_mutex); } + PyMutex_Lock(&stw->mutex); HEAD_LOCK(runtime); stw->requested = 1; @@ -2417,13 +2435,13 @@ start_the_world(struct _stoptheworld_state *stw) } stw->requester = NULL; HEAD_UNLOCK(runtime); + PyMutex_Unlock(&stw->mutex); if (stw->is_global) { _PyRWMutex_Unlock(&runtime->stoptheworld_mutex); } else { _PyRWMutex_RUnlock(&runtime->stoptheworld_mutex); } - PyMutex_Unlock(&stw->mutex); } #endif // Py_GIL_DISABLED @@ -3007,7 +3025,7 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature static PyObject ** push_chunk(PyThreadState *tstate, int size) { - int allocate_size = DATA_STACK_CHUNK_SIZE; + int allocate_size = _PY_DATA_STACK_CHUNK_SIZE; while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; } diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 4ee287af72fdb2..e005a509f6f061 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1365,6 +1365,29 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py return PyEval_EvalCode((PyObject*)co, globals, locals); } +static PyObject * +get_interactive_filename(PyObject *filename, Py_ssize_t count) +{ + PyObject *result; + Py_ssize_t len = PyUnicode_GET_LENGTH(filename); + + if (len >= 2 + && PyUnicode_ReadChar(filename, 0) == '<' + && PyUnicode_ReadChar(filename, len - 1) == '>') { + PyObject *middle = PyUnicode_Substring(filename, 1, len-1); + if (middle == NULL) { + return NULL; + } + result = PyUnicode_FromFormat("<%U-%d>", middle, count); + Py_DECREF(middle); + } else { + result = PyUnicode_FromFormat( + "%U-%d", filename, count); + } + return result; + +} + static PyObject * run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals, PyCompilerFlags *flags, PyArena *arena, PyObject* interactive_src, @@ -1375,8 +1398,8 @@ run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals, if (interactive_src) { PyInterpreterState *interp = tstate->interp; if (generate_new_source) { - interactive_filename = PyUnicode_FromFormat( - "%U-%d", filename, interp->_interactive_src_count++); + interactive_filename = get_interactive_filename( + filename, interp->_interactive_src_count++); } else { Py_INCREF(interactive_filename); } @@ -1524,6 +1547,26 @@ Py_CompileStringExFlags(const char *str, const char *filename_str, int start, return co; } +int +_PyObject_SupportedAsScript(PyObject *cmd) +{ + if (PyUnicode_Check(cmd)) { + return 1; + } + else if (PyBytes_Check(cmd)) { + return 1; + } + else if (PyByteArray_Check(cmd)) { + return 1; + } + else if (PyObject_CheckBuffer(cmd)) { + return 1; + } + else { + return 0; + } +} + const char * _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy) { diff --git a/Python/qsbr.c b/Python/qsbr.c index bf34fb2523dfc8..c9fad5c05ef108 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -41,10 +41,6 @@ // Starting size of the array of qsbr thread states #define MIN_ARRAY_SIZE 8 -// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing -// the write sequence. -#define QSBR_DEFERRED_LIMIT 10 - // Allocate a QSBR thread state from the freelist static struct _qsbr_thread_state * qsbr_allocate(struct _qsbr_shared *shared) @@ -117,13 +113,9 @@ _Py_qsbr_advance(struct _qsbr_shared *shared) } uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr) +_Py_qsbr_shared_next(struct _qsbr_shared *shared) { - if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) { - return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR; - } - qsbr->deferrals = 0; - return _Py_qsbr_advance(qsbr->shared); + return _Py_qsbr_shared_current(shared) + QSBR_INCR; } static uint64_t diff --git a/Python/remote_debug.h b/Python/remote_debug.h index edc77c302916ca..8f9b6cd4c4960f 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -13,6 +13,16 @@ If you need to add a new function ensure that is declared 'static'. extern "C" { #endif +#ifdef __clang__ + #define UNUSED __attribute__((unused)) +#elif defined(__GNUC__) + #define UNUSED __attribute__((unused)) +#elif defined(_MSC_VER) + #define UNUSED __pragma(warning(suppress: 4505)) +#else + #define UNUSED +#endif + #if !defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) # error "this header requires Py_BUILD_CORE or Py_BUILD_CORE_MODULE define" #endif @@ -35,7 +45,7 @@ extern "C" { # include <sys/mman.h> #endif -#if defined(__APPLE__) && TARGET_OS_OSX +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX # include <libproc.h> # include <mach-o/fat.h> # include <mach-o/loader.h> @@ -73,27 +83,105 @@ extern "C" { # define HAVE_PROCESS_VM_READV 0 #endif +#define _set_debug_exception_cause(exception, format, ...) \ + do { \ + if (!PyErr_ExceptionMatches(PyExc_PermissionError)) { \ + PyThreadState *tstate = _PyThreadState_GET(); \ + if (!_PyErr_Occurred(tstate)) { \ + _PyErr_Format(tstate, exception, format, ##__VA_ARGS__); \ + } else { \ + _PyErr_FormatFromCause(exception, format, ##__VA_ARGS__); \ + } \ + } \ + } while (0) + +static inline size_t +get_page_size(void) { + size_t page_size = 0; + if (page_size == 0) { +#ifdef MS_WINDOWS + SYSTEM_INFO si; + GetSystemInfo(&si); + page_size = si.dwPageSize; +#else + page_size = (size_t)getpagesize(); +#endif + } + return page_size; +} + +typedef struct page_cache_entry { + uintptr_t page_addr; // page-aligned base address + char *data; + int valid; + struct page_cache_entry *next; +} page_cache_entry_t; + +#define MAX_PAGES 1024 + // Define a platform-independent process handle structure typedef struct { pid_t pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX + mach_port_t task; +#elif defined(MS_WINDOWS) HANDLE hProcess; +#elif defined(__linux__) + int memfd; #endif + page_cache_entry_t pages[MAX_PAGES]; + Py_ssize_t page_size; } proc_handle_t; +static void +_Py_RemoteDebug_FreePageCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + PyMem_RawFree(handle->pages[i].data); + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } +} + +UNUSED static void +_Py_RemoteDebug_ClearCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].valid = 0; + } +} + +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX +static mach_port_t pid_to_task(pid_t pid); +#endif + // Initialize the process handle static int _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->pid = pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX + handle->task = pid_to_task(handle->pid); + if (handle->task == 0) { + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize macOS process handle"); + return -1; + } +#elif defined(MS_WINDOWS) handle->hProcess = OpenProcess( PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, FALSE, pid); if (handle->hProcess == NULL) { PyErr_SetFromWindowsErr(0); + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to initialize Windows process handle"); return -1; } +#elif defined(__linux__) + handle->memfd = -1; #endif + handle->page_size = get_page_size(); + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } return 0; } @@ -105,11 +193,17 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { CloseHandle(handle->hProcess); handle->hProcess = NULL; } +#elif defined(__linux__) + if (handle->memfd != -1) { + close(handle->memfd); + handle->memfd = -1; + } #endif handle->pid = 0; + _Py_RemoteDebug_FreePageCache(handle); } -#if defined(__APPLE__) && TARGET_OS_OSX +#if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX static uintptr_t return_section_address64( @@ -148,8 +242,10 @@ return_section_address64( &object_name ); if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_region failed while parsing 64-bit Mach-O binary " + "at base address 0x%lx (kern_return_t: %d)", + base, ret); return 0; } } @@ -169,9 +265,6 @@ return_section_address64( cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); } - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); return 0; } @@ -212,8 +305,10 @@ return_section_address32( &object_name ); if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_region failed while parsing 32-bit Mach-O binary " + "at base address 0x%lx (kern_return_t: %d)", + base, ret); return 0; } } @@ -233,9 +328,6 @@ return_section_address32( cmd = (struct segment_command*)((void*)cmd + cmd->cmdsize); } - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); return 0; } @@ -253,8 +345,20 @@ return_section_address_fat( int is_abi64; size_t cpu_size = sizeof(cpu), abi64_size = sizeof(is_abi64); - sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0); - sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0); + if (sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to determine CPU type via sysctlbyname " + "for fat binary analysis at 0x%lx: %s", + base, strerror(errno)); + return 0; + } + if (sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to determine CPU ABI capability via sysctlbyname " + "for fat binary analysis at 0x%lx: %s", + base, strerror(errno)); + return 0; + } cpu |= is_abi64 * CPU_ARCH_ABI64; @@ -285,13 +389,18 @@ return_section_address_fat( return return_section_address64(section, proc_ref, base, (void*)hdr); default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic in fat binary.\n"); + PyErr_Format(PyExc_RuntimeError, + "Unknown Mach-O magic number 0x%x in fat binary architecture %u at base 0x%lx", + hdr->magic, i, base); return 0; } } } - PyErr_SetString(PyExc_RuntimeError, "No matching architecture found in fat binary.\n"); + PyErr_Format(PyExc_RuntimeError, + "No matching architecture found for CPU type 0x%x " + "in fat binary at base 0x%lx (%u architectures examined)", + cpu, base, nfat_arch); return 0; } @@ -300,20 +409,26 @@ search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_ { int fd = open(path, O_RDONLY); if (fd == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot open binary file '%s' for section '%s' search: %s", + path, secname, strerror(errno)); return 0; } struct stat fs; if (fstat(fd, &fs) == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot get file size for binary '%s' during section '%s' search: %s", + path, secname, strerror(errno)); close(fd); return 0; } void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { - PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path); + PyErr_Format(PyExc_OSError, + "Cannot memory map binary file '%s' (size: %lld bytes) for section '%s' search: %s", + path, (long long)fs.st_size, secname, strerror(errno)); close(fd); return 0; } @@ -335,13 +450,22 @@ search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_ result = return_section_address_fat(secname, proc_ref, base, map); break; default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); + PyErr_Format(PyExc_RuntimeError, + "Unrecognized Mach-O magic number 0x%x in binary file '%s' for section '%s' search", + magic, path, secname); break; } - munmap(map, fs.st_size); + if (munmap(map, fs.st_size) != 0) { + PyErr_Format(PyExc_OSError, + "Failed to unmap binary file '%s' (size: %lld bytes): %s", + path, (long long)fs.st_size, strerror(errno)); + result = 0; + } if (close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close binary file '%s': %s", + path, strerror(errno)); result = 0; } return result; @@ -356,7 +480,10 @@ pid_to_task(pid_t pid) result = task_for_pid(mach_task_self(), pid, &task); if (result != KERN_SUCCESS) { - PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid); + PyErr_Format(PyExc_PermissionError, + "Cannot get task port for PID %d (kern_return_t: %d). " + "This typically requires running as root or having the 'com.apple.system-task-ports' entitlement.", + pid, result); return 0; } return task; @@ -373,13 +500,15 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s mach_port_t proc_ref = pid_to_task(handle->pid); if (proc_ref == 0) { if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID"); + PyErr_Format(PyExc_PermissionError, + "Cannot get task port for PID %d during section search", + handle->pid); } return 0; } - int match_found = 0; char map_filename[MAXPATHLEN + 1]; + while (mach_vm_region( proc_ref, &address, @@ -389,6 +518,7 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s &count, &object_name) == KERN_SUCCESS) { + if ((region_info.protection & VM_PROT_READ) == 0 || (region_info.protection & VM_PROT_EXECUTE) == 0) { address += size; @@ -409,21 +539,21 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s filename = map_filename; // No path, use the whole string } - if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { - match_found = 1; - return search_section_in_file( + if (strncmp(filename, substr, strlen(substr)) == 0) { + uintptr_t result = search_section_in_file( secname, map_filename, address, size, proc_ref); + if (result != 0) { + return result; + } } address += size; } - PyErr_SetString(PyExc_RuntimeError, - "mach_vm_region failed to find the section"); return 0; } -#endif // (__APPLE__ && TARGET_OS_OSX) +#endif // (__APPLE__ && defined(TARGET_OS_OSX) && TARGET_OS_OSX) #if defined(__linux__) && HAVE_PROCESS_VM_READV static uintptr_t @@ -442,24 +572,38 @@ search_elf_file_for_section( int fd = open(elf_file, O_RDONLY); if (fd < 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot open ELF file '%s' for section '%s' search: %s", + elf_file, secname, strerror(errno)); goto exit; } struct stat file_stats; if (fstat(fd, &file_stats) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot get file size for ELF file '%s' during section '%s' search: %s", + elf_file, secname, strerror(errno)); goto exit; } file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (file_memory == MAP_FAILED) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot memory map ELF file '%s' (size: %lld bytes) for section '%s' search: %s", + elf_file, (long long)file_stats.st_size, secname, strerror(errno)); goto exit; } Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory; + // Validate ELF header + if (elf_header->e_shstrndx >= elf_header->e_shnum) { + PyErr_Format(PyExc_RuntimeError, + "Invalid ELF file '%s': string table index %u >= section count %u", + elf_file, elf_header->e_shstrndx, elf_header->e_shnum); + goto exit; + } + Elf_Shdr* section_header_table = (Elf_Shdr*)(file_memory + elf_header->e_shoff); Elf_Shdr* shstrtab_section = &section_header_table[elf_header->e_shstrndx]; @@ -476,6 +620,10 @@ search_elf_file_for_section( } } + if (section == NULL) { + goto exit; + } + Elf_Phdr* program_header_table = (Elf_Phdr*)(file_memory + elf_header->e_phoff); // Find the first PT_LOAD segment Elf_Phdr* first_load_segment = NULL; @@ -486,18 +634,25 @@ search_elf_file_for_section( } } - if (section != NULL && first_load_segment != NULL) { - uintptr_t elf_load_addr = first_load_segment->p_vaddr - - (first_load_segment->p_vaddr % first_load_segment->p_align); - result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; + if (first_load_segment == NULL) { + PyErr_Format(PyExc_RuntimeError, + "No PT_LOAD segment found in ELF file '%s' (%u program headers examined)", + elf_file, elf_header->e_phnum); + goto exit; } + uintptr_t elf_load_addr = first_load_segment->p_vaddr + - (first_load_segment->p_vaddr % first_load_segment->p_align); + result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; + exit: if (file_memory != NULL) { munmap(file_memory, file_stats.st_size); } if (fd >= 0 && close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close ELF file '%s': %s", + elf_file, strerror(errno)); result = 0; } return result; @@ -511,7 +666,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c FILE* maps_file = fopen(maps_file_path, "r"); if (maps_file == NULL) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Cannot open process memory map file '%s' for PID %d section search: %s", + maps_file_path, handle->pid, strerror(errno)); return 0; } @@ -520,11 +677,14 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c char *line = PyMem_Malloc(linesz); if (!line) { fclose(maps_file); - PyErr_NoMemory(); + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot allocate memory for reading process map file '%s'", + maps_file_path); return 0; } uintptr_t retval = 0; + while (fgets(line + linelen, linesz - linelen, maps_file) != NULL) { linelen = strlen(line); if (line[linelen - 1] != '\n') { @@ -535,7 +695,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c if (!biggerline) { PyMem_Free(line); fclose(maps_file); - PyErr_NoMemory(); + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot reallocate memory while reading process map file '%s' (attempted size: %zu)", + maps_file_path, linesz); return 0; } line = biggerline; @@ -575,7 +737,9 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c PyMem_Free(line); if (fclose(maps_file) != 0) { - PyErr_SetFromErrno(PyExc_OSError); + PyErr_Format(PyExc_OSError, + "Failed to close process map file '%s': %s", + maps_file_path, strerror(errno)); retval = 0; } @@ -591,11 +755,20 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot open PE file for section '%s' analysis (error %lu)", + secname, error); return NULL; } + HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, 0); if (!hMap) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot create file mapping for PE file section '%s' analysis (error %lu)", + secname, error); CloseHandle(hFile); return NULL; } @@ -603,6 +776,10 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* BYTE* mapView = (BYTE*)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); if (!mapView) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_OSError, + "Cannot map view of PE file for section '%s' analysis (error %lu)", + secname, error); CloseHandle(hMap); CloseHandle(hFile); return NULL; @@ -610,7 +787,9 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* IMAGE_DOS_HEADER* pDOSHeader = (IMAGE_DOS_HEADER*)mapView; if (pDOSHeader->e_magic != IMAGE_DOS_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid DOS signature."); + PyErr_Format(PyExc_RuntimeError, + "Invalid DOS signature (0x%x) in PE file for section '%s' analysis (expected 0x%x)", + pDOSHeader->e_magic, secname, IMAGE_DOS_SIGNATURE); UnmapViewOfFile(mapView); CloseHandle(hMap); CloseHandle(hFile); @@ -619,7 +798,9 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* IMAGE_NT_HEADERS* pNTHeaders = (IMAGE_NT_HEADERS*)(mapView + pDOSHeader->e_lfanew); if (pNTHeaders->Signature != IMAGE_NT_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid NT signature."); + PyErr_Format(PyExc_RuntimeError, + "Invalid NT signature (0x%lx) in PE file for section '%s' analysis (expected 0x%lx)", + pNTHeaders->Signature, secname, IMAGE_NT_SIGNATURE); UnmapViewOfFile(mapView); CloseHandle(hMap); CloseHandle(hFile); @@ -653,7 +834,12 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const } while (hProcSnap == INVALID_HANDLE_VALUE && GetLastError() == ERROR_BAD_LENGTH); if (hProcSnap == INVALID_HANDLE_VALUE) { - PyErr_SetString(PyExc_PermissionError, "Unable to create module snapshot. Check permissions or PID."); + PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + PyErr_Format(PyExc_PermissionError, + "Unable to create module snapshot for PID %d section '%s' " + "search (error %lu). Check permissions or PID validity", + handle->pid, secname, error); return 0; } @@ -672,6 +858,7 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const } CloseHandle(hProcSnap); + return (uintptr_t)runtime_addr; } @@ -689,7 +876,9 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) if (address == 0) { // Error out: 'python' substring covers both executable and DLL PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d on Windows platform", + handle->pid); _PyErr_ChainExceptions1(exc); } #elif defined(__linux__) @@ -698,16 +887,28 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) if (address == 0) { // Error out: 'python' substring covers both executable and DLL PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d on Linux platform", + handle->pid); _PyErr_ChainExceptions1(exc); } -#elif defined(__APPLE__) && TARGET_OS_OSX +#elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "PyRuntime", "libpython"); - if (address == 0) { - // TODO: Differentiate between not found and error + const char* candidates[] = {"libpython", "python", "Python", NULL}; + for (const char** candidate = candidates; *candidate; candidate++) { PyErr_Clear(); - address = search_map_for_section(handle, "PyRuntime", "python"); + address = search_map_for_section(handle, "PyRuntime", *candidate); + if (address != 0) { + break; + } + } + if (address == 0) { + PyObject *exc = PyErr_GetRaisedException(); + PyErr_Format(PyExc_RuntimeError, + "Failed to find the PyRuntime section in process %d " + "on macOS platform (tried both libpython and python)", + handle->pid); + _PyErr_ChainExceptions1(exc); } #else Py_UNREACHABLE(); @@ -716,6 +917,61 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) return address; } +#if defined(__linux__) && HAVE_PROCESS_VM_READV + +static int +open_proc_mem_fd(proc_handle_t *handle) +{ + char mem_file_path[64]; + sprintf(mem_file_path, "/proc/%d/mem", handle->pid); + + handle->memfd = open(mem_file_path, O_RDWR); + if (handle->memfd == -1) { + PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "failed to open file %s: %s", mem_file_path, strerror(errno)); + return -1; + } + return 0; +} + +// Why is pwritev not guarded? Except on Android API level 23 (no longer +// supported), HAVE_PROCESS_VM_READV is sufficient. +static int +read_remote_memory_fallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) +{ + if (handle->memfd == -1) { + if (open_proc_mem_fd(handle) < 0) { + return -1; + } + } + + struct iovec local[1]; + Py_ssize_t result = 0; + Py_ssize_t read_bytes = 0; + + do { + local[0].iov_base = (char*)dst + result; + local[0].iov_len = len - result; + off_t offset = remote_address + result; + + read_bytes = preadv(handle->memfd, local, 1, offset); + if (read_bytes < 0) { + PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "preadv failed for PID %d at address 0x%lx " + "(size %zu, partial read %zd bytes): %s", + handle->pid, remote_address + result, len - result, result, strerror(errno)); + return -1; + } + + result += read_bytes; + } while ((size_t)read_bytes != local[0].iov_len); + return 0; +} + +#endif // __linux__ + // Platform-independent memory read function static int _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) @@ -726,12 +982,20 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address do { if (!ReadProcessMemory(handle->hProcess, (LPCVOID)(remote_address + result), (char*)dst + result, len - result, &read_bytes)) { PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + _set_debug_exception_cause(PyExc_OSError, + "ReadProcessMemory failed for PID %d at address 0x%lx " + "(size %zu, partial read %zu bytes): Windows error %lu", + handle->pid, remote_address + result, len - result, result, error); return -1; } result += read_bytes; } while (result < len); return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV + if (handle->memfd != -1) { + return read_remote_memory_fallback(handle, remote_address, len, dst); + } struct iovec local[1]; struct iovec remote[1]; Py_ssize_t result = 0; @@ -745,17 +1009,24 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address read_bytes = process_vm_readv(handle->pid, local, 1, remote, 1, 0); if (read_bytes < 0) { + if (errno == ENOSYS) { + return read_remote_memory_fallback(handle, remote_address, len, dst); + } PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "process_vm_readv failed for PID %d at address 0x%lx " + "(size %zu, partial read %zd bytes): %s", + handle->pid, remote_address + result, len - result, result, strerror(errno)); return -1; } result += read_bytes; } while ((size_t)read_bytes != local[0].iov_len); return 0; -#elif defined(__APPLE__) && TARGET_OS_OSX +#elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(handle->pid), + handle->task, (mach_vm_address_t)remote_address, len, (mach_vm_address_t)dst, @@ -764,13 +1035,22 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address if (kr != KERN_SUCCESS) { switch (kr) { case KERN_PROTECTION_FAILURE: - PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); + PyErr_Format(PyExc_PermissionError, + "Memory protection failure reading from PID %d at address " + "0x%lx (size %zu): insufficient permissions", + handle->pid, remote_address, len); break; case KERN_INVALID_ARGUMENT: - PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite"); + PyErr_Format(PyExc_ValueError, + "Invalid argument to mach_vm_read_overwrite for PID %d at " + "address 0x%lx (size %zu)", + handle->pid, remote_address, len); break; default: - PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); + PyErr_Format(PyExc_RuntimeError, + "mach_vm_read_overwrite failed for PID %d at address 0x%lx " + "(size %zu): kern_return_t %d", + handle->pid, remote_address, len, kr); } return -1; } @@ -780,6 +1060,62 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #endif } +UNUSED static int +_Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, + uintptr_t addr, + size_t size, + void *out) +{ + size_t page_size = handle->page_size; + uintptr_t page_base = addr & ~(page_size - 1); + size_t offset_in_page = addr - page_base; + + if (offset_in_page + size > page_size) { + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); + } + + // Search for valid cached page + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (entry->valid && entry->page_addr == page_base) { + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + + // Find reusable slot + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (!entry->valid) { + if (entry->data == NULL) { + entry->data = PyMem_RawMalloc(page_size); + if (entry->data == NULL) { + _set_debug_exception_cause(PyExc_MemoryError, + "Cannot allocate %zu bytes for page cache entry " + "during read from PID %d at address 0x%lx", + page_size, handle->pid, addr); + return -1; + } + } + + if (_Py_RemoteDebug_ReadRemoteMemory(handle, page_base, page_size, entry->data) < 0) { + // Try to just copy the exact ammount as a fallback + PyErr_Clear(); + goto fallback; + } + + entry->page_addr = page_base; + entry->valid = 1; + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + +fallback: + // Cache full — fallback to uncached read + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); +} + static int _Py_RemoteDebug_ReadDebugOffsets( proc_handle_t *handle, @@ -789,13 +1125,16 @@ _Py_RemoteDebug_ReadDebugOffsets( *runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); if (!*runtime_start_address) { if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get PyRuntime address"); + PyErr_Format(PyExc_RuntimeError, + "Failed to locate PyRuntime address for PID %d", + handle->pid); } + _set_debug_exception_cause(PyExc_RuntimeError, "PyRuntime address lookup failed during debug offsets initialization"); return -1; } size_t size = sizeof(struct _Py_DebugOffsets); if (0 != _Py_RemoteDebug_ReadRemoteMemory(handle, *runtime_start_address, size, debug_offsets)) { + _set_debug_exception_cause(PyExc_RuntimeError, "Failed to read debug offsets structure from remote process"); return -1; } return 0; diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index dd55b7812d4dee..71ffb17ed68b1d 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -19,11 +19,44 @@ cleanup_proc_handle(proc_handle_t *handle) { } static int -read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* dst) +read_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) { return _Py_RemoteDebug_ReadRemoteMemory(handle, remote_address, len, dst); } +// Why is pwritev not guarded? Except on Android API level 23 (no longer +// supported), HAVE_PROCESS_VM_READV is sufficient. +#if defined(__linux__) && HAVE_PROCESS_VM_READV +static int +write_memory_fallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) +{ + if (handle->memfd == -1) { + if (open_proc_mem_fd(handle) < 0) { + return -1; + } + } + + struct iovec local[1]; + Py_ssize_t result = 0; + Py_ssize_t written = 0; + + do { + local[0].iov_base = (char*)src + result; + local[0].iov_len = len - result; + off_t offset = remote_address + result; + + written = pwritev(handle->memfd, local, 1, offset); + if (written < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + result += written; + } while ((size_t)written != local[0].iov_len); + return 0; +} +#endif // __linux__ + static int write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { @@ -39,6 +72,9 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const } while (result < len); return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV + if (handle->memfd != -1) { + return write_memory_fallback(handle, remote_address, len, src); + } struct iovec local[1]; struct iovec remote[1]; Py_ssize_t result = 0; @@ -52,6 +88,9 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const written = process_vm_writev(handle->pid, local, 1, remote, 1, 0); if (written < 0) { + if (errno == ENOSYS) { + return write_memory_fallback(handle, remote_address, len, src); + } PyErr_SetFromErrno(PyExc_OSError); return -1; } @@ -196,7 +235,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc int is_remote_debugging_enabled = 0; if (0 != read_memory( handle, - interpreter_state_addr + debug_offsets.debugger_support.remote_debugging_enabled, + interpreter_state_addr + (uintptr_t)debug_offsets.debugger_support.remote_debugging_enabled, sizeof(int), &is_remote_debugging_enabled)) { @@ -216,7 +255,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc if (tid != 0) { if (0 != read_memory( handle, - interpreter_state_addr + debug_offsets.interpreter_state.threads_head, + interpreter_state_addr + (uintptr_t)debug_offsets.interpreter_state.threads_head, sizeof(void*), &thread_state_addr)) { @@ -225,7 +264,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc while (thread_state_addr != 0) { if (0 != read_memory( handle, - thread_state_addr + debug_offsets.thread_state.native_thread_id, + thread_state_addr + (uintptr_t)debug_offsets.thread_state.native_thread_id, sizeof(this_tid), &this_tid)) { @@ -238,7 +277,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc if (0 != read_memory( handle, - thread_state_addr + debug_offsets.thread_state.next, + thread_state_addr + (uintptr_t)debug_offsets.thread_state.next, sizeof(void*), &thread_state_addr)) { @@ -255,7 +294,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc } else { if (0 != read_memory( handle, - interpreter_state_addr + debug_offsets.interpreter_state.threads_main, + interpreter_state_addr + (uintptr_t)debug_offsets.interpreter_state.threads_main, sizeof(void*), &thread_state_addr)) { @@ -307,7 +346,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc uintptr_t eval_breaker; if (0 != read_memory( handle, - thread_state_addr + debug_offsets.debugger_support.eval_breaker, + thread_state_addr + (uintptr_t)debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker)) { diff --git a/Python/specialize.c b/Python/specialize.c index bbe725c8ec8381..4ba854d2ae8c3a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -118,6 +118,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); err += add_stat_dict(stats, STORE_SUBSCR, "store_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); + err += add_stat_dict(stats, JUMP_BACKWARD, "jump_backward"); err += add_stat_dict(stats, CALL, "call"); err += add_stat_dict(stats, CALL_KW, "call_kw"); err += add_stat_dict(stats, BINARY_OP, "binary_op"); @@ -629,6 +630,7 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, int enable_counters #define SPEC_FAIL_CALL_INIT_NOT_PYTHON 21 #define SPEC_FAIL_CALL_PEP_523 22 #define SPEC_FAIL_CALL_BOUND_METHOD 23 +#define SPEC_FAIL_CALL_VECTORCALL 24 #define SPEC_FAIL_CALL_CLASS_MUTABLE 26 #define SPEC_FAIL_CALL_METHOD_WRAPPER 28 #define SPEC_FAIL_CALL_OPERATOR_WRAPPER 29 @@ -934,8 +936,7 @@ analyze_descriptor_load(PyTypeObject *type, PyObject *name, PyObject **descr, un PyObject *getattr = _PyType_Lookup(type, &_Py_ID(__getattr__)); has_getattr = getattr != NULL; if (has_custom_getattribute) { - if (getattro_slot == _Py_slot_tp_getattro && - !has_getattr && + if (!has_getattr && Py_IS_TYPE(getattribute, &PyFunction_Type)) { *descr = getattribute; *tp_version = ga_version; @@ -1258,12 +1259,6 @@ do_specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* return -1; case GETATTRIBUTE_IS_PYTHON_FUNCTION: { - #ifndef Py_GIL_DISABLED - // In free-threaded builds it's possible for tp_getattro to change - // after the call to analyze_descriptor. That is fine: the version - // guard will fail. - assert(type->tp_getattro == _Py_slot_tp_getattro); - #endif assert(Py_IS_TYPE(descr, &PyFunction_Type)); _PyLoadMethodCache *lm_cache = (_PyLoadMethodCache *)(instr + 1); if (!function_check_args(descr, 2, LOAD_ATTR)) { @@ -2077,6 +2072,10 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PEP_523); return -1; } + if (func->vectorcall != _PyFunction_Vectorcall) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_VECTORCALL); + return -1; + } int argcount = -1; if (kind == SPEC_FAIL_CODE_NOT_OPTIMIZED) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CODE_NOT_OPTIMIZED); @@ -2116,6 +2115,10 @@ specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PEP_523); return -1; } + if (func->vectorcall != _PyFunction_Vectorcall) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_VECTORCALL); + return -1; + } if (kind == SPEC_FAIL_CODE_NOT_OPTIMIZED) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CODE_NOT_OPTIMIZED); return -1; diff --git a/Python/stackrefs.c b/Python/stackrefs.c index 979a6b1c62820a..69d4e8b943159f 100644 --- a/Python/stackrefs.c +++ b/Python/stackrefs.c @@ -1,4 +1,3 @@ - #include "Python.h" #include "pycore_object.h" @@ -34,6 +33,7 @@ make_table_entry(PyObject *obj, const char *filename, int linenumber) result->filename = filename; result->linenumber = linenumber; result->filename_borrow = NULL; + result->linenumber_borrow = 0; return result; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 00dce4527fbb90..545b130836e26c 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1183,9 +1183,10 @@ sys__settraceallthreads(PyObject *module, PyObject *arg) argument = arg; } - - PyEval_SetTraceAllThreads(func, argument); - + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (_PyEval_SetTraceAllThreads(interp, func, argument) < 0) { + return NULL; + } Py_RETURN_NONE; } @@ -1263,8 +1264,10 @@ sys__setprofileallthreads(PyObject *module, PyObject *arg) argument = arg; } - PyEval_SetProfileAllThreads(func, argument); - + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (_PyEval_SetProfileAllThreads(interp, func, argument) < 0) { + return NULL; + } Py_RETURN_NONE; } @@ -1670,6 +1673,9 @@ _sys_getwindowsversion_from_kernel32(void) !GetFileVersionInfoW(kernel32_path, 0, verblock_size, verblock) || !VerQueryValueW(verblock, L"", (LPVOID)&ffi, &ffi_len)) { PyErr_SetFromWindowsErr(0); + if (verblock) { + PyMem_RawFree(verblock); + } return NULL; } @@ -2367,14 +2373,14 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend) return NULL; } } - else if (strcmp(backend, "perf_jit") == 0) { - _PyPerf_Callbacks cur_cb; - _PyPerfTrampoline_GetCallbacks(&cur_cb); - if (cur_cb.write_state != _Py_perfmap_jit_callbacks.write_state) { - if (_PyPerfTrampoline_SetCallbacks(&_Py_perfmap_jit_callbacks) < 0 ) { - PyErr_SetString(PyExc_ValueError, "can't activate perf jit trampoline"); - return NULL; - } + } + else if (strcmp(backend, "perf_jit") == 0) { + _PyPerf_Callbacks cur_cb; + _PyPerfTrampoline_GetCallbacks(&cur_cb); + if (cur_cb.write_state != _Py_perfmap_jit_callbacks.write_state) { + if (_PyPerfTrampoline_SetCallbacks(&_Py_perfmap_jit_callbacks) < 0 ) { + PyErr_SetString(PyExc_ValueError, "can't activate perf jit trampoline"); + return NULL; } } } @@ -2448,26 +2454,63 @@ sys_is_remote_debug_enabled_impl(PyObject *module) #endif } +/*[clinic input] +sys.remote_exec + + pid: int + script: object + +Executes a file containing Python code in a given remote Python process. + +This function returns immediately, and the code will be executed by the +target process's main thread at the next available opportunity, similarly +to how signals are handled. There is no interface to determine when the +code has been executed. The caller is responsible for making sure that +the file still exists whenever the remote process tries to read it and that +it hasn't been overwritten. + +The remote process must be running a CPython interpreter of the same major +and minor version as the local process. If either the local or remote +interpreter is pre-release (alpha, beta, or release candidate) then the +local and remote interpreters must be the same exact version. + +Args: + pid (int): The process ID of the target Python process. + script (str|bytes): The path to a file containing + the Python code to be executed. +[clinic start generated code]*/ + static PyObject * -sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script) +sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) +/*[clinic end generated code: output=7d94c56afe4a52c0 input=39908ca2c5fe1eb0]*/ { - const char *debugger_script_path = PyUnicode_AsUTF8(script); - if (debugger_script_path == NULL) { + PyObject *path; + const char *debugger_script_path; + + if (PyUnicode_FSConverter(script, &path) == 0) { + return NULL; + } + + if (PySys_Audit("sys.remote_exec", "iO", pid, script) < 0) { return NULL; } + debugger_script_path = PyBytes_AS_STRING(path); #ifdef MS_WINDOWS + PyObject *unicode_path; + if (PyUnicode_FSDecoder(path, &unicode_path) < 0) { + goto error; + } // Use UTF-16 (wide char) version of the path for permission checks - wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(script, NULL); + wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(unicode_path, NULL); + Py_DECREF(unicode_path); if (debugger_script_path_w == NULL) { - return NULL; + goto error; } - - // Check file attributes using wide character version (W) instead of ANSI (A) DWORD attr = GetFileAttributesW(debugger_script_path_w); - PyMem_Free(debugger_script_path_w); if (attr == INVALID_FILE_ATTRIBUTES) { DWORD err = GetLastError(); + PyMem_Free(debugger_script_path_w); if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) { PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist"); } @@ -2475,11 +2518,12 @@ sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script) PyErr_SetString(PyExc_PermissionError, "Script file cannot be read"); } else { - PyErr_SetFromWindowsErr(0); + PyErr_SetFromWindowsErr(err); } - return NULL; + goto error; } -#else + PyMem_Free(debugger_script_path_w); +#else // MS_WINDOWS if (access(debugger_script_path, F_OK | R_OK) != 0) { switch (errno) { case ENOENT: @@ -2491,54 +2535,19 @@ sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script) default: PyErr_SetFromErrno(PyExc_OSError); } - return NULL; + goto error; } -#endif - +#endif // MS_WINDOWS if (_PySysRemoteDebug_SendExec(pid, 0, debugger_script_path) < 0) { - return NULL; + goto error; } + Py_DECREF(path); Py_RETURN_NONE; -} - -/*[clinic input] -sys.remote_exec - pid: int - script: object - -Executes a file containing Python code in a given remote Python process. - -This function returns immediately, and the code will be executed by the -target process's main thread at the next available opportunity, similarly -to how signals are handled. There is no interface to determine when the -code has been executed. The caller is responsible for making sure that -the file still exists whenever the remote process tries to read it and that -it hasn't been overwritten. - -The remote process must be running a CPython interpreter of the same major -and minor version as the local process. If either the local or remote -interpreter is pre-release (alpha, beta, or release candidate) then the -local and remote interpreters must be the same exact version. - -Args: - pid (int): The process ID of the target Python process. - script (str|bytes): The path to a file containing - the Python code to be executed. -[clinic start generated code]*/ - -static PyObject * -sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) -/*[clinic end generated code: output=7d94c56afe4a52c0 input=39908ca2c5fe1eb0]*/ -{ - PyObject *ret = NULL; - PyObject *path; - if (PyUnicode_FSDecoder(script, &path)) { - ret = sys_remote_exec_unicode_path(module, pid, path); - Py_DECREF(path); - } - return ret; +error: + Py_DECREF(path); + return NULL; } @@ -2634,6 +2643,47 @@ sys__baserepl_impl(PyObject *module) Py_RETURN_NONE; } +/*[clinic input] +sys._clear_type_descriptors + + type: object(subclass_of='&PyType_Type') + / + +Private function for clearing certain descriptors from a type's dictionary. + +See gh-135228 for context. +[clinic start generated code]*/ + +static PyObject * +sys__clear_type_descriptors_impl(PyObject *module, PyObject *type) +/*[clinic end generated code: output=5ad17851b762b6d9 input=dc536c97fde07251]*/ +{ + PyTypeObject *typeobj = (PyTypeObject *)type; + if (_PyType_HasFeature(typeobj, Py_TPFLAGS_IMMUTABLETYPE)) { + PyErr_SetString(PyExc_TypeError, "argument is immutable"); + return NULL; + } + PyObject *dict = _PyType_GetDict(typeobj); + PyObject *dunder_dict = NULL; + if (PyDict_Pop(dict, &_Py_ID(__dict__), &dunder_dict) < 0) { + return NULL; + } + PyObject *dunder_weakref = NULL; + if (PyDict_Pop(dict, &_Py_ID(__weakref__), &dunder_weakref) < 0) { + PyType_Modified(typeobj); + Py_XDECREF(dunder_dict); + return NULL; + } + PyType_Modified(typeobj); + // We try to hold onto a reference to these until after we call + // PyType_Modified(), in case their deallocation triggers somer user code + // that tries to do something to the type. + Py_XDECREF(dunder_dict); + Py_XDECREF(dunder_weakref); + Py_RETURN_NONE; +} + + /*[clinic input] sys._is_gil_enabled -> bool @@ -2736,20 +2786,31 @@ PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) { } char buf[4096]; PyThread_acquire_lock(perf_map_state.map_lock, 1); - int fflush_result = 0, result = 0; + int result = 0; while (1) { size_t bytes_read = fread(buf, 1, sizeof(buf), from); + if (bytes_read == 0) { + if (ferror(from)) { + result = -1; + } + break; + } + size_t bytes_written = fwrite(buf, 1, bytes_read, perf_map_state.perf_map); - fflush_result = fflush(perf_map_state.perf_map); - if (fflush_result != 0 || bytes_read == 0 || bytes_written < bytes_read) { + if (bytes_written < bytes_read) { result = -1; - goto close_and_release; + break; + } + + if (fflush(perf_map_state.perf_map) != 0) { + result = -1; + break; } + if (bytes_read < sizeof(buf) && feof(from)) { - goto close_and_release; + break; } } -close_and_release: fclose(from); PyThread_release_lock(perf_map_state.map_lock); return result; @@ -2830,6 +2891,7 @@ static PyMethodDef sys_methods[] = { SYS__STATS_DUMP_METHODDEF #endif SYS__GET_CPU_COUNT_CONFIG_METHODDEF + SYS__CLEAR_TYPE_DESCRIPTORS_METHODDEF SYS__IS_GIL_ENABLED_METHODDEF SYS__DUMP_TRACELETS_METHODDEF {NULL, NULL} // sentinel @@ -3600,6 +3662,18 @@ make_impl_info(PyObject *version_info) goto error; #endif + // PEP-734 +#if defined(__wasi__) || defined(__EMSCRIPTEN__) + // It is not enabled on WASM builds just yet + value = Py_False; +#else + value = Py_True; +#endif + res = PyDict_SetItemString(impl_info, "supports_isolated_interpreters", value); + if (res < 0) { + goto error; + } + /* dict ready */ ns = _PyNamespace_New(impl_info); diff --git a/Python/traceback.c b/Python/traceback.c index c06cb1a59089e2..86273da3c8ae3c 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -70,6 +70,13 @@ class traceback "PyTracebackObject *" "&PyTraceback_Type" #include "clinic/traceback.c.h" + +#ifdef MS_WINDOWS +typedef HRESULT (WINAPI *PF_GET_THREAD_DESCRIPTION)(HANDLE, PCWSTR*); +static PF_GET_THREAD_DESCRIPTION pGetThreadDescription = NULL; +#endif + + static PyObject * tb_create_raw(PyTracebackObject *next, PyFrameObject *frame, int lasti, int lineno) @@ -974,16 +981,72 @@ _Py_DumpASCII(int fd, PyObject *text) } } + +#ifdef MS_WINDOWS +static void +_Py_DumpWideString(int fd, wchar_t *str) +{ + Py_ssize_t size = wcslen(str); + int truncated; + if (MAX_STRING_LENGTH < size) { + size = MAX_STRING_LENGTH; + truncated = 1; + } + else { + truncated = 0; + } + + for (Py_ssize_t i=0; i < size; i++) { + Py_UCS4 ch = str[i]; + if (' ' <= ch && ch <= 126) { + /* printable ASCII character */ + dump_char(fd, (char)ch); + } + else if (ch <= 0xff) { + PUTS(fd, "\\x"); + _Py_DumpHexadecimal(fd, ch, 2); + } + else if (Py_UNICODE_IS_HIGH_SURROGATE(ch) + && Py_UNICODE_IS_LOW_SURROGATE(str[i+1])) { + ch = Py_UNICODE_JOIN_SURROGATES(ch, str[i+1]); + i++; // Skip the low surrogate character + PUTS(fd, "\\U"); + _Py_DumpHexadecimal(fd, ch, 8); + } + else { + Py_BUILD_ASSERT(sizeof(wchar_t) == 2); + PUTS(fd, "\\u"); + _Py_DumpHexadecimal(fd, ch, 4); + } + } + + if (truncated) { + PUTS(fd, "..."); + } +} +#endif + + /* Write a frame into the file fd: "File "xxx", line xxx in xxx". - This function is signal safe. */ + This function is signal safe. -static void + Return 0 on success. Return -1 if the frame is invalid. */ + +static int dump_frame(int fd, _PyInterpreterFrame *frame) { - assert(frame->owner < FRAME_OWNED_BY_INTERPRETER); + if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { + /* Ignore trampoline frame */ + return 0; + } + + PyCodeObject *code = _PyFrame_SafeGetCode(frame); + if (code == NULL) { + return -1; + } - PyCodeObject *code =_PyFrame_GetCode(frame); + int res = 0; PUTS(fd, " File "); if (code->co_filename != NULL && PyUnicode_Check(code->co_filename)) @@ -991,29 +1054,36 @@ dump_frame(int fd, _PyInterpreterFrame *frame) PUTS(fd, "\""); _Py_DumpASCII(fd, code->co_filename); PUTS(fd, "\""); - } else { + } + else { PUTS(fd, "???"); + res = -1; } - int lineno = PyUnstable_InterpreterFrame_GetLine(frame); PUTS(fd, ", line "); + int lasti = _PyFrame_SafeGetLasti(frame); + int lineno = -1; + if (lasti >= 0) { + lineno = _PyCode_SafeAddr2Line(code, lasti); + } if (lineno >= 0) { _Py_DumpDecimal(fd, (size_t)lineno); } else { PUTS(fd, "???"); + res = -1; } - PUTS(fd, " in "); - if (code->co_name != NULL - && PyUnicode_Check(code->co_name)) { + PUTS(fd, " in "); + if (code->co_name != NULL && PyUnicode_Check(code->co_name)) { _Py_DumpASCII(fd, code->co_name); } else { PUTS(fd, "???"); + res = -1; } - PUTS(fd, "\n"); + return res; } static int @@ -1025,6 +1095,9 @@ tstate_is_freed(PyThreadState *tstate) if (_PyMem_IsPtrFreed(tstate->interp)) { return 1; } + if (_PyMem_IsULongFreed(tstate->thread_id)) { + return 1; + } return 0; } @@ -1044,7 +1117,7 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) } if (tstate_is_freed(tstate)) { - PUTS(fd, " <tstate is freed>\n"); + PUTS(fd, " <freed thread state>\n"); return; } @@ -1056,17 +1129,6 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) unsigned int depth = 0; while (1) { - if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { - /* Trampoline frame */ - frame = frame->previous; - if (frame == NULL) { - break; - } - - /* Can't have more than one shim frame in a row */ - assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - } - if (MAX_FRAME_DEPTH <= depth) { if (MAX_FRAME_DEPTH < depth) { PUTS(fd, "plus "); @@ -1076,8 +1138,20 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) break; } - dump_frame(fd, frame); - frame = frame->previous; + if (_PyMem_IsPtrFreed(frame)) { + PUTS(fd, " <freed frame>\n"); + break; + } + // Read frame->previous early since memory can be freed during + // dump_frame() + _PyInterpreterFrame *previous = frame->previous; + + if (dump_frame(fd, frame) < 0) { + PUTS(fd, " <invalid frame>\n"); + break; + } + + frame = previous; if (frame == NULL) { break; } @@ -1108,23 +1182,12 @@ _Py_DumpTraceback(int fd, PyThreadState *tstate) # endif #endif -/* Write the thread identifier into the file 'fd': "Current thread 0xHHHH:\" if - is_current is true, "Thread 0xHHHH:\n" otherwise. - - This function is signal safe. */ +// Write the thread name static void -write_thread_id(int fd, PyThreadState *tstate, int is_current) +write_thread_name(int fd, PyThreadState *tstate) { - if (is_current) - PUTS(fd, "Current thread 0x"); - else - PUTS(fd, "Thread 0x"); - _Py_DumpHexadecimal(fd, - tstate->thread_id, - sizeof(unsigned long) * 2); - - // Write the thread name +#ifndef MS_WINDOWS #if defined(HAVE_PTHREAD_GETNAME_NP) || defined(HAVE_PTHREAD_GET_NAME_NP) char name[100]; pthread_t thread = (pthread_t)tstate->thread_id; @@ -1143,6 +1206,51 @@ write_thread_id(int fd, PyThreadState *tstate, int is_current) } } #endif +#else + // Windows implementation + if (pGetThreadDescription == NULL) { + return; + } + + HANDLE thread = OpenThread(THREAD_QUERY_LIMITED_INFORMATION, FALSE, tstate->thread_id); + if (thread == NULL) { + return; + } + + wchar_t *name; + HRESULT hr = pGetThreadDescription(thread, &name); + if (!FAILED(hr)) { + if (name[0] != 0) { + PUTS(fd, " ["); + _Py_DumpWideString(fd, name); + PUTS(fd, "]"); + } + LocalFree(name); + } + CloseHandle(thread); +#endif +} + + +/* Write the thread identifier into the file 'fd': "Current thread 0xHHHH:\" if + is_current is true, "Thread 0xHHHH:\n" otherwise. + + This function is signal safe (except on Windows). */ + +static void +write_thread_id(int fd, PyThreadState *tstate, int is_current) +{ + if (is_current) + PUTS(fd, "Current thread 0x"); + else + PUTS(fd, "Thread 0x"); + _Py_DumpHexadecimal(fd, + tstate->thread_id, + sizeof(unsigned long) * 2); + + if (!_PyMem_IsULongFreed(tstate->thread_id)) { + write_thread_name(fd, tstate); + } PUTS(fd, " (most recent call first):\n"); } @@ -1200,7 +1308,6 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, return "unable to get the thread head state"; /* Dump the traceback of each thread */ - tstate = PyInterpreterState_ThreadHead(interp); unsigned int nthreads = 0; _Py_BEGIN_SUPPRESS_IPH do @@ -1211,11 +1318,18 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, PUTS(fd, "...\n"); break; } + + if (tstate_is_freed(tstate)) { + PUTS(fd, "<freed thread state>\n"); + break; + } + write_thread_id(fd, tstate, tstate == current_tstate); if (tstate == current_tstate && tstate->interp->gc.collecting) { PUTS(fd, " Garbage-collecting\n"); } dump_traceback(fd, tstate, 0); + tstate = PyThreadState_Next(tstate); nthreads++; } while (tstate != NULL); @@ -1327,3 +1441,30 @@ _Py_DumpStack(int fd) PUTS(fd, " <cannot get C stack on this system>\n"); } #endif + +void +_Py_InitDumpStack(void) +{ +#ifdef CAN_C_BACKTRACE + // gh-137185: Call backtrace() once to force libgcc to be loaded early. + void *callstack[1]; + (void)backtrace(callstack, 1); +#endif +} + + +void +_Py_DumpTraceback_Init(void) +{ +#ifdef MS_WINDOWS + if (pGetThreadDescription != NULL) { + return; + } + + HMODULE kernelbase = GetModuleHandleW(L"kernelbase.dll"); + if (kernelbase != NULL) { + pGetThreadDescription = (PF_GET_THREAD_DESCRIPTION)GetProcAddress( + kernelbase, "GetThreadDescription"); + } +#endif +} diff --git a/README.rst b/README.rst index 897d8995b63d9b..90e1ed6e07931d 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ -This is Python version 3.14.0 beta 1 -==================================== +This is Python version 3.14.2 +============================= .. image:: https://github.com/python/cpython/actions/workflows/build.yml/badge.svg?branch=main&event=push :alt: CPython build status on GitHub Actions @@ -153,7 +153,7 @@ Documentation updated daily. It can also be downloaded in many formats for faster access. The documentation -is downloadable in HTML, PDF, and reStructuredText formats; the latter version +is downloadable in HTML, EPUB, and reStructuredText formats; the latter version is primarily for documentation authors, translators, and people with special formatting requirements. @@ -232,4 +232,4 @@ This Python distribution contains *no* GNU General Public License (GPL) code, so it may be used in proprietary projects. There are interfaces to some GNU code but these are entirely optional. -All trademarks referenced herein are property of their respective holders. \ No newline at end of file +All trademarks referenced herein are property of their respective holders. diff --git a/Tools/build/.ruff.toml b/Tools/build/.ruff.toml index fa7689d45dbcb7..996f725fdcb9b5 100644 --- a/Tools/build/.ruff.toml +++ b/Tools/build/.ruff.toml @@ -1,7 +1,7 @@ extend = "../../.ruff.toml" # Inherit the project-wide settings [per-file-target-version] -"deepfreeze.py" = "py310" +"deepfreeze.py" = "py311" # requires `code.co_exceptiontable` "stable_abi.py" = "py311" # requires 'tomllib' [format] @@ -29,7 +29,6 @@ ignore = [ "F541", # f-string without any placeholders "PYI024", # Use `typing.NamedTuple` instead of `collections.namedtuple` "PYI025", # Use `from collections.abc import Set as AbstractSet` - "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` ] [lint.per-file-ignores] diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index 9815bcfe27d995..668db8df0bd181 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -17,9 +17,11 @@ See --help for more information """ + +from __future__ import annotations + import _imp import argparse -import collections import enum import logging import os @@ -29,13 +31,16 @@ import sysconfig import warnings from collections.abc import Iterable -from importlib._bootstrap import _load as bootstrap_load +from importlib._bootstrap import ( # type: ignore[attr-defined] + _load as bootstrap_load, +) from importlib.machinery import ( BuiltinImporter, ExtensionFileLoader, ModuleSpec, ) from importlib.util import spec_from_file_location, spec_from_loader +from typing import NamedTuple SRC_DIR = pathlib.Path(__file__).parent.parent.parent @@ -112,6 +117,7 @@ ) +@enum.unique class ModuleState(enum.Enum): # Makefile state "yes" BUILTIN = "builtin" @@ -123,11 +129,13 @@ class ModuleState(enum.Enum): # disabled by Setup / makesetup rule DISABLED_SETUP = "disabled_setup" - def __bool__(self): + def __bool__(self) -> bool: return self.value in {"builtin", "shared"} -ModuleInfo = collections.namedtuple("ModuleInfo", "name state") +class ModuleInfo(NamedTuple): + name: str + state: ModuleState class ModuleChecker: @@ -135,9 +143,9 @@ class ModuleChecker: setup_files = ( # see end of configure.ac - "Modules/Setup.local", - "Modules/Setup.stdlib", - "Modules/Setup.bootstrap", + pathlib.Path("Modules/Setup.local"), + pathlib.Path("Modules/Setup.stdlib"), + pathlib.Path("Modules/Setup.bootstrap"), SRC_DIR / "Modules/Setup", ) @@ -149,15 +157,15 @@ def __init__(self, cross_compiling: bool = False, strict: bool = False): self.builddir = self.get_builddir() self.modules = self.get_modules() - self.builtin_ok = [] - self.shared_ok = [] - self.failed_on_import = [] - self.missing = [] - self.disabled_configure = [] - self.disabled_setup = [] - self.notavailable = [] + self.builtin_ok: list[ModuleInfo] = [] + self.shared_ok: list[ModuleInfo] = [] + self.failed_on_import: list[ModuleInfo] = [] + self.missing: list[ModuleInfo] = [] + self.disabled_configure: list[ModuleInfo] = [] + self.disabled_setup: list[ModuleInfo] = [] + self.notavailable: list[ModuleInfo] = [] - def check(self): + def check(self) -> None: if not hasattr(_imp, 'create_dynamic'): logger.warning( ('Dynamic extensions not supported ' @@ -189,10 +197,10 @@ def check(self): assert modinfo.state == ModuleState.SHARED self.shared_ok.append(modinfo) - def summary(self, *, verbose: bool = False): + def summary(self, *, verbose: bool = False) -> None: longest = max([len(e.name) for e in self.modules], default=0) - def print_three_column(modinfos: list[ModuleInfo]): + def print_three_column(modinfos: list[ModuleInfo]) -> None: names = [modinfo.name for modinfo in modinfos] names.sort(key=str.lower) # guarantee zip() doesn't drop anything @@ -262,12 +270,12 @@ def print_three_column(modinfos: list[ModuleInfo]): f"{len(self.failed_on_import)} failed on import)" ) - def check_strict_build(self): + def check_strict_build(self) -> None: """Fail if modules are missing and it's a strict build""" if self.strict_extensions_build and (self.failed_on_import or self.missing): raise RuntimeError("Failed to build some stdlib modules") - def list_module_names(self, *, all: bool = False) -> set: + def list_module_names(self, *, all: bool = False) -> set[str]: names = {modinfo.name for modinfo in self.modules} if all: names.update(WINDOWS_MODULES) @@ -280,9 +288,9 @@ def get_builddir(self) -> pathlib.Path: except FileNotFoundError: logger.error("%s must be run from the top build directory", __file__) raise - builddir = pathlib.Path(builddir) - logger.debug("%s: %s", self.pybuilddir_txt, builddir) - return builddir + builddir_path = pathlib.Path(builddir) + logger.debug("%s: %s", self.pybuilddir_txt, builddir_path) + return builddir_path def get_modules(self) -> list[ModuleInfo]: """Get module info from sysconfig and Modules/Setup* files""" @@ -367,7 +375,7 @@ def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]: case ["*disabled*"]: state = ModuleState.DISABLED case ["*noconfig*"]: - state = None + continue case [*items]: if state == ModuleState.DISABLED: # *disabled* can disable multiple modules per line @@ -384,26 +392,33 @@ def parse_setup_file(self, setup_file: pathlib.Path) -> Iterable[ModuleInfo]: def get_spec(self, modinfo: ModuleInfo) -> ModuleSpec: """Get ModuleSpec for builtin or extension module""" if modinfo.state == ModuleState.SHARED: - location = os.fspath(self.get_location(modinfo)) + mod_location = self.get_location(modinfo) + assert mod_location is not None + location = os.fspath(mod_location) loader = ExtensionFileLoader(modinfo.name, location) - return spec_from_file_location(modinfo.name, location, loader=loader) + spec = spec_from_file_location(modinfo.name, location, loader=loader) + assert spec is not None + return spec elif modinfo.state == ModuleState.BUILTIN: - return spec_from_loader(modinfo.name, loader=BuiltinImporter) + spec = spec_from_loader(modinfo.name, loader=BuiltinImporter) + assert spec is not None + return spec else: raise ValueError(modinfo) - def get_location(self, modinfo: ModuleInfo) -> pathlib.Path: + def get_location(self, modinfo: ModuleInfo) -> pathlib.Path | None: """Get shared library location in build directory""" if modinfo.state == ModuleState.SHARED: return self.builddir / f"{modinfo.name}{self.ext_suffix}" else: return None - def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec): + def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec) -> None: """Check that the module file is present and not empty""" - if spec.loader is BuiltinImporter: + if spec.loader is BuiltinImporter: # type: ignore[comparison-overlap] return try: + assert spec.origin is not None st = os.stat(spec.origin) except FileNotFoundError: logger.error("%s (%s) is missing", modinfo.name, spec.origin) @@ -411,7 +426,7 @@ def _check_file(self, modinfo: ModuleInfo, spec: ModuleSpec): if not st.st_size: raise ImportError(f"{spec.origin} is an empty file") - def check_module_import(self, modinfo: ModuleInfo): + def check_module_import(self, modinfo: ModuleInfo) -> None: """Attempt to import module and report errors""" spec = self.get_spec(modinfo) self._check_file(modinfo, spec) @@ -430,7 +445,7 @@ def check_module_import(self, modinfo: ModuleInfo): logger.exception("Importing extension '%s' failed!", modinfo.name) raise - def check_module_cross(self, modinfo: ModuleInfo): + def check_module_cross(self, modinfo: ModuleInfo) -> None: """Sanity check for cross compiling""" spec = self.get_spec(modinfo) self._check_file(modinfo, spec) @@ -443,6 +458,7 @@ def rename_module(self, modinfo: ModuleInfo) -> None: failed_name = f"{modinfo.name}_failed{self.ext_suffix}" builddir_path = self.get_location(modinfo) + assert builddir_path is not None if builddir_path.is_symlink(): symlink = builddir_path module_path = builddir_path.resolve().relative_to(os.getcwd()) @@ -466,7 +482,7 @@ def rename_module(self, modinfo: ModuleInfo) -> None: logger.debug("Rename '%s' -> '%s'", module_path, failed_path) -def main(): +def main() -> None: args = parser.parse_args() if args.debug: args.verbose = True diff --git a/Tools/build/check_warnings.py b/Tools/build/check_warnings.py index 3f49d8e7f2ee48..44ccf9708ad72f 100644 --- a/Tools/build/check_warnings.py +++ b/Tools/build/check_warnings.py @@ -8,21 +8,29 @@ import sys from collections import defaultdict from pathlib import Path -from typing import NamedTuple +from typing import NamedTuple, TypedDict class IgnoreRule(NamedTuple): file_path: str - count: int + count: int # type: ignore[assignment] ignore_all: bool = False is_directory: bool = False +class CompileWarning(TypedDict): + file: str + line: str + column: str + message: str + option: str + + def parse_warning_ignore_file(file_path: str) -> set[IgnoreRule]: """ Parses the warning ignore file and returns a set of IgnoreRules """ - files_with_expected_warnings = set() + files_with_expected_warnings: set[IgnoreRule] = set() with Path(file_path).open(encoding="UTF-8") as ignore_rules_file: files_with_expected_warnings = set() for i, line in enumerate(ignore_rules_file): @@ -46,7 +54,7 @@ def parse_warning_ignore_file(file_path: str) -> set[IgnoreRule]: ) sys.exit(1) if ignore_all: - count = 0 + count = "0" files_with_expected_warnings.add( IgnoreRule( @@ -61,7 +69,7 @@ def extract_warnings_from_compiler_output( compiler_output: str, compiler_output_type: str, path_prefix: str = "", -) -> list[dict]: +) -> list[CompileWarning]: """ Extracts warnings from the compiler output based on compiler output type. Removes path prefix from file paths if provided. @@ -78,8 +86,12 @@ def extract_warnings_from_compiler_output( r"(?P<file>.*):(?P<line>\d+):(?P<column>\d+): warning: " r"(?P<message>.*) (?P<option>\[-[^\]]+\])$" ) + else: + raise RuntimeError( + f"Unsupported compiler output type: {compiler_output_type}", + ) compiled_regex = re.compile(regex_pattern) - compiler_warnings = [] + compiler_warnings: list[CompileWarning] = [] for i, line in enumerate(compiler_output.splitlines(), start=1): if match := compiled_regex.match(line): try: @@ -100,7 +112,9 @@ def extract_warnings_from_compiler_output( return compiler_warnings -def get_warnings_by_file(warnings: list[dict]) -> dict[str, list[dict]]: +def get_warnings_by_file( + warnings: list[CompileWarning], +) -> dict[str, list[CompileWarning]]: """ Returns a dictionary where the key is the file and the data is the warnings in that file. Does not include duplicate warnings for a @@ -138,7 +152,7 @@ def is_file_ignored( def get_unexpected_warnings( ignore_rules: set[IgnoreRule], - files_with_warnings: set[IgnoreRule], + files_with_warnings: dict[str, list[CompileWarning]], ) -> int: """ Returns failure status if warnings discovered in list of warnings @@ -180,7 +194,7 @@ def get_unexpected_warnings( def get_unexpected_improvements( ignore_rules: set[IgnoreRule], - files_with_warnings: set[IgnoreRule], + files_with_warnings: dict[str, list[CompileWarning]], ) -> int: """ Returns failure status if the number of warnings for a file is greater diff --git a/Tools/build/compute-changes.py b/Tools/build/compute-changes.py index cfdd55fd1925fd..524d3066fbffa7 100644 --- a/Tools/build/compute-changes.py +++ b/Tools/build/compute-changes.py @@ -45,25 +45,43 @@ SUFFIXES_C_OR_CPP = frozenset({".c", ".h", ".cpp"}) SUFFIXES_DOCUMENTATION = frozenset({".rst", ".md"}) +ANDROID_DIRS = frozenset({"Android"}) +IOS_DIRS = frozenset({"Apple", "iOS"}) +MACOS_DIRS = frozenset({"Mac"}) +WASI_DIRS = frozenset({Path("Tools", "wasm")}) + @dataclass(kw_only=True, slots=True) class Outputs: + run_android: bool = False run_ci_fuzz: bool = False run_docs: bool = False + run_ios: bool = False + run_macos: bool = False run_tests: bool = False + run_ubuntu: bool = False + run_wasi: bool = False run_windows_msi: bool = False run_windows_tests: bool = False def compute_changes() -> None: target_branch, head_ref = git_refs() - if target_branch and head_ref: + if os.environ.get("GITHUB_EVENT_NAME", "") == "pull_request": # Getting changed files only makes sense on a pull request files = get_changed_files(target_branch, head_ref) outputs = process_changed_files(files) else: # Otherwise, just run the tests - outputs = Outputs(run_tests=True, run_windows_tests=True) + outputs = Outputs( + run_android=True, + run_ios=True, + run_macos=True, + run_tests=True, + run_ubuntu=True, + run_wasi=True, + run_windows_tests=True, + ) outputs = process_target_branch(outputs, target_branch) if outputs.run_tests: @@ -111,6 +129,21 @@ def get_changed_files( return frozenset(map(Path, filter(None, map(str.strip, changed_files)))) +def get_file_platform(file: Path) -> str | None: + if not file.parts: + return None + first_part = file.parts[0] + if first_part in MACOS_DIRS: + return "macos" + if first_part in IOS_DIRS: + return "ios" + if first_part in ANDROID_DIRS: + return "android" + if len(file.parts) >= 2 and Path(*file.parts[:2]) in WASI_DIRS: # Tools/wasm/ + return "wasi" + return None + + def process_changed_files(changed_files: Set[Path]) -> Outputs: run_tests = False run_ci_fuzz = False @@ -118,6 +151,9 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: run_windows_tests = False run_windows_msi = False + platforms_changed = set() + has_platform_specific_change = True + for file in changed_files: # Documentation files doc_or_misc = file.parts[0] in {"Doc", "Misc"} @@ -126,10 +162,15 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: if file.parent == GITHUB_WORKFLOWS_PATH: if file.name == "build.yml": run_tests = run_ci_fuzz = True + has_platform_specific_change = False if file.name == "reusable-docs.yml": run_docs = True if file.name == "reusable-windows-msi.yml": run_windows_msi = True + if file.name == "reusable-macos.yml": + platforms_changed.add("macos") + if file.name == "reusable-wasi.yml": + platforms_changed.add("wasi") if not ( doc_file @@ -138,8 +179,13 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: ): run_tests = True - if file not in UNIX_BUILD_SYSTEM_FILE_NAMES: - run_windows_tests = True + platform = get_file_platform(file) + if platform is not None: + platforms_changed.add(platform) + else: + has_platform_specific_change = False + if file not in UNIX_BUILD_SYSTEM_FILE_NAMES: + run_windows_tests = True # The fuzz tests are pretty slow so they are executed only for PRs # changing relevant files. @@ -159,12 +205,38 @@ def process_changed_files(changed_files: Set[Path]) -> Outputs: if file.parts[:2] == ("Tools", "msi"): run_windows_msi = True + # Check which platform specific tests to run + if run_tests: + if not has_platform_specific_change or not platforms_changed: + run_android = True + run_ios = True + run_macos = True + run_ubuntu = True + run_wasi = True + else: + run_android = "android" in platforms_changed + run_ios = "ios" in platforms_changed + run_macos = "macos" in platforms_changed + run_ubuntu = False + run_wasi = "wasi" in platforms_changed + else: + run_android = False + run_ios = False + run_macos = False + run_ubuntu = False + run_wasi = False + return Outputs( + run_android=run_android, run_ci_fuzz=run_ci_fuzz, run_docs=run_docs, + run_ios=run_ios, + run_macos=run_macos, run_tests=run_tests, - run_windows_tests=run_windows_tests, + run_ubuntu=run_ubuntu, + run_wasi=run_wasi, run_windows_msi=run_windows_msi, + run_windows_tests=run_windows_tests, ) @@ -191,11 +263,16 @@ def write_github_output(outputs: Outputs) -> None: return with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f: + f.write(f"run-android={bool_lower(outputs.run_android)}\n") f.write(f"run-ci-fuzz={bool_lower(outputs.run_ci_fuzz)}\n") f.write(f"run-docs={bool_lower(outputs.run_docs)}\n") + f.write(f"run-ios={bool_lower(outputs.run_ios)}\n") + f.write(f"run-macos={bool_lower(outputs.run_macos)}\n") f.write(f"run-tests={bool_lower(outputs.run_tests)}\n") - f.write(f"run-windows-tests={bool_lower(outputs.run_windows_tests)}\n") + f.write(f"run-ubuntu={bool_lower(outputs.run_ubuntu)}\n") + f.write(f"run-wasi={bool_lower(outputs.run_wasi)}\n") f.write(f"run-windows-msi={bool_lower(outputs.run_windows_msi)}\n") + f.write(f"run-windows-tests={bool_lower(outputs.run_windows_tests)}\n") def bool_lower(value: bool, /) -> str: diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py index 23f58447937976..2b9f03aebb6d7e 100644 --- a/Tools/build/deepfreeze.py +++ b/Tools/build/deepfreeze.py @@ -2,9 +2,12 @@ The script may be executed by _bootstrap_python interpreter. Shared library extension modules are not available in that case. -On Windows, and in cross-compilation cases, it is executed -by Python 3.10, and 3.11 features are not available. +Requires 3.11+ to be executed, +because relies on `code.co_qualname` and `code.co_exceptiontable`. """ + +from __future__ import annotations + import argparse import builtins import collections @@ -13,10 +16,14 @@ import re import time import types -from typing import TextIO import umarshal +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any, TextIO + ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) verbose = False @@ -45,8 +52,8 @@ def make_string_literal(b: bytes) -> str: next_code_version = 1 -def get_localsplus(code: types.CodeType): - a = collections.defaultdict(int) +def get_localsplus(code: types.CodeType) -> tuple[tuple[str, ...], bytes]: + a: collections.defaultdict[str, int] = collections.defaultdict(int) for name in code.co_varnames: a[name] |= CO_FAST_LOCAL for name in code.co_cellvars: @@ -136,7 +143,7 @@ def get_identifiers_and_strings(self) -> tuple[set[str], dict[str, str]]: return identifiers, strings @contextlib.contextmanager - def indent(self) -> None: + def indent(self) -> Iterator[None]: save_level = self.level try: self.level += 1 @@ -148,7 +155,7 @@ def write(self, arg: str) -> None: self.file.writelines((" "*self.level, arg, "\n")) @contextlib.contextmanager - def block(self, prefix: str, suffix: str = "") -> None: + def block(self, prefix: str, suffix: str = "") -> Iterator[None]: self.write(prefix + " {") with self.indent(): yield @@ -250,9 +257,17 @@ def generate_code(self, name: str, code: types.CodeType) -> str: co_names = self.generate(name + "_names", code.co_names) co_filename = self.generate(name + "_filename", code.co_filename) co_name = self.generate(name + "_name", code.co_name) - co_qualname = self.generate(name + "_qualname", code.co_qualname) co_linetable = self.generate(name + "_linetable", code.co_linetable) - co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) + # We use 3.10 for type checking, but this module requires 3.11 + # TODO: bump python version for this script. + co_qualname = self.generate( + name + "_qualname", + code.co_qualname, # type: ignore[attr-defined] + ) + co_exceptiontable = self.generate( + name + "_exceptiontable", + code.co_exceptiontable, # type: ignore[attr-defined] + ) # These fields are not directly accessible localsplusnames, localspluskinds = get_localsplus(code) co_localsplusnames = self.generate(name + "_localsplusnames", localsplusnames) @@ -379,13 +394,13 @@ def generate_complex(self, name: str, z: complex) -> str: self.write(f".cval = {{ {z.real}, {z.imag} }},") return f"&{name}.ob_base" - def generate_frozenset(self, name: str, fs: frozenset[object]) -> str: + def generate_frozenset(self, name: str, fs: frozenset[Any]) -> str: try: - fs = sorted(fs) + fs_sorted = sorted(fs) except TypeError: # frozen set with incompatible types, fallback to repr() - fs = sorted(fs, key=repr) - ret = self.generate_tuple(name, tuple(fs)) + fs_sorted = sorted(fs, key=repr) + ret = self.generate_tuple(name, tuple(fs_sorted)) self.write("// TODO: The above tuple should be a frozenset") return ret @@ -402,7 +417,7 @@ def generate(self, name: str, obj: object) -> str: # print(f"Cache hit {key!r:.40}: {self.cache[key]!r:.40}") return self.cache[key] self.misses += 1 - if isinstance(obj, (types.CodeType, umarshal.Code)) : + if isinstance(obj, types.CodeType) : val = self.generate_code(name, obj) elif isinstance(obj, tuple): val = self.generate_tuple(name, obj) @@ -458,7 +473,7 @@ def decode_frozen_data(source: str) -> types.CodeType: if re.match(FROZEN_DATA_LINE, line): values.extend([int(x) for x in line.split(",") if x.strip()]) data = bytes(values) - return umarshal.loads(data) + return umarshal.loads(data) # type: ignore[no-any-return] def generate(args: list[str], output: TextIO) -> None: @@ -494,12 +509,12 @@ def generate(args: list[str], output: TextIO) -> None: help="Input file and module name (required) in file:modname format") @contextlib.contextmanager -def report_time(label: str): - t0 = time.time() +def report_time(label: str) -> Iterator[None]: + t0 = time.perf_counter() try: yield finally: - t1 = time.time() + t1 = time.perf_counter() if verbose: print(f"{label}: {t1-t0:.3f} sec") diff --git a/Tools/build/generate-build-details.py b/Tools/build/generate-build-details.py index 0da6c2948d6688..ed9ab2844d250a 100644 --- a/Tools/build/generate-build-details.py +++ b/Tools/build/generate-build-details.py @@ -3,6 +3,8 @@ # Script initially imported from: # https://github.com/FFY00/python-instrospection/blob/main/python_introspection/scripts/generate-build-details.py +from __future__ import annotations + import argparse import collections import importlib.machinery @@ -11,19 +13,23 @@ import sys import sysconfig +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Any + -def version_info_to_dict(obj): # (object) -> dict[str, Any] +def version_info_to_dict(obj: sys._version_info) -> dict[str, Any]: field_names = ('major', 'minor', 'micro', 'releaselevel', 'serial') return {field: getattr(obj, field) for field in field_names} -def get_dict_key(container, key): # (dict[str, Any], str) -> dict[str, Any] +def get_dict_key(container: dict[str, Any], key: str) -> dict[str, Any]: for part in key.split('.'): container = container[part] return container -def generate_data(schema_version): +def generate_data(schema_version: str) -> collections.defaultdict[str, Any]: """Generate the build-details.json data (PEP 739). :param schema_version: The schema version of the data we want to generate. @@ -32,7 +38,9 @@ def generate_data(schema_version): if schema_version != '1.0': raise ValueError(f'Unsupported schema_version: {schema_version}') - data = collections.defaultdict(lambda: collections.defaultdict(dict)) + data: collections.defaultdict[str, Any] = collections.defaultdict( + lambda: collections.defaultdict(dict), + ) data['schema_version'] = schema_version @@ -47,7 +55,7 @@ def generate_data(schema_version): data['language']['version'] = sysconfig.get_python_version() data['language']['version_info'] = version_info_to_dict(sys.version_info) - data['implementation'] = vars(sys.implementation) + data['implementation'] = vars(sys.implementation).copy() data['implementation']['version'] = version_info_to_dict(sys.implementation.version) # Fix cross-compilation if '_multiarch' in data['implementation']: @@ -67,7 +75,7 @@ def generate_data(schema_version): PY3LIBRARY = sysconfig.get_config_var('PY3LIBRARY') LIBPYTHON = sysconfig.get_config_var('LIBPYTHON') LIBPC = sysconfig.get_config_var('LIBPC') - INCLUDEDIR = sysconfig.get_config_var('INCLUDEDIR') + INCLUDEPY = sysconfig.get_config_var('INCLUDEPY') if os.name == 'posix': # On POSIX, LIBRARY is always the static library, while LDLIBRARY is the @@ -96,7 +104,7 @@ def generate_data(schema_version): data['abi']['extension_suffix'] = sysconfig.get_config_var('EXT_SUFFIX') # EXTENSION_SUFFIXES has been constant for a long time, and currently we - # don't have a better information source to find the stable ABI suffix. + # don't have a better information source to find the stable ABI suffix. for suffix in importlib.machinery.EXTENSION_SUFFIXES: if suffix.startswith('.abi'): data['abi']['stable_abi_suffix'] = suffix @@ -115,44 +123,62 @@ def generate_data(schema_version): if has_static_library: data['libpython']['static'] = os.path.join(LIBDIR, LIBRARY) - data['c_api']['include'] = INCLUDEDIR + data['c_api']['headers'] = INCLUDEPY if LIBPC: data['c_api']['pkgconfig_path'] = LIBPC return data -def make_paths_relative(data, config_path=None): # (dict[str, Any], str | None) -> None +def make_paths_relative(data: dict[str, Any], config_path: str | None = None) -> None: # Make base_prefix relative to the config_path directory if config_path: - data['base_prefix'] = os.path.relpath(data['base_prefix'], os.path.dirname(config_path)) + data['base_prefix'] = relative_path(data['base_prefix'], + os.path.dirname(config_path)) + base_prefix = data['base_prefix'] + # Update path values to make them relative to base_prefix - PATH_KEYS = [ + PATH_KEYS = ( 'base_interpreter', 'libpython.dynamic', 'libpython.dynamic_stableabi', 'libpython.static', 'c_api.headers', 'c_api.pkgconfig_path', - ] + ) for entry in PATH_KEYS: - parent, _, child = entry.rpartition('.') + *parents, child = entry.split('.') # Get the key container object try: container = data - for part in parent.split('.'): + for part in parents: container = container[part] + if child not in container: + raise KeyError(child) current_path = container[child] except KeyError: continue # Get the relative path - new_path = os.path.relpath(current_path, data['base_prefix']) + new_path = relative_path(current_path, base_prefix) # Join '.' so that the path is formated as './path' instead of 'path' new_path = os.path.join('.', new_path) container[child] = new_path -def main(): # () -> None +def relative_path(path: str, base: str) -> str: + if os.name != 'nt': + return os.path.relpath(path, base) + + # There are no relative paths between drives on Windows. + path_drv, _ = os.path.splitdrive(path) + base_drv, _ = os.path.splitdrive(base) + if path_drv.lower() == base_drv.lower(): + return os.path.relpath(path, base) + + return path + + +def main() -> None: parser = argparse.ArgumentParser(exit_on_error=False) parser.add_argument('location') parser.add_argument( @@ -178,8 +204,9 @@ def main(): # () -> None make_paths_relative(data, args.config_file_path) json_output = json.dumps(data, indent=2) - with open(args.location, 'w') as f: - print(json_output, file=f) + with open(args.location, 'w', encoding='utf-8') as f: + f.write(json_output) + f.write('\n') if __name__ == '__main__': diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index db01426e9722c3..f5a2f26cbb085c 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -4,10 +4,13 @@ import hashlib import json import os +import random import re import subprocess import sys +import time import typing +import urllib.error import urllib.request from pathlib import Path, PurePosixPath, PureWindowsPath @@ -161,6 +164,23 @@ def get_externals() -> list[str]: return externals +def download_with_retries(download_location: str, + max_retries: int = 7, + base_delay: float = 2.25, + max_jitter: float = 1.0) -> typing.Any: + """Download a file with exponential backoff retry.""" + for attempt in range(max_retries + 1): + try: + resp = urllib.request.urlopen(download_location) + except (urllib.error.URLError, ConnectionError) as ex: + if attempt == max_retries: + msg = f"Download from {download_location} failed." + raise OSError(msg) from ex + time.sleep(base_delay**attempt + random.uniform(0, max_jitter)) + else: + return resp + + def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: """Make a bunch of assertions about the SBOM package data to ensure it's consistent.""" @@ -175,7 +195,7 @@ def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: # and that the download URL is valid. if "checksums" not in package or "CI" in os.environ: download_location = package["downloadLocation"] - resp = urllib.request.urlopen(download_location) + resp = download_with_retries(download_location) error_if(resp.status != 200, f"Couldn't access URL: {download_location}'") package["checksums"] = [{ @@ -222,14 +242,14 @@ def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: ) # libexpat specifies its expected rev in a refresh script. - if package["name"] == "libexpat": + if package["name"] == "expat": libexpat_refresh_sh = (CPYTHON_ROOT_DIR / "Modules/expat/refresh.sh").read_text() libexpat_expected_version_match = re.search( r"expected_libexpat_version=\"([0-9]+\.[0-9]+\.[0-9]+)\"", libexpat_refresh_sh ) libexpat_expected_sha256_match = re.search( - r"expected_libexpat_sha256=\"[a-f0-9]{40}\"", + r"expected_libexpat_sha256=\"([a-f0-9]{64})\"", libexpat_refresh_sh ) libexpat_expected_version = libexpat_expected_version_match and libexpat_expected_version_match.group(1) diff --git a/Tools/build/generate_stdlib_module_names.py b/Tools/build/generate_stdlib_module_names.py index 88414cdbb37a8d..bda72539640611 100644 --- a/Tools/build/generate_stdlib_module_names.py +++ b/Tools/build/generate_stdlib_module_names.py @@ -1,9 +1,12 @@ # This script lists the names of standard library modules # to update Python/stdlib_module_names.h +from __future__ import annotations + import _imp import os.path import sys import sysconfig +from typing import TextIO from check_extension_modules import ModuleChecker @@ -48,12 +51,12 @@ } # Built-in modules -def list_builtin_modules(names): +def list_builtin_modules(names: set[str]) -> None: names |= set(sys.builtin_module_names) # Pure Python modules (Lib/*.py) -def list_python_modules(names): +def list_python_modules(names: set[str]) -> None: for filename in os.listdir(STDLIB_PATH): if not filename.endswith(".py"): continue @@ -62,7 +65,7 @@ def list_python_modules(names): # Packages in Lib/ -def list_packages(names): +def list_packages(names: set[str]) -> None: for name in os.listdir(STDLIB_PATH): if name in IGNORE: continue @@ -76,16 +79,16 @@ def list_packages(names): # Built-in and extension modules built by Modules/Setup* # includes Windows and macOS extensions. -def list_modules_setup_extensions(names): +def list_modules_setup_extensions(names: set[str]) -> None: checker = ModuleChecker() names.update(checker.list_module_names(all=True)) # List frozen modules of the PyImport_FrozenModules list (Python/frozen.c). # Use the "./Programs/_testembed list_frozen" command. -def list_frozen(names): +def list_frozen(names: set[str]) -> None: submodules = set() - for name in _imp._frozen_module_names(): + for name in _imp._frozen_module_names(): # type: ignore[attr-defined] # To skip __hello__, __hello_alias__ and etc. if name.startswith('__'): continue @@ -101,8 +104,8 @@ def list_frozen(names): raise Exception(f'unexpected frozen submodules: {sorted(submodules)}') -def list_modules(): - names = set() +def list_modules() -> set[str]: + names: set[str] = set() list_builtin_modules(names) list_modules_setup_extensions(names) @@ -127,7 +130,7 @@ def list_modules(): return names -def write_modules(fp, names): +def write_modules(fp: TextIO, names: set[str]) -> None: print(f"// Auto-generated by {SCRIPT_NAME}.", file=fp) print("// List used to create sys.stdlib_module_names.", file=fp) @@ -138,7 +141,7 @@ def write_modules(fp, names): print("};", file=fp) -def main(): +def main() -> None: if not sysconfig.is_python_build(): print(f"ERROR: {sys.executable} is not a Python build", file=sys.stderr) diff --git a/Tools/build/mypy.ini b/Tools/build/mypy.ini index 06224163884a98..331bada6f47d2e 100644 --- a/Tools/build/mypy.ini +++ b/Tools/build/mypy.ini @@ -1,7 +1,19 @@ [mypy] + +# Please, when adding new files here, also add them to: +# .github/workflows/mypy.yml files = + Tools/build/check_extension_modules.py, + Tools/build/check_warnings.py, Tools/build/compute-changes.py, - Tools/build/generate_sbom.py + Tools/build/deepfreeze.py, + Tools/build/generate-build-details.py, + Tools/build/generate_sbom.py, + Tools/build/generate_stdlib_module_names.py, + Tools/build/verify_ensurepip_wheels.py, + Tools/build/update_file.py, + Tools/build/umarshal.py + pretty = True # Make sure Python can still be built @@ -10,6 +22,8 @@ python_version = 3.10 # ...And be strict: strict = True +strict_bytes = True +local_partial_types = True extra_checks = True enable_error_code = ignore-without-code,redundant-expr,truthy-bool,possibly-undefined warn_unreachable = True diff --git a/Tools/build/stable_abi.py b/Tools/build/stable_abi.py index 1ddd76cdd9bf64..39115b331ba642 100644 --- a/Tools/build/stable_abi.py +++ b/Tools/build/stable_abi.py @@ -232,7 +232,7 @@ def sort_key(item): 'data': 'data', 'struct': 'type', 'macro': 'macro', - # 'const': 'const', # all undocumented + 'const': 'macro', 'typedef': 'type', } diff --git a/Tools/build/umarshal.py b/Tools/build/umarshal.py index 679fa7caf9f931..865cffc2440122 100644 --- a/Tools/build/umarshal.py +++ b/Tools/build/umarshal.py @@ -145,12 +145,12 @@ def r_PyLong(self) -> int: def r_float_bin(self) -> float: buf = self.r_string(8) import struct # Lazy import to avoid breaking UNIX build - return struct.unpack("d", buf)[0] + return struct.unpack("d", buf)[0] # type: ignore[no-any-return] def r_float_str(self) -> float: n = self.r_byte() buf = self.r_string(n) - return ast.literal_eval(buf.decode("ascii")) + return ast.literal_eval(buf.decode("ascii")) # type: ignore[no-any-return] def r_ref_reserve(self, flag: int) -> int: if flag: @@ -306,7 +306,7 @@ def loads(data: bytes) -> Any: return r.r_object() -def main(): +def main() -> None: # Test import marshal import pprint @@ -314,8 +314,9 @@ def main(): data = marshal.dumps(sample) retval = loads(data) assert retval == sample, retval - sample = main.__code__ - data = marshal.dumps(sample) + + sample2 = main.__code__ + data = marshal.dumps(sample2) retval = loads(data) assert isinstance(retval, Code), retval pprint.pprint(retval.__dict__) diff --git a/Tools/build/update_file.py b/Tools/build/update_file.py index b4182c1d0cb638..b4a5fb6e778ae8 100644 --- a/Tools/build/update_file.py +++ b/Tools/build/update_file.py @@ -6,14 +6,27 @@ actually change the in-tree generated code. """ +from __future__ import annotations + import contextlib import os import os.path import sys +TYPE_CHECKING = False +if TYPE_CHECKING: + import typing + from collections.abc import Iterator + from io import TextIOWrapper + + _Outcome: typing.TypeAlias = typing.Literal['created', 'updated', 'same'] + @contextlib.contextmanager -def updating_file_with_tmpfile(filename, tmpfile=None): +def updating_file_with_tmpfile( + filename: str, + tmpfile: str | None = None, +) -> Iterator[tuple[TextIOWrapper, TextIOWrapper]]: """A context manager for updating a file via a temp file. The context manager provides two open files: the source file open @@ -46,13 +59,18 @@ def updating_file_with_tmpfile(filename, tmpfile=None): update_file_with_tmpfile(filename, tmpfile) -def update_file_with_tmpfile(filename, tmpfile, *, create=False): +def update_file_with_tmpfile( + filename: str, + tmpfile: str, + *, + create: bool = False, +) -> _Outcome: try: targetfile = open(filename, 'rb') except FileNotFoundError: if not create: raise # re-raise - outcome = 'created' + outcome: _Outcome = 'created' os.replace(tmpfile, filename) else: with targetfile: diff --git a/Tools/build/verify_ensurepip_wheels.py b/Tools/build/verify_ensurepip_wheels.py index a37da2f70757e5..46c42916d9354d 100755 --- a/Tools/build/verify_ensurepip_wheels.py +++ b/Tools/build/verify_ensurepip_wheels.py @@ -20,13 +20,13 @@ GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" -def print_notice(file_path: str, message: str) -> None: +def print_notice(file_path: str | Path, message: str) -> None: if GITHUB_ACTIONS: message = f"::notice file={file_path}::{message}" print(message, end="\n\n") -def print_error(file_path: str, message: str) -> None: +def print_error(file_path: str | Path, message: str) -> None: if GITHUB_ACTIONS: message = f"::error file={file_path}::{message}" print(message, end="\n\n") @@ -67,6 +67,7 @@ def verify_wheel(package_name: str) -> bool: return False release_files = json.loads(raw_text)["releases"][package_version] + expected_digest = "" for release_info in release_files: if package_path.name != release_info["filename"]: continue @@ -95,6 +96,7 @@ def verify_wheel(package_name: str) -> bool: return True + if __name__ == "__main__": exit_status = int(not verify_wheel("pip")) raise SystemExit(exit_status) diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 037fe11ea223c7..2d6726faf7757a 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -1,5 +1,4 @@ import os.path -import re from c_parser.preprocessor import ( get_preprocessor as _get_preprocessor, @@ -18,16 +17,16 @@ def _abs(relfile): return os.path.join(REPO_ROOT, relfile) -def clean_lines(text): - """Clear out comments, blank lines, and leading/trailing whitespace.""" - lines = (line.strip() for line in text.splitlines()) - lines = (line.partition('#')[0].rstrip() - for line in lines - if line and not line.startswith('#')) - glob_all = f'{GLOB_ALL} ' - lines = (re.sub(r'^[*] ', glob_all, line) for line in lines) - lines = (_abs(line) for line in lines) - return list(lines) +def format_conf_lines(lines): + """Format conf entries for use in a TSV table.""" + return [_abs(entry) for entry in lines] + + +def format_tsv_lines(lines): + """Format entries for use in a TSV table.""" + lines = ((_abs(first), *rest) for first, *rest in lines) + lines = map('\t'.join, lines) + return list(map(str.rstrip, lines)) ''' @@ -47,252 +46,241 @@ def clean_lines(text): ''' # XXX Handle these. -# Tab separated: -EXCLUDED = clean_lines(''' -# @begin=conf@ - -# OSX -Modules/_scproxy.c # SystemConfiguration/SystemConfiguration.h - -# Windows -Modules/_winapi.c # windows.h -Modules/expat/winconfig.h -Modules/overlapped.c # winsock.h -Python/dynload_win.c # windows.h -Python/thread_nt.h - -# other OS-dependent -Python/dynload_aix.c # sys/ldr.h -Python/dynload_dl.c # dl.h -Python/dynload_hpux.c # dl.h -Python/emscripten_signal.c -Python/thread_pthread.h -Python/thread_pthread_stubs.h - -# only huge constants (safe but parsing is slow) -Modules/_ssl_data_*.h -Modules/cjkcodecs/mappings_*.h -Modules/unicodedata_db.h -Modules/unicodename_db.h -Objects/unicodetype_db.h - -# generated -Python/deepfreeze/*.c -Python/frozen_modules/*.h -Python/generated_cases.c.h -Python/executor_cases.c.h -Python/optimizer_cases.c.h - -# not actually source -Python/bytecodes.c -Python/optimizer_bytecodes.c - -# mimalloc -Objects/mimalloc/*.c -Include/internal/mimalloc/*.h -Include/internal/mimalloc/mimalloc/*.h - -# @end=conf@ -''') +EXCLUDED = format_conf_lines([ + # macOS + 'Modules/_scproxy.c', # SystemConfiguration/SystemConfiguration.h + + # Windows + 'Modules/_winapi.c', # windows.h + 'Modules/expat/winconfig.h', + 'Modules/overlapped.c', # winsock.h + 'Python/dynload_win.c', # windows.h + 'Python/thread_nt.h', + + # other OS-dependent + 'Python/dynload_aix.c', # sys/ldr.h + 'Python/dynload_dl.c', # dl.h + 'Python/dynload_hpux.c', # dl.h + 'Python/emscripten_signal.c', + 'Python/emscripten_syscalls.c', + 'Python/emscripten_trampoline_inner.c', + 'Python/thread_pthread.h', + 'Python/thread_pthread_stubs.h', + + # only huge constants (safe but parsing is slow) + 'Modules/_ssl_data_*.h', + 'Modules/cjkcodecs/mappings_*.h', + 'Modules/unicodedata_db.h', + 'Modules/unicodename_db.h', + 'Objects/unicodetype_db.h', + + # generated + 'Python/deepfreeze/*.c', + 'Python/frozen_modules/*.h', + 'Python/generated_cases.c.h', + 'Python/executor_cases.c.h', + 'Python/optimizer_cases.c.h', + # XXX: Throws errors if PY_VERSION_HEX is not mocked out + 'Modules/clinic/_testclinic_depr.c.h', + + # not actually source + 'Python/bytecodes.c', + 'Python/optimizer_bytecodes.c', + + # mimalloc + 'Objects/mimalloc/*.c', + 'Include/internal/mimalloc/*.h', + 'Include/internal/mimalloc/mimalloc/*.h', +]) # XXX Fix the parser. -EXCLUDED += clean_lines(''' -# The tool should be able to parse these... - -# The problem with xmlparse.c is that something -# has gone wrong where # we handle "maybe inline actual" -# in Tools/c-analyzer/c_parser/parser/_global.py. -Modules/expat/internal.h -Modules/expat/xmlparse.c -''') - -INCL_DIRS = clean_lines(''' -# @begin=tsv@ - -glob dirname -* . -* ./Include -* ./Include/internal -* ./Include/internal/mimalloc - -Modules/_decimal/**/*.c Modules/_decimal/libmpdec -Modules/_elementtree.c Modules/expat -Modules/_hacl/*.c Modules/_hacl/include -Modules/_hacl/*.c Modules/_hacl/ -Modules/_hacl/*.h Modules/_hacl/include -Modules/_hacl/*.h Modules/_hacl/ -Modules/md5module.c Modules/_hacl/include -Modules/sha1module.c Modules/_hacl/include -Modules/sha2module.c Modules/_hacl/include -Modules/sha3module.c Modules/_hacl/include -Modules/blake2module.c Modules/_hacl/include -Modules/hmacmodule.c Modules/_hacl/include -Objects/stringlib/*.h Objects - -# possible system-installed headers, just in case -Modules/_tkinter.c /usr/include/tcl8.6 -Modules/_uuidmodule.c /usr/include/uuid -Modules/tkappinit.c /usr/include/tcl - -# @end=tsv@ -''')[1:] - -INCLUDES = clean_lines(''' -# @begin=tsv@ - -glob include - -**/*.h Python.h -Include/**/*.h object.h - -# for Py_HAVE_CONDVAR -Include/internal/pycore_gil.h pycore_condvar.h -Python/thread_pthread.h pycore_condvar.h - -# other - -Objects/stringlib/join.h stringlib/stringdefs.h -Objects/stringlib/ctype.h stringlib/stringdefs.h -Objects/stringlib/transmogrify.h stringlib/stringdefs.h -#Objects/stringlib/fastsearch.h stringlib/stringdefs.h -#Objects/stringlib/count.h stringlib/stringdefs.h -#Objects/stringlib/find.h stringlib/stringdefs.h -#Objects/stringlib/partition.h stringlib/stringdefs.h -#Objects/stringlib/split.h stringlib/stringdefs.h -Objects/stringlib/fastsearch.h stringlib/ucs1lib.h -Objects/stringlib/count.h stringlib/ucs1lib.h -Objects/stringlib/find.h stringlib/ucs1lib.h -Objects/stringlib/partition.h stringlib/ucs1lib.h -Objects/stringlib/split.h stringlib/ucs1lib.h -Objects/stringlib/find_max_char.h Objects/stringlib/ucs1lib.h -Objects/stringlib/count.h Objects/stringlib/fastsearch.h -Objects/stringlib/find.h Objects/stringlib/fastsearch.h -Objects/stringlib/partition.h Objects/stringlib/fastsearch.h -Objects/stringlib/replace.h Objects/stringlib/fastsearch.h -Objects/stringlib/repr.h Objects/stringlib/fastsearch.h -Objects/stringlib/split.h Objects/stringlib/fastsearch.h - -# @end=tsv@ -''')[1:] - -MACROS = clean_lines(''' -# @begin=tsv@ - -glob name value - -Include/internal/*.h Py_BUILD_CORE 1 -Python/**/*.c Py_BUILD_CORE 1 -Python/**/*.h Py_BUILD_CORE 1 -Parser/**/*.c Py_BUILD_CORE 1 -Parser/**/*.h Py_BUILD_CORE 1 -Objects/**/*.c Py_BUILD_CORE 1 -Objects/**/*.h Py_BUILD_CORE 1 - -Modules/_asynciomodule.c Py_BUILD_CORE 1 -Modules/_codecsmodule.c Py_BUILD_CORE 1 -Modules/_collectionsmodule.c Py_BUILD_CORE 1 -Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1 -Modules/_ctypes/cfield.c Py_BUILD_CORE 1 -Modules/_cursesmodule.c Py_BUILD_CORE 1 -Modules/_datetimemodule.c Py_BUILD_CORE 1 -Modules/_functoolsmodule.c Py_BUILD_CORE 1 -Modules/_heapqmodule.c Py_BUILD_CORE 1 -Modules/_io/*.c Py_BUILD_CORE 1 -Modules/_io/*.h Py_BUILD_CORE 1 -Modules/_localemodule.c Py_BUILD_CORE 1 -Modules/_operator.c Py_BUILD_CORE 1 -Modules/_posixsubprocess.c Py_BUILD_CORE 1 -Modules/_sre/sre.c Py_BUILD_CORE 1 -Modules/_threadmodule.c Py_BUILD_CORE 1 -Modules/_tracemalloc.c Py_BUILD_CORE 1 -Modules/_weakref.c Py_BUILD_CORE 1 -Modules/_zoneinfo.c Py_BUILD_CORE 1 -Modules/atexitmodule.c Py_BUILD_CORE 1 -Modules/cmathmodule.c Py_BUILD_CORE 1 -Modules/faulthandler.c Py_BUILD_CORE 1 -Modules/gcmodule.c Py_BUILD_CORE 1 -Modules/getpath.c Py_BUILD_CORE 1 -Modules/getpath_noop.c Py_BUILD_CORE 1 -Modules/itertoolsmodule.c Py_BUILD_CORE 1 -Modules/main.c Py_BUILD_CORE 1 -Modules/mathmodule.c Py_BUILD_CORE 1 -Modules/posixmodule.c Py_BUILD_CORE 1 -Modules/sha256module.c Py_BUILD_CORE 1 -Modules/sha512module.c Py_BUILD_CORE 1 -Modules/signalmodule.c Py_BUILD_CORE 1 -Modules/symtablemodule.c Py_BUILD_CORE 1 -Modules/timemodule.c Py_BUILD_CORE 1 -Modules/unicodedata.c Py_BUILD_CORE 1 - -Modules/_json.c Py_BUILD_CORE_BUILTIN 1 -Modules/_pickle.c Py_BUILD_CORE_BUILTIN 1 -Modules/_testinternalcapi.c Py_BUILD_CORE_BUILTIN 1 - -Include/cpython/abstract.h Py_CPYTHON_ABSTRACTOBJECT_H 1 -Include/cpython/bytearrayobject.h Py_CPYTHON_BYTEARRAYOBJECT_H 1 -Include/cpython/bytesobject.h Py_CPYTHON_BYTESOBJECT_H 1 -Include/cpython/ceval.h Py_CPYTHON_CEVAL_H 1 -Include/cpython/code.h Py_CPYTHON_CODE_H 1 -Include/cpython/dictobject.h Py_CPYTHON_DICTOBJECT_H 1 -Include/cpython/fileobject.h Py_CPYTHON_FILEOBJECT_H 1 -Include/cpython/fileutils.h Py_CPYTHON_FILEUTILS_H 1 -Include/cpython/frameobject.h Py_CPYTHON_FRAMEOBJECT_H 1 -Include/cpython/import.h Py_CPYTHON_IMPORT_H 1 -Include/cpython/interpreteridobject.h Py_CPYTHON_INTERPRETERIDOBJECT_H 1 -Include/cpython/listobject.h Py_CPYTHON_LISTOBJECT_H 1 -Include/cpython/methodobject.h Py_CPYTHON_METHODOBJECT_H 1 -Include/cpython/object.h Py_CPYTHON_OBJECT_H 1 -Include/cpython/objimpl.h Py_CPYTHON_OBJIMPL_H 1 -Include/cpython/pyerrors.h Py_CPYTHON_ERRORS_H 1 -Include/cpython/pylifecycle.h Py_CPYTHON_PYLIFECYCLE_H 1 -Include/cpython/pymem.h Py_CPYTHON_PYMEM_H 1 -Include/cpython/pystate.h Py_CPYTHON_PYSTATE_H 1 -Include/cpython/sysmodule.h Py_CPYTHON_SYSMODULE_H 1 -Include/cpython/traceback.h Py_CPYTHON_TRACEBACK_H 1 -Include/cpython/tupleobject.h Py_CPYTHON_TUPLEOBJECT_H 1 -Include/cpython/unicodeobject.h Py_CPYTHON_UNICODEOBJECT_H 1 - -# implied include of <unistd.h> -Include/**/*.h _POSIX_THREADS 1 -Include/**/*.h HAVE_PTHREAD_H 1 - -# from pyconfig.h -Include/cpython/pthread_stubs.h HAVE_PTHREAD_STUBS 1 -Python/thread_pthread_stubs.h HAVE_PTHREAD_STUBS 1 - -# from Objects/bytesobject.c -Objects/stringlib/partition.h STRINGLIB_GET_EMPTY() bytes_get_empty() -Objects/stringlib/join.h STRINGLIB_MUTABLE 0 -Objects/stringlib/partition.h STRINGLIB_MUTABLE 0 -Objects/stringlib/split.h STRINGLIB_MUTABLE 0 -Objects/stringlib/transmogrify.h STRINGLIB_MUTABLE 0 - -# from Makefile -Modules/getpath.c PYTHONPATH 1 -Modules/getpath.c PREFIX ... -Modules/getpath.c EXEC_PREFIX ... -Modules/getpath.c VERSION ... -Modules/getpath.c VPATH ... -Modules/getpath.c PLATLIBDIR ... -#Modules/_dbmmodule.c USE_GDBM_COMPAT 1 -Modules/_dbmmodule.c USE_NDBM 1 -#Modules/_dbmmodule.c USE_BERKDB 1 - -# See: setup.py -Modules/_decimal/**/*.c CONFIG_64 1 -Modules/_decimal/**/*.c ASM 1 -Modules/expat/xmlparse.c HAVE_EXPAT_CONFIG_H 1 -Modules/expat/xmlparse.c XML_POOR_ENTROPY 1 -Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1 - -# others -Modules/_sre/sre_lib.h LOCAL(type) static inline type -Modules/_sre/sre_lib.h SRE(F) sre_ucs2_##F -Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1 -Include/internal/pycore_crossinterp_data_registry.h Py_CORE_CROSSINTERP_DATA_REGISTRY_H 1 - -# @end=tsv@ -''')[1:] +EXCLUDED += format_conf_lines([ + # The tool should be able to parse these... + + # The problem with xmlparse.c is that something + # has gone wrong where # we handle "maybe inline actual" + # in Tools/c-analyzer/c_parser/parser/_global.py. + 'Modules/expat/internal.h', + 'Modules/expat/xmlparse.c', +]) + +INCL_DIRS = format_tsv_lines([ + # (glob, dirname) + + ('*', '.'), + ('*', './Include'), + ('*', './Include/internal'), + ('*', './Include/internal/mimalloc'), + + ('Modules/_decimal/**/*.c', 'Modules/_decimal/libmpdec'), + ('Modules/_elementtree.c', 'Modules/expat'), + ('Modules/_hacl/*.c', 'Modules/_hacl/include'), + ('Modules/_hacl/*.c', 'Modules/_hacl/'), + ('Modules/_hacl/*.h', 'Modules/_hacl/include'), + ('Modules/_hacl/*.h', 'Modules/_hacl/'), + ('Modules/md5module.c', 'Modules/_hacl/include'), + ('Modules/sha1module.c', 'Modules/_hacl/include'), + ('Modules/sha2module.c', 'Modules/_hacl/include'), + ('Modules/sha3module.c', 'Modules/_hacl/include'), + ('Modules/blake2module.c', 'Modules/_hacl/include'), + ('Modules/hmacmodule.c', 'Modules/_hacl/include'), + ('Objects/stringlib/*.h', 'Objects'), + + # possible system-installed headers, just in case + ('Modules/_tkinter.c', '/usr/include/tcl8.6'), + ('Modules/_uuidmodule.c', '/usr/include/uuid'), + ('Modules/tkappinit.c', '/usr/include/tcl'), + +]) + +INCLUDES = format_tsv_lines([ + # (glob, include) + + ('**/*.h', 'Python.h'), + ('Include/**/*.h', 'object.h'), + + # for Py_HAVE_CONDVAR + ('Include/internal/pycore_gil.h', 'pycore_condvar.h'), + ('Python/thread_pthread.h', 'pycore_condvar.h'), + + # other + + ('Objects/stringlib/join.h', 'stringlib/stringdefs.h'), + ('Objects/stringlib/ctype.h', 'stringlib/stringdefs.h'), + ('Objects/stringlib/transmogrify.h', 'stringlib/stringdefs.h'), + # ('Objects/stringlib/fastsearch.h', 'stringlib/stringdefs.h'), + # ('Objects/stringlib/count.h', 'stringlib/stringdefs.h'), + # ('Objects/stringlib/find.h', 'stringlib/stringdefs.h'), + # ('Objects/stringlib/partition.h', 'stringlib/stringdefs.h'), + # ('Objects/stringlib/split.h', 'stringlib/stringdefs.h'), + ('Objects/stringlib/fastsearch.h', 'stringlib/ucs1lib.h'), + ('Objects/stringlib/count.h', 'stringlib/ucs1lib.h'), + ('Objects/stringlib/find.h', 'stringlib/ucs1lib.h'), + ('Objects/stringlib/partition.h', 'stringlib/ucs1lib.h'), + ('Objects/stringlib/split.h', 'stringlib/ucs1lib.h'), + ('Objects/stringlib/find_max_char.h', 'Objects/stringlib/ucs1lib.h'), + ('Objects/stringlib/count.h', 'Objects/stringlib/fastsearch.h'), + ('Objects/stringlib/find.h', 'Objects/stringlib/fastsearch.h'), + ('Objects/stringlib/partition.h', 'Objects/stringlib/fastsearch.h'), + ('Objects/stringlib/replace.h', 'Objects/stringlib/fastsearch.h'), + ('Objects/stringlib/repr.h', 'Objects/stringlib/fastsearch.h'), + ('Objects/stringlib/split.h', 'Objects/stringlib/fastsearch.h'), +]) + +MACROS = format_tsv_lines([ + # (glob, name, value) + + ('Include/internal/*.h', 'Py_BUILD_CORE', '1'), + ('Python/**/*.c', 'Py_BUILD_CORE', '1'), + ('Python/**/*.h', 'Py_BUILD_CORE', '1'), + ('Parser/**/*.c', 'Py_BUILD_CORE', '1'), + ('Parser/**/*.h', 'Py_BUILD_CORE', '1'), + ('Objects/**/*.c', 'Py_BUILD_CORE', '1'), + ('Objects/**/*.h', 'Py_BUILD_CORE', '1'), + + ('Modules/_asynciomodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_codecsmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_collectionsmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_ctypes/_ctypes.c', 'Py_BUILD_CORE', '1'), + ('Modules/_ctypes/cfield.c', 'Py_BUILD_CORE', '1'), + ('Modules/_cursesmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_datetimemodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_functoolsmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_heapqmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_io/*.c', 'Py_BUILD_CORE', '1'), + ('Modules/_io/*.h', 'Py_BUILD_CORE', '1'), + ('Modules/_localemodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_operator.c', 'Py_BUILD_CORE', '1'), + ('Modules/_posixsubprocess.c', 'Py_BUILD_CORE', '1'), + ('Modules/_sre/sre.c', 'Py_BUILD_CORE', '1'), + ('Modules/_threadmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/_tracemalloc.c', 'Py_BUILD_CORE', '1'), + ('Modules/_weakref.c', 'Py_BUILD_CORE', '1'), + ('Modules/_zoneinfo.c', 'Py_BUILD_CORE', '1'), + ('Modules/atexitmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/cmathmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/faulthandler.c', 'Py_BUILD_CORE', '1'), + ('Modules/gcmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/getpath.c', 'Py_BUILD_CORE', '1'), + ('Modules/getpath_noop.c', 'Py_BUILD_CORE', '1'), + ('Modules/itertoolsmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/main.c', 'Py_BUILD_CORE', '1'), + ('Modules/mathmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/posixmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/sha256module.c', 'Py_BUILD_CORE', '1'), + ('Modules/sha512module.c', 'Py_BUILD_CORE', '1'), + ('Modules/signalmodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/symtablemodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/timemodule.c', 'Py_BUILD_CORE', '1'), + ('Modules/unicodedata.c', 'Py_BUILD_CORE', '1'), + + ('Modules/_json.c', 'Py_BUILD_CORE_BUILTIN', '1'), + ('Modules/_pickle.c', 'Py_BUILD_CORE_BUILTIN', '1'), + ('Modules/_testinternalcapi.c', 'Py_BUILD_CORE_BUILTIN', '1'), + + ('Include/cpython/abstract.h', 'Py_CPYTHON_ABSTRACTOBJECT_H', '1'), + ('Include/cpython/bytearrayobject.h', 'Py_CPYTHON_BYTEARRAYOBJECT_H', '1'), + ('Include/cpython/bytesobject.h', 'Py_CPYTHON_BYTESOBJECT_H', '1'), + ('Include/cpython/ceval.h', 'Py_CPYTHON_CEVAL_H', '1'), + ('Include/cpython/code.h', 'Py_CPYTHON_CODE_H', '1'), + ('Include/cpython/dictobject.h', 'Py_CPYTHON_DICTOBJECT_H', '1'), + ('Include/cpython/fileobject.h', 'Py_CPYTHON_FILEOBJECT_H', '1'), + ('Include/cpython/fileutils.h', 'Py_CPYTHON_FILEUTILS_H', '1'), + ('Include/cpython/frameobject.h', 'Py_CPYTHON_FRAMEOBJECT_H', '1'), + ('Include/cpython/import.h', 'Py_CPYTHON_IMPORT_H', '1'), + ('Include/cpython/interpreteridobject.h', 'Py_CPYTHON_INTERPRETERIDOBJECT_H', '1'), + ('Include/cpython/listobject.h', 'Py_CPYTHON_LISTOBJECT_H', '1'), + ('Include/cpython/methodobject.h', 'Py_CPYTHON_METHODOBJECT_H', '1'), + ('Include/cpython/object.h', 'Py_CPYTHON_OBJECT_H', '1'), + ('Include/cpython/objimpl.h', 'Py_CPYTHON_OBJIMPL_H', '1'), + ('Include/cpython/pyerrors.h', 'Py_CPYTHON_ERRORS_H', '1'), + ('Include/cpython/pylifecycle.h', 'Py_CPYTHON_PYLIFECYCLE_H', '1'), + ('Include/cpython/pymem.h', 'Py_CPYTHON_PYMEM_H', '1'), + ('Include/cpython/pystate.h', 'Py_CPYTHON_PYSTATE_H', '1'), + ('Include/cpython/sysmodule.h', 'Py_CPYTHON_SYSMODULE_H', '1'), + ('Include/cpython/traceback.h', 'Py_CPYTHON_TRACEBACK_H', '1'), + ('Include/cpython/tupleobject.h', 'Py_CPYTHON_TUPLEOBJECT_H', '1'), + ('Include/cpython/unicodeobject.h', 'Py_CPYTHON_UNICODEOBJECT_H', '1'), + + # implied include of <unistd.h> + ('Include/**/*.h', '_POSIX_THREADS', '1'), + ('Include/**/*.h', 'HAVE_PTHREAD_H', '1'), + + # from pyconfig.h + ('Include/cpython/pthread_stubs.h', 'HAVE_PTHREAD_STUBS', '1'), + ('Python/thread_pthread_stubs.h', 'HAVE_PTHREAD_STUBS', '1'), + + # from Objects/bytesobject.c + ('Objects/stringlib/partition.h', 'STRINGLIB_GET_EMPTY()', 'bytes_get_empty()'), + ('Objects/stringlib/join.h', 'STRINGLIB_MUTABLE', '0'), + ('Objects/stringlib/partition.h', 'STRINGLIB_MUTABLE', '0'), + ('Objects/stringlib/split.h', 'STRINGLIB_MUTABLE', '0'), + ('Objects/stringlib/transmogrify.h', 'STRINGLIB_MUTABLE', '0'), + + # from Makefile + ('Modules/getpath.c', 'PYTHONPATH', '1'), + ('Modules/getpath.c', 'PREFIX', '...'), + ('Modules/getpath.c', 'EXEC_PREFIX', '...'), + ('Modules/getpath.c', 'VERSION', '...'), + ('Modules/getpath.c', 'VPATH', '...'), + ('Modules/getpath.c', 'PLATLIBDIR', '...'), + # ('Modules/_dbmmodule.c', 'USE_GDBM_COMPAT', '1'), + ('Modules/_dbmmodule.c', 'USE_NDBM', '1'), + # ('Modules/_dbmmodule.c', 'USE_BERKDB', '1'), + + # See: setup.py + ('Modules/_decimal/**/*.c', 'CONFIG_64', '1'), + ('Modules/_decimal/**/*.c', 'ASM', '1'), + ('Modules/expat/xmlparse.c', 'HAVE_EXPAT_CONFIG_H', '1'), + ('Modules/expat/xmlparse.c', 'XML_POOR_ENTROPY', '1'), + ('Modules/_dbmmodule.c', 'HAVE_GDBM_DASH_NDBM_H', '1'), + + # others + ('Modules/_sre/sre_lib.h', 'LOCAL(type)', 'static inline type'), + ('Modules/_sre/sre_lib.h', 'SRE(F)', 'sre_ucs2_##F'), + ('Objects/stringlib/codecs.h', 'STRINGLIB_IS_UNICODE', '1'), + ('Include/internal/pycore_crossinterp_data_registry.h', 'Py_CORE_CROSSINTERP_DATA_REGISTRY_H', '1'), +]) # -pthread # -Wno-unused-result diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index b128abca39fb41..ba9119a06d57e2 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -24,6 +24,7 @@ Modules/posixmodule.c os_dup2_impl dup3_works - ## guards around resource init Python/thread_pthread.h PyThread__init_thread lib_initialized - +Modules/_struct.c - endian_tables_init_once - ##----------------------- ## other values (not Python-specific) @@ -167,6 +168,7 @@ Python/sysmodule.c - _preinit_xoptions - # XXX need race protection? Modules/faulthandler.c faulthandler_dump_traceback reentrant - Modules/faulthandler.c faulthandler_dump_c_stack reentrant - +Modules/grpmodule.c - group_db_mutex - Python/pylifecycle.c _Py_FatalErrorFormat reentrant - Python/pylifecycle.c fatal_error reentrant - @@ -225,6 +227,7 @@ Modules/_datetimemodule.c datetime_isoformat specs - Modules/_datetimemodule.c parse_hh_mm_ss_ff correction - Modules/_datetimemodule.c time_isoformat specs - Modules/_datetimemodule.c - capi_types - +Modules/_datetimemodule.c normalize_century cache - Modules/_decimal/_decimal.c - cond_map_template - Modules/_decimal/_decimal.c - dec_signal_string - Modules/_decimal/_decimal.c - dflt_ctx - diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 9e60d219a71c4a..0b3d89660099de 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -443,7 +443,7 @@ def instruction_size(self, """Replace the INSTRUCTION_SIZE macro with the size of the current instruction.""" if uop.instruction_size is None: raise analysis_error("The INSTRUCTION_SIZE macro requires uop.instruction_size to be set", tkn) - self.out.emit(f" {uop.instruction_size} ") + self.out.emit(f" {uop.instruction_size}u ") return True def _print_storage(self, reason:str, storage: Storage) -> None: diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index 1ee4306f3eac1d..72020133738fa5 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -81,7 +81,7 @@ and a piece of C code describing its semantics: (definition | family | pseudo)+ definition: - "inst" "(" NAME ["," stack_effect] ")" "{" C-code "}" + "inst" "(" NAME "," stack_effect ")" "{" C-code "}" | "op" "(" NAME "," stack_effect ")" "{" C-code "}" | diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 620e4b6f1f4a69..10567204dcc599 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -157,6 +157,13 @@ def generate_deopt_table(analysis: Analysis, out: CWriter) -> None: if inst.family is not None: deopt = inst.family.name deopts.append((inst.name, deopt)) + defined = set(analysis.opmap.values()) + for i in range(256): + if i not in defined: + deopts.append((f'{i}', f'{i}')) + + assert len(deopts) == 256 + assert len(set(x[0] for x in deopts)) == 256 for name, deopt in sorted(deopts): out.emit(f"[{name}] = {deopt},\n") out.emit("};\n\n") diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 276f306dfffa18..fc3bc47286f7f6 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -91,7 +91,7 @@ def deopt_if( self.emit("}\n") return not always_true(first_tkn) - def exit_if( # type: ignore[override] + def exit_if( self, tkn: Token, tkn_iter: TokenIterator, diff --git a/Tools/check-c-api-docs/ignored_c_api.txt b/Tools/check-c-api-docs/ignored_c_api.txt new file mode 100644 index 00000000000000..c5a277bb3a599d --- /dev/null +++ b/Tools/check-c-api-docs/ignored_c_api.txt @@ -0,0 +1,99 @@ +# pydtrace_probes.h +PyDTrace_AUDIT +PyDTrace_FUNCTION_ENTRY +PyDTrace_FUNCTION_RETURN +PyDTrace_GC_DONE +PyDTrace_GC_START +PyDTrace_IMPORT_FIND_LOAD_DONE +PyDTrace_IMPORT_FIND_LOAD_START +PyDTrace_INSTANCE_DELETE_DONE +PyDTrace_INSTANCE_DELETE_START +PyDTrace_INSTANCE_NEW_DONE +PyDTrace_INSTANCE_NEW_START +PyDTrace_LINE +# fileobject.h +Py_FileSystemDefaultEncodeErrors +Py_FileSystemDefaultEncoding +Py_HasFileSystemDefaultEncoding +Py_UTF8Mode +# pyhash.h +Py_HASH_EXTERNAL +# exports.h +PyAPI_DATA +Py_EXPORTED_SYMBOL +Py_IMPORTED_SYMBOL +Py_LOCAL_SYMBOL +# modsupport.h +PyABIInfo_FREETHREADING_AGNOSTIC +# moduleobject.h +PyModuleDef_Type +# object.h +Py_INVALID_SIZE +Py_TPFLAGS_HAVE_VERSION_TAG +Py_TPFLAGS_INLINE_VALUES +Py_TPFLAGS_IS_ABSTRACT +# pyexpat.h +PyExpat_CAPI_MAGIC +PyExpat_CAPSULE_NAME +# pyport.h +Py_ALIGNED +Py_ARITHMETIC_RIGHT_SHIFT +Py_CAN_START_THREADS +Py_FORCE_EXPANSION +Py_GCC_ATTRIBUTE +Py_LL +Py_SAFE_DOWNCAST +Py_ULL +Py_VA_COPY +# unicodeobject.h +Py_UNICODE_SIZE +# cpython/methodobject.h +PyCFunction_GET_CLASS +# cpython/compile.h +PyCF_ALLOW_INCOMPLETE_INPUT +PyCF_COMPILE_MASK +PyCF_DONT_IMPLY_DEDENT +PyCF_IGNORE_COOKIE +PyCF_MASK +PyCF_MASK_OBSOLETE +PyCF_SOURCE_IS_UTF8 +# cpython/descrobject.h +PyDescr_COMMON +PyDescr_NAME +PyDescr_TYPE +PyWrapperFlag_KEYWORDS +# cpython/fileobject.h +PyFile_NewStdPrinter +PyStdPrinter_Type +Py_UniversalNewlineFgets +# cpython/setobject.h +PySet_MINSIZE +# cpython/ceval.h +PyUnstable_CopyPerfMapFile +PyUnstable_PerfTrampoline_CompileCode +PyUnstable_PerfTrampoline_SetPersistAfterFork +# cpython/genobject.h +PyAsyncGenASend_CheckExact +# cpython/longintrepr.h +PyLong_BASE +PyLong_MASK +PyLong_SHIFT +# cpython/pyerrors.h +PyException_HEAD +# cpython/pyframe.h +PyUnstable_EXECUTABLE_KINDS +PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION +PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR +PyUnstable_EXECUTABLE_KIND_PY_FUNCTION +PyUnstable_EXECUTABLE_KIND_SKIP +# cpython/pylifecycle.h +Py_FrozenMain +# cpython/unicodeobject.h +PyUnicode_IS_COMPACT +PyUnicode_IS_COMPACT_ASCII +# 3.14 only +Py_TPFLAGS_PREHEADER +PyUnicode_AsDecodedObject +PyUnicode_AsDecodedUnicode +PyUnicode_AsEncodedObject +PyUnicode_AsEncodedUnicode diff --git a/Tools/check-c-api-docs/main.py b/Tools/check-c-api-docs/main.py new file mode 100644 index 00000000000000..6bdf80a9ae8985 --- /dev/null +++ b/Tools/check-c-api-docs/main.py @@ -0,0 +1,193 @@ +import re +from pathlib import Path +import sys +import _colorize +import textwrap + +SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(") +SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ") +SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)") +SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)") + +CPYTHON = Path(__file__).parent.parent.parent +INCLUDE = CPYTHON / "Include" +C_API_DOCS = CPYTHON / "Doc" / "c-api" +IGNORED = ( + (CPYTHON / "Tools" / "check-c-api-docs" / "ignored_c_api.txt") + .read_text() + .split("\n") +) + +for index, line in enumerate(IGNORED): + if line.startswith("#"): + IGNORED.pop(index) + +MISTAKE = """ +If this is a mistake and this script should not be failing, create an +issue and tag Peter (@ZeroIntensity) on it.\ +""" + + +def found_undocumented(singular: bool) -> str: + some = "an" if singular else "some" + s = "" if singular else "s" + these = "this" if singular else "these" + them = "it" if singular else "them" + were = "was" if singular else "were" + + return ( + textwrap.dedent( + f""" + Found {some} undocumented C API{s}! + + Python requires documentation on all public C API symbols, macros, and types. + If {these} API{s} {were} not meant to be public, prefix {them} with a + leading underscore (_PySomething_API) or move {them} to the internal C API + (pycore_*.h files). + + In exceptional cases, certain APIs can be ignored by adding them to + Tools/check-c-api-docs/ignored_c_api.txt + """ + ) + + MISTAKE + ) + + +def found_ignored_documented(singular: bool) -> str: + some = "a" if singular else "some" + s = "" if singular else "s" + them = "it" if singular else "them" + were = "was" if singular else "were" + they = "it" if singular else "they" + + return ( + textwrap.dedent( + f""" + Found {some} C API{s} listed in Tools/c-api-docs-check/ignored_c_api.txt, but + {they} {were} found in the documentation. To fix this, remove {them} from + ignored_c_api.txt. + """ + ) + + MISTAKE + ) + + +def is_documented(name: str) -> bool: + """ + Is a name present in the C API documentation? + """ + for path in C_API_DOCS.iterdir(): + if path.is_dir(): + continue + if path.suffix != ".rst": + continue + + text = path.read_text(encoding="utf-8") + if name in text: + return True + + return False + + +def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]: + """ + Scan a header file for C API functions. + """ + undocumented: list[str] = [] + documented_ignored: list[str] = [] + colors = _colorize.get_colors() + + def check_for_name(name: str) -> None: + documented = is_documented(name) + if documented and (name in IGNORED): + documented_ignored.append(name) + elif not documented and (name not in IGNORED): + undocumented.append(name) + + for function in SIMPLE_FUNCTION_REGEX.finditer(text): + name = function.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for macro in SIMPLE_MACRO_REGEX.finditer(text): + name = macro.group(1) + if not name.startswith("Py"): + continue + + if "(" in name: + name = name[: name.index("(")] + + check_for_name(name) + + for inline in SIMPLE_INLINE_REGEX.finditer(text): + name = inline.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for data in SIMPLE_DATA_REGEX.finditer(text): + name = data.group(1) + if not name.startswith("Py"): + continue + + check_for_name(name) + + # Remove duplicates and sort alphabetically to keep the output deterministic + undocumented = list(set(undocumented)) + undocumented.sort() + + if undocumented or documented_ignored: + print(f"{filename} {colors.RED}BAD{colors.RESET}") + for name in undocumented: + print(f"{colors.BOLD_RED}UNDOCUMENTED:{colors.RESET} {name}") + for name in documented_ignored: + print(f"{colors.BOLD_YELLOW}DOCUMENTED BUT IGNORED:{colors.RESET} {name}") + else: + print(f"{filename} {colors.GREEN}OK{colors.RESET}") + + return undocumented, documented_ignored + + +def main() -> None: + print("Scanning for undocumented C API functions...") + files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()] + all_missing: list[str] = [] + all_found_ignored: list[str] = [] + + for file in files: + if file.is_dir(): + continue + assert file.exists() + text = file.read_text(encoding="utf-8") + missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text) + all_found_ignored += ignored + all_missing += missing + + fail = False + to_check = [ + (all_missing, "missing", found_undocumented(len(all_missing) == 1)), + ( + all_found_ignored, + "documented but ignored", + found_ignored_documented(len(all_found_ignored) == 1), + ), + ] + for name_list, what, message in to_check: + if not name_list: + continue + + s = "s" if len(name_list) != 1 else "" + print(f"-- {len(name_list)} {what} C API{s} --") + for name in name_list: + print(f" - {name}") + print(message) + fail = True + + sys.exit(1 if fail else 0) + + +if __name__ == "__main__": + main() diff --git a/Tools/clinic/.ruff.toml b/Tools/clinic/.ruff.toml index 5033887df0c1cd..944d17ee3e9855 100644 --- a/Tools/clinic/.ruff.toml +++ b/Tools/clinic/.ruff.toml @@ -17,9 +17,6 @@ ignore = [ # Use f-strings instead of format specifiers. # Doesn't always make code more readable. "UP032", - # Use PEP-604 unions rather than tuples for isinstance() checks. - # Makes code slower and more verbose. https://github.com/astral-sh/ruff/issues/7871. - "UP038", ] unfixable = [ # The autofixes sometimes do the wrong things for these; diff --git a/Tools/clinic/libclinic/converters.py b/Tools/clinic/libclinic/converters.py index 633fb5f56a6693..8c92b766ba0862 100644 --- a/Tools/clinic/libclinic/converters.py +++ b/Tools/clinic/libclinic/converters.py @@ -17,6 +17,54 @@ TypeSet = set[bltns.type[object]] +class BaseUnsignedIntConverter(CConverter): + + def use_converter(self) -> None: + if self.converter: + self.add_include('pycore_long.h', + f'{self.converter}()') + + def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: + if not limited_capi: + return super().parse_arg(argname, displayname, limited_capi=limited_capi) + return self.format_code(""" + {{{{ + Py_ssize_t _bytes = PyLong_AsNativeBytes({argname}, &{paramname}, sizeof({type}), + Py_ASNATIVEBYTES_NATIVE_ENDIAN | + Py_ASNATIVEBYTES_ALLOW_INDEX | + Py_ASNATIVEBYTES_REJECT_NEGATIVE | + Py_ASNATIVEBYTES_UNSIGNED_BUFFER); + if (_bytes < 0) {{{{ + goto exit; + }}}} + if ((size_t)_bytes > sizeof({type})) {{{{ + PyErr_SetString(PyExc_OverflowError, + "Python int too large for C {type}"); + goto exit; + }}}} + }}}} + """, + argname=argname, + type=self.type) + + +class uint8_converter(BaseUnsignedIntConverter): + type = "uint8_t" + converter = '_PyLong_UInt8_Converter' + +class uint16_converter(BaseUnsignedIntConverter): + type = "uint16_t" + converter = '_PyLong_UInt16_Converter' + +class uint32_converter(BaseUnsignedIntConverter): + type = "uint32_t" + converter = '_PyLong_UInt32_Converter' + +class uint64_converter(BaseUnsignedIntConverter): + type = "uint64_t" + converter = '_PyLong_UInt64_Converter' + + class bool_converter(CConverter): type = 'int' default_type = bool @@ -211,29 +259,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st return super().parse_arg(argname, displayname, limited_capi=limited_capi) -def format_inline_unsigned_int_converter(self: CConverter, argname: str) -> str: - return self.format_code(""" - {{{{ - Py_ssize_t _bytes = PyLong_AsNativeBytes({argname}, &{paramname}, sizeof({type}), - Py_ASNATIVEBYTES_NATIVE_ENDIAN | - Py_ASNATIVEBYTES_ALLOW_INDEX | - Py_ASNATIVEBYTES_REJECT_NEGATIVE | - Py_ASNATIVEBYTES_UNSIGNED_BUFFER); - if (_bytes < 0) {{{{ - goto exit; - }}}} - if ((size_t)_bytes > sizeof({type})) {{{{ - PyErr_SetString(PyExc_OverflowError, - "Python int too large for C {type}"); - goto exit; - }}}} - }}}} - """, - argname=argname, - type=self.type) - - -class unsigned_short_converter(CConverter): +class unsigned_short_converter(BaseUnsignedIntConverter): type = 'unsigned short' default_type = int c_ignored_default = "0" @@ -244,11 +270,6 @@ def converter_init(self, *, bitwise: bool = False) -> None: else: self.converter = '_PyLong_UnsignedShort_Converter' - def use_converter(self) -> None: - if self.converter == '_PyLong_UnsignedShort_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedShort_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'H': return self.format_code(""" @@ -258,9 +279,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st }}}} """, argname=argname) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) @add_legacy_c_converter('C', accept={str}) @@ -311,7 +330,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st return super().parse_arg(argname, displayname, limited_capi=limited_capi) -class unsigned_int_converter(CConverter): +class unsigned_int_converter(BaseUnsignedIntConverter): type = 'unsigned int' default_type = int c_ignored_default = "0" @@ -322,11 +341,6 @@ def converter_init(self, *, bitwise: bool = False) -> None: else: self.converter = '_PyLong_UnsignedInt_Converter' - def use_converter(self) -> None: - if self.converter == '_PyLong_UnsignedInt_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedInt_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'I': return self.format_code(""" @@ -336,9 +350,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st }}}} """, argname=argname) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class long_converter(CConverter): @@ -359,7 +371,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st return super().parse_arg(argname, displayname, limited_capi=limited_capi) -class unsigned_long_converter(CConverter): +class unsigned_long_converter(BaseUnsignedIntConverter): type = 'unsigned long' default_type = int c_ignored_default = "0" @@ -370,11 +382,6 @@ def converter_init(self, *, bitwise: bool = False) -> None: else: self.converter = '_PyLong_UnsignedLong_Converter' - def use_converter(self) -> None: - if self.converter == '_PyLong_UnsignedLong_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedLong_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'k': return self.format_code(""" @@ -387,9 +394,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st argname=argname, bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi), ) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class long_long_converter(CConverter): @@ -410,7 +415,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st return super().parse_arg(argname, displayname, limited_capi=limited_capi) -class unsigned_long_long_converter(CConverter): +class unsigned_long_long_converter(BaseUnsignedIntConverter): type = 'unsigned long long' default_type = int c_ignored_default = "0" @@ -421,11 +426,6 @@ def converter_init(self, *, bitwise: bool = False) -> None: else: self.converter = '_PyLong_UnsignedLongLong_Converter' - def use_converter(self) -> None: - if self.converter == '_PyLong_UnsignedLongLong_Converter': - self.add_include('pycore_long.h', - '_PyLong_UnsignedLongLong_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'K': return self.format_code(""" @@ -438,9 +438,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st argname=argname, bad_argument=self.bad_argument(displayname, 'int', limited_capi=limited_capi), ) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class Py_ssize_t_converter(CConverter): @@ -557,15 +555,11 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st argname=argname) -class size_t_converter(CConverter): +class size_t_converter(BaseUnsignedIntConverter): type = 'size_t' converter = '_PyLong_Size_t_Converter' c_ignored_default = "0" - def use_converter(self) -> None: - self.add_include('pycore_long.h', - '_PyLong_Size_t_Converter()') - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'n': return self.format_code(""" @@ -575,9 +569,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st }}}} """, argname=argname) - if not limited_capi: - return super().parse_arg(argname, displayname, limited_capi=limited_capi) - return format_inline_unsigned_int_converter(self, argname) + return super().parse_arg(argname, displayname, limited_capi=limited_capi) class fildes_converter(CConverter): @@ -928,6 +920,26 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st return super().parse_arg(argname, displayname, limited_capi=limited_capi) +class _unicode_fs_converter_base(CConverter): + type = 'PyObject *' + + def converter_init(self) -> None: + if self.default is not unspecified: + fail(f"{self.__class__.__name__} does not support default values") + self.c_default = 'NULL' + + def cleanup(self) -> str: + return f"Py_XDECREF({self.parser_name});" + + +class unicode_fs_encoded_converter(_unicode_fs_converter_base): + converter = 'PyUnicode_FSConverter' + + +class unicode_fs_decoded_converter(_unicode_fs_converter_base): + converter = 'PyUnicode_FSDecoder' + + @add_legacy_c_converter('u') @add_legacy_c_converter('u#', zeroes=True) @add_legacy_c_converter('Z', accept={str, NoneType}) diff --git a/Tools/clinic/libclinic/dsl_parser.py b/Tools/clinic/libclinic/dsl_parser.py index 282ff64cd33089..eca41531f7c8e9 100644 --- a/Tools/clinic/libclinic/dsl_parser.py +++ b/Tools/clinic/libclinic/dsl_parser.py @@ -877,43 +877,16 @@ def parse_parameter(self, line: str) -> None: # handle "as" for parameters too c_name = None - name, have_as_token, trailing = line.partition(' as ') - if have_as_token: - name = name.strip() - if ' ' not in name: - fields = trailing.strip().split(' ') - if not fields: - fail("Invalid 'as' clause!") - c_name = fields[0] - if c_name.endswith(':'): - name += ':' - c_name = c_name[:-1] - fields[0] = name - line = ' '.join(fields) - - default: str | None - base, equals, default = line.rpartition('=') - if not equals: - base = default - default = None - - module = None + m = re.match(r'(?:\* *)?\w+( +as +(\w+))', line) + if m: + c_name = m[2] + line = line[:m.start(1)] + line[m.end(1):] + try: - ast_input = f"def x({base}): pass" + ast_input = f"def x({line}\n): pass" module = ast.parse(ast_input) except SyntaxError: - try: - # the last = was probably inside a function call, like - # c: int(accept={str}) - # so assume there was no actual default value. - default = None - ast_input = f"def x({line}): pass" - module = ast.parse(ast_input) - except SyntaxError: - pass - if not module: - fail(f"Function {self.function.name!r} has an invalid parameter declaration:\n\t", - repr(line)) + fail(f"Function {self.function.name!r} has an invalid parameter declaration: {line!r}") function = module.body[0] assert isinstance(function, ast.FunctionDef) @@ -922,9 +895,6 @@ def parse_parameter(self, line: str) -> None: if len(function_args.args) > 1: fail(f"Function {self.function.name!r} has an " f"invalid parameter declaration (comma?): {line!r}") - if function_args.defaults or function_args.kw_defaults: - fail(f"Function {self.function.name!r} has an " - f"invalid parameter declaration (default value?): {line!r}") if function_args.kwarg: fail(f"Function {self.function.name!r} has an " f"invalid parameter declaration (**kwargs?): {line!r}") @@ -944,7 +914,7 @@ def parse_parameter(self, line: str) -> None: name = 'varpos_' + name value: object - if not default: + if not function_args.defaults: if is_vararg: value = NULL else: @@ -955,17 +925,13 @@ def parse_parameter(self, line: str) -> None: if 'py_default' in kwargs: fail("You can't specify py_default without specifying a default value!") else: - if is_vararg: - fail("Vararg can't take a default value!") + expr = function_args.defaults[0] + default = ast_input[expr.col_offset: expr.end_col_offset].strip() if self.parameter_state is ParamState.REQUIRED: self.parameter_state = ParamState.OPTIONAL - default = default.strip() bad = False - ast_input = f"x = {default}" try: - module = ast.parse(ast_input) - if 'c_default' not in kwargs: # we can only represent very simple data values in C. # detect whether default is okay, via a denylist @@ -992,13 +958,14 @@ def bad_node(self, node: ast.AST) -> None: visit_Starred = bad_node denylist = DetectBadNodes() - denylist.visit(module) + denylist.visit(expr) bad = denylist.bad else: # if they specify a c_default, we can be more lenient about the default value. # but at least make an attempt at ensuring it's a valid expression. + code = compile(ast.Expression(expr), '<expr>', 'eval') try: - value = eval(default) + value = eval(code) except NameError: pass # probably a named constant except Exception as e: @@ -1010,9 +977,6 @@ def bad_node(self, node: ast.AST) -> None: if bad: fail(f"Unsupported expression as default value: {default!r}") - assignment = module.body[0] - assert isinstance(assignment, ast.Assign) - expr = assignment.value # mild hack: explicitly support NULL as a default value c_default: str | None if isinstance(expr, ast.Name) and expr.id == 'NULL': @@ -1064,8 +1028,6 @@ def bad_node(self, node: ast.AST) -> None: else: c_default = py_default - except SyntaxError as e: - fail(f"Syntax error: {e.text!r}") except (ValueError, AttributeError): value = unknown c_default = kwargs.get("c_default") diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 926bc66b944c6f..b815376b7ed56f 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -27,6 +27,7 @@ import sys import threading import time +from dataclasses import dataclass # The iterations in individual benchmarks are scaled by this factor. WORK_SCALE = 100 @@ -189,6 +190,17 @@ def thread_local_read(): _ = tmp.x +@dataclass +class MyDataClass: + x: int + y: int + z: int + +@register_benchmark +def instantiate_dataclass(): + for _ in range(1000 * WORK_SCALE): + obj = MyDataClass(x=1, y=2, z=3) + def bench_one_thread(func): t0 = time.perf_counter_ns() func() diff --git a/Tools/i18n/.ruff.toml b/Tools/i18n/.ruff.toml new file mode 100644 index 00000000000000..a8f4f2f5a96d7b --- /dev/null +++ b/Tools/i18n/.ruff.toml @@ -0,0 +1,10 @@ +extend = "../../.ruff.toml" # Inherit the project-wide settings + +target-version = "py313" + +[lint] +select = [ + "F", # pyflakes + "I", # isort + "UP", # pyupgrade +] diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index b407a8a643be7c..085da1c63b701c 100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -8,6 +8,7 @@ """ import locale import sys + _locale = locale # Location of the X11 alias file. @@ -89,16 +90,15 @@ def parse_glibc_supported(filename): def pprint(data): items = sorted(data.items()) for k, v in items: - print(' %-40s%a,' % ('%a:' % k, v)) + print(f" {k!a:<40}{v!a},") def print_differences(data, olddata): items = sorted(olddata.items()) for k, v in items: if k not in data: - print('# removed %a' % k) + print(f'# removed {k!a}') elif olddata[k] != data[k]: - print('# updated %a -> %a to %a' % \ - (k, olddata[k], data[k])) + print(f'# updated {k!a} -> {olddata[k]!a} to {data[k]!a}') # Additions are not mentioned def optimize(data): @@ -121,7 +121,7 @@ def check(data): errors = 0 for k, v in data.items(): if locale.normalize(k) != v: - print('ERROR: %a -> %a != %a' % (k, locale.normalize(k), v), + print(f'ERROR: {k!a} -> {locale.normalize(k)!a} != {v!a}', file=sys.stderr) errors += 1 return errors @@ -131,15 +131,18 @@ def check(data): parser = argparse.ArgumentParser() parser.add_argument('--locale-alias', default=LOCALE_ALIAS, help='location of the X11 alias file ' - '(default: %a)' % LOCALE_ALIAS) + f'(default: {LOCALE_ALIAS})') parser.add_argument('--glibc-supported', default=SUPPORTED, help='location of the glibc SUPPORTED locales file ' - '(default: %a)' % SUPPORTED) + f'(default: {SUPPORTED})') args = parser.parse_args() data = locale.locale_alias.copy() data.update(parse_glibc_supported(args.glibc_supported)) data.update(parse(args.locale_alias)) + # Hardcode 'c.utf8' -> 'C.UTF-8' because 'en_US.UTF-8' does not exist + # on all platforms. + data['c.utf8'] = 'C.UTF-8' while True: # Repeat optimization while the size is decreased. n = len(data) diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index cd5f1ed9f3e268..29b41b4c3c6311 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -25,14 +25,14 @@ Display version information and exit. """ -import os -import sys +import array import ast +import codecs import getopt +import os import struct -import array +import sys from email.parser import HeaderParser -import codecs __version__ = "1.2" @@ -114,7 +114,7 @@ def make(filename, outfile): try: with open(infile, 'rb') as f: lines = f.readlines() - except IOError as msg: + except OSError as msg: print(msg, file=sys.stderr) sys.exit(1) @@ -127,6 +127,7 @@ def make(filename, outfile): sys.exit(1) section = msgctxt = None + msgid = msgstr = b'' fuzzy = 0 # Start off assuming Latin-1, so everything decodes without failure, @@ -178,7 +179,7 @@ def make(filename, outfile): # This is a message with plural forms elif l.startswith('msgid_plural'): if section != ID: - print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno), + print(f'msgid_plural not preceded by msgid on {infile}:{lno}', file=sys.stderr) sys.exit(1) l = l[12:] @@ -189,7 +190,7 @@ def make(filename, outfile): section = STR if l.startswith('msgstr['): if not is_plural: - print('plural without msgid_plural on %s:%d' % (infile, lno), + print(f'plural without msgid_plural on {infile}:{lno}', file=sys.stderr) sys.exit(1) l = l.split(']', 1)[1] @@ -197,7 +198,7 @@ def make(filename, outfile): msgstr += b'\0' # Separator of the various plural forms else: if is_plural: - print('indexed msgstr required for plural on %s:%d' % (infile, lno), + print(f'indexed msgstr required for plural on {infile}:{lno}', file=sys.stderr) sys.exit(1) l = l[6:] @@ -213,8 +214,7 @@ def make(filename, outfile): elif section == STR: msgstr += l.encode(encoding) else: - print('Syntax error on %s:%d' % (infile, lno), \ - 'before:', file=sys.stderr) + print(f'Syntax error on {infile}:{lno} before:', file=sys.stderr) print(l, file=sys.stderr) sys.exit(1) # Add last entry @@ -227,7 +227,7 @@ def make(filename, outfile): try: with open(outfile,"wb") as f: f.write(output) - except IOError as msg: + except OSError as msg: print(msg, file=sys.stderr) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index f46b05067d7fde..ddf4474d2bce55 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -193,7 +193,7 @@ def make_escapes(pass_nonascii): escape = escape_ascii else: escape = escape_nonascii - escapes = [r"\%03o" % i for i in range(256)] + escapes = [fr"\{i:03o}" for i in range(256)] for i in range(32, 127): escapes[i] = chr(i) escapes[ord('\\')] = r'\\' @@ -796,7 +796,7 @@ class Options: try: with open(options.excludefilename) as fp: options.toexclude = fp.readlines() - except IOError: + except OSError: print(f"Can't read --exclude-file: {options.excludefilename}", file=sys.stderr) sys.exit(1) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 03b0ba647b0db7..5977a7a30502ba 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -140,11 +140,7 @@ class Hole: def __post_init__(self) -> None: self.func = _PATCH_FUNCS[self.kind] - def fold( - self, - other: typing.Self, - body: bytes | bytearray, - ) -> typing.Self | None: + def fold(self, other: typing.Self, body: bytearray) -> typing.Self | None: """Combine two holes into a single hole, if possible.""" instruction_a = int.from_bytes( body[self.offset : self.offset + 4], byteorder=sys.byteorder diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 6ceb4404e74ce7..f1085cc9bf081d 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -10,6 +10,7 @@ import sys import tempfile import typing +import shlex import _llvm import _schema @@ -46,7 +47,9 @@ class _Target(typing.Generic[_S, _R]): stable: bool = False debug: bool = False verbose: bool = False + cflags: str = "" known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() def _get_nop(self) -> bytes: if re.fullmatch(r"aarch64-.*", self.triple): @@ -57,15 +60,19 @@ def _get_nop(self) -> bytes: raise ValueError(f"NOP not defined for {self.triple}") return nop - def _compute_digest(self, out: pathlib.Path) -> str: + def _compute_digest(self) -> str: hasher = hashlib.sha256() hasher.update(self.triple.encode()) hasher.update(self.debug.to_bytes()) + hasher.update(self.cflags.encode()) # These dependencies are also reflected in _JITSources in regen.targets: hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) - hasher.update((out / "pyconfig.h").read_bytes()) + hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): - for filename in filenames: + # Exclude cache files from digest computation to ensure reproducible builds. + if dirpath.endswith("__pycache__"): + continue + for filename in sorted(filenames): hasher.update(pathlib.Path(dirpath, filename).read_bytes()) return hasher.hexdigest() @@ -110,7 +117,7 @@ def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None: raise NotImplementedError(type(self)) def _handle_relocation( - self, base: int, relocation: _R, raw: bytes | bytearray + self, base: int, relocation: _R, raw: bytearray ) -> _stencils.Hole: raise NotImplementedError(type(self)) @@ -125,7 +132,7 @@ async def _compile( f"-D_JIT_OPCODE={opname}", "-D_PyJIT_ACTIVE", "-D_Py_JIT", - "-I.", + f"-I{self.pyconfig_dir}", f"-I{CPYTHON / 'Include'}", f"-I{CPYTHON / 'Include' / 'internal'}", f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", @@ -154,6 +161,8 @@ async def _compile( f"{o}", f"{c}", *self.args, + # Allow user-provided CFLAGS to override any defaults + *shlex.split(self.cflags), ] await _llvm.run("clang", args, echo=self.verbose) return await self._parse(o) @@ -193,20 +202,19 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: def build( self, - out: pathlib.Path, *, comment: str = "", force: bool = False, - stencils_h: str = "jit_stencils.h", + jit_stencils: pathlib.Path, ) -> None: """Build jit_stencils.h in the given directory.""" + jit_stencils.parent.mkdir(parents=True, exist_ok=True) if not self.stable: warning = f"JIT support for {self.triple} is still experimental!" request = "Please report any issues you encounter.".center(len(warning)) outline = "=" * len(warning) print("\n".join(["", outline, warning, request, outline, ""])) - digest = f"// {self._compute_digest(out)}\n" - jit_stencils = out / stencils_h + digest = f"// {self._compute_digest()}\n" if ( not force and jit_stencils.exists() @@ -214,7 +222,7 @@ def build( ): return stencil_groups = ASYNCIO_RUNNER.run(self._build_stencils()) - jit_stencils_new = out / "jit_stencils.h.new" + jit_stencils_new = jit_stencils.parent / "jit_stencils.h.new" try: with jit_stencils_new.open("w") as file: file.write(digest) @@ -277,10 +285,7 @@ def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None] return _stencils.symbol_to_value(name) def _handle_relocation( - self, - base: int, - relocation: _schema.COFFRelocation, - raw: bytes | bytearray, + self, base: int, relocation: _schema.COFFRelocation, raw: bytearray ) -> _stencils.Hole: match relocation: case { @@ -375,10 +380,7 @@ def _handle_section( }, section_type def _handle_relocation( - self, - base: int, - relocation: _schema.ELFRelocation, - raw: bytes | bytearray, + self, base: int, relocation: _schema.ELFRelocation, raw: bytearray ) -> _stencils.Hole: symbol: str | None match relocation: @@ -454,10 +456,7 @@ def _handle_section( stencil.holes.append(hole) def _handle_relocation( - self, - base: int, - relocation: _schema.MachORelocation, - raw: bytes | bytearray, + self, base: int, relocation: _schema.MachORelocation, raw: bytearray ) -> _stencils.Hole: symbol: str | None match relocation: diff --git a/Tools/jit/build.py b/Tools/jit/build.py index 49b08f477dbed7..a0733005929bf2 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -8,7 +8,6 @@ import _targets if __name__ == "__main__": - out = pathlib.Path.cwd().resolve() comment = f"$ {shlex.join([pathlib.Path(sys.executable).name] + sys.argv)}" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -23,21 +22,39 @@ parser.add_argument( "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" ) + parser.add_argument( + "-o", + "--output-dir", + help="where to output generated files", + required=True, + type=lambda p: pathlib.Path(p).resolve(), + ) + parser.add_argument( + "-p", + "--pyconfig-dir", + help="where to find pyconfig.h", + required=True, + type=lambda p: pathlib.Path(p).resolve(), + ) parser.add_argument( "-v", "--verbose", action="store_true", help="echo commands as they are run" ) + parser.add_argument( + "--cflags", help="additional flags to pass to the compiler", default="" + ) args = parser.parse_args() for target in args.target: target.debug = args.debug target.force = args.force target.verbose = args.verbose + target.cflags = args.cflags + target.pyconfig_dir = args.pyconfig_dir target.build( - out, comment=comment, - stencils_h=f"jit_stencils-{target.triple}.h", force=args.force, + jit_stencils=args.output_dir / f"jit_stencils-{target.triple}.h", ) - jit_stencils_h = out / "jit_stencils.h" + jit_stencils_h = args.output_dir / "jit_stencils.h" lines = [f"// {comment}\n"] guard = "#if" for target in args.target: diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 5ee26f93f1e266..d042699680c639 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -70,9 +70,11 @@ do { \ } while (0) #undef LLTRACE_RESUME_FRAME -#define LLTRACE_RESUME_FRAME() \ - do { \ - } while (0) +#ifdef Py_DEBUG +#define LLTRACE_RESUME_FRAME() (frame->lltrace = 0) +#else +#define LLTRACE_RESUME_FRAME() do {} while (0) +#endif #define PATCH_JUMP(ALIAS) \ do { \ diff --git a/Tools/msi/bundle/Default.wxl b/Tools/msi/bundle/Default.wxl index 0bd3644b58b747..335c1d922d97b5 100644 --- a/Tools/msi/bundle/Default.wxl +++ b/Tools/msi/bundle/Default.wxl @@ -92,7 +92,7 @@ Select Customize to review current options.</String> <String Id="PrecompileLabel">&amp;Precompile standard library</String> <String Id="Include_symbolsLabel">Download debugging &amp;symbols</String> <String Id="Include_debugLabel">Download debu&amp;g binaries (requires VS 2017 or later)</String> - <String Id="Include_freethreadedLabel">Download &amp;free-threaded binaries (experimental)</String> + <String Id="Include_freethreadedLabel">Download &amp;free-threaded binaries</String> <String Id="ProgressHeader">[ActionLikeInstallation] Progress</String> <String Id="ProgressLabel">[ActionLikeInstalling]:</String> diff --git a/Tools/msi/bundle/bundle.wxs b/Tools/msi/bundle/bundle.wxs index abfeb88784890c..3fcb00553f5edd 100644 --- a/Tools/msi/bundle/bundle.wxs +++ b/Tools/msi/bundle/bundle.wxs @@ -106,11 +106,11 @@ <Variable Name="SimpleInstallDescription" Value="" bal:Overridable="yes" /> <Chain ParallelCache="yes"> + <PackageGroupRef Id="core" /> + <PackageGroupRef Id="exe" /> <?if $(var.Platform)!="ARM64" ?> <PackageGroupRef Id="crt" /> <?endif ?> - <PackageGroupRef Id="core" /> - <PackageGroupRef Id="exe" /> <PackageGroupRef Id="dev" /> <PackageGroupRef Id="lib" /> <?if $(var.IncludeFreethreaded)~="true" ?> diff --git a/Tools/msi/dev/dev_files.wxs b/Tools/msi/dev/dev_files.wxs index 4357dc86d9d356..21f9c848cc6be5 100644 --- a/Tools/msi/dev/dev_files.wxs +++ b/Tools/msi/dev/dev_files.wxs @@ -3,7 +3,7 @@ <Fragment> <ComponentGroup Id="dev_pyconfig"> <Component Id="include_pyconfig.h" Directory="include" Guid="*"> - <File Id="include_pyconfig.h" Name="pyconfig.h" Source="pyconfig.h" KeyPath="yes" /> + <File Id="include_pyconfig.h" Name="pyconfig.h" Source="!(bindpath.src)PC\pyconfig.h" KeyPath="yes" /> </Component> </ComponentGroup> </Fragment> diff --git a/Tools/msi/freethreaded/freethreaded_files.wxs b/Tools/msi/freethreaded/freethreaded_files.wxs index 86d9a8b83f6535..0707e77b5e9ab2 100644 --- a/Tools/msi/freethreaded/freethreaded_files.wxs +++ b/Tools/msi/freethreaded/freethreaded_files.wxs @@ -103,7 +103,7 @@ </ComponentGroup> </Fragment> - <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo;_zstd;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?> + <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_remote_debugging;_uuid;_wmi;_zoneinfo;_zstd;_testcapi;_ctypes_test;_testbuffer;_testimportmultiple;_testmultiphase;_testsinglephase;_testconsole;_testinternalcapi;_testclinic;_testclinic_limited;_tkinter ?> <Fragment> <DirectoryRef Id="Lib_venv_scripts_nt__freethreaded" /> diff --git a/Tools/msi/lib/lib.wixproj b/Tools/msi/lib/lib.wixproj index 02078e503d74a4..3ea46dd40ea4ce 100644 --- a/Tools/msi/lib/lib.wixproj +++ b/Tools/msi/lib/lib.wixproj @@ -15,12 +15,11 @@ <EmbeddedResource Include="*.wxl" /> </ItemGroup> <ItemGroup> - <ExcludeFolders Include="Lib\test;Lib\tests;Lib\tkinter;Lib\idlelib;Lib\turtledemo" /> + <ExcludeFolders Include="Lib\site-packages;Lib\test;Lib\tests;Lib\tkinter;Lib\idlelib;Lib\turtledemo" /> <InstallFiles Include="$(PySourcePath)Lib\**\*" Exclude="$(PySourcePath)Lib\**\*.pyc; $(PySourcePath)Lib\**\*.pyo; $(PySourcePath)Lib\turtle.py; - $(PySourcePath)Lib\site-packages\README; @(ExcludeFolders->'$(PySourcePath)%(Identity)\*'); @(ExcludeFolders->'$(PySourcePath)%(Identity)\**\*')"> <SourceBase>$(PySourcePath)Lib</SourceBase> diff --git a/Tools/msi/lib/lib_files.wxs b/Tools/msi/lib/lib_files.wxs index 8439518bcbd804..4d44299f783909 100644 --- a/Tools/msi/lib/lib_files.wxs +++ b/Tools/msi/lib/lib_files.wxs @@ -1,6 +1,6 @@ <?xml version="1.0" encoding="UTF-8"?> <Wix xmlns="http://schemas.microsoft.com/wix/2006/wi"> - <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_uuid;_wmi;_zoneinfo;_zstd ?> + <?define exts=pyexpat;select;unicodedata;winsound;_bz2;_elementtree;_socket;_ssl;_ctypes;_hashlib;_multiprocessing;_lzma;_decimal;_overlapped;_sqlite3;_asyncio;_queue;_remote_debugging;_uuid;_wmi;_zoneinfo;_zstd ?> <Fragment> <DirectoryRef Id="Lib_venv_scripts_nt" /> diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py index 0dcf6ef844a048..afd010a52544a3 100755 --- a/Tools/patchcheck/patchcheck.py +++ b/Tools/patchcheck/patchcheck.py @@ -53,19 +53,43 @@ def get_git_branch(): def get_git_upstream_remote(): - """Get the remote name to use for upstream branches + """ + Get the remote name to use for upstream branches - Uses "upstream" if it exists, "origin" otherwise + Check for presence of "https://github.com/python/cpython" remote URL. + If only one is found, return that remote name. If multiple are found, + check for and return "upstream", "origin", or "python", in that + order. Raise an error if no valid matches are found. """ - cmd = "git remote get-url upstream".split() - try: - subprocess.check_output(cmd, - stderr=subprocess.DEVNULL, - cwd=SRCDIR, - encoding='UTF-8') - except subprocess.CalledProcessError: - return "origin" - return "upstream" + cmd = "git remote -v".split() + output = subprocess.check_output( + cmd, + stderr=subprocess.DEVNULL, + cwd=SRCDIR, + encoding="UTF-8" + ) + # Filter to desired remotes, accounting for potential uppercasing + filtered_remotes = { + remote.split("\t")[0].lower() for remote in output.split('\n') + if "python/cpython" in remote.lower() and remote.endswith("(fetch)") + } + if len(filtered_remotes) == 1: + [remote] = filtered_remotes + return remote + for remote_name in ["upstream", "origin", "python"]: + if remote_name in filtered_remotes: + return remote_name + remotes_found = "\n".join( + {remote for remote in output.split('\n') if remote.endswith("(fetch)")} + ) + raise ValueError( + f"Patchcheck was unable to find an unambiguous upstream remote, " + f"with URL matching 'https://github.com/python/cpython'. " + f"For help creating an upstream remote, see Dev Guide: " + f"https://devguide.python.org/getting-started/" + f"git-boot-camp/#cloning-a-forked-cpython-repository " + f"\nRemotes found: \n{remotes_found}" + ) def get_git_remote_default_branch(remote_name): diff --git a/Tools/peg_generator/.ruff.toml b/Tools/peg_generator/.ruff.toml new file mode 100644 index 00000000000000..bcf57248713df4 --- /dev/null +++ b/Tools/peg_generator/.ruff.toml @@ -0,0 +1,22 @@ +extend = "../../.ruff.toml" # Inherit the project-wide settings + +extend-exclude = [ + # Generated files: + "Tools/peg_generator/pegen/grammar_parser.py", +] + +[lint] +select = [ + "F", # pyflakes + "I", # isort + "UP", # pyupgrade + "RUF100", # Ban unused `# noqa` comments + "PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes) +] +unfixable = [ + # The autofixes sometimes do the wrong things for these; + # it's better to have to manually look at the code and see how it needs fixing + "F841", # Detects unused variables + "F601", # Detects dictionaries that have duplicate keys + "F602", # Also detects dictionaries that have duplicate keys +] diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index 0b0b4b291c2b0e..a6bc627d47cbbb 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -10,7 +10,6 @@ import time import token import traceback -from typing import Tuple from pegen.grammar import Grammar from pegen.parser import Parser @@ -21,7 +20,7 @@ def generate_c_code( args: argparse.Namespace, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: from pegen.build import build_c_parser_and_generator verbose = args.verbose @@ -50,7 +49,7 @@ def generate_c_code( def generate_python_code( args: argparse.Namespace, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: from pegen.build import build_python_parser_and_generator verbose = args.verbose @@ -188,7 +187,7 @@ def main() -> None: if __name__ == "__main__": - if sys.version_info < (3, 8): + if sys.version_info < (3, 8): # noqa: UP036 print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr) sys.exit(1) main() diff --git a/Tools/peg_generator/pegen/ast_dump.py b/Tools/peg_generator/pegen/ast_dump.py index 07f8799c114f5d..60dc1b04672e2f 100644 --- a/Tools/peg_generator/pegen/ast_dump.py +++ b/Tools/peg_generator/pegen/ast_dump.py @@ -6,7 +6,7 @@ TODO: Remove the above-described hack. """ -from typing import Any, Optional, Tuple +from typing import Any def ast_dump( @@ -14,9 +14,9 @@ def ast_dump( annotate_fields: bool = True, include_attributes: bool = False, *, - indent: Optional[str] = None, + indent: str | None = None, ) -> str: - def _format(node: Any, level: int = 0) -> Tuple[str, bool]: + def _format(node: Any, level: int = 0) -> tuple[str, bool]: if indent is not None: level += 1 prefix = "\n" + indent * level @@ -41,7 +41,7 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]: value, simple = _format(value, level) allsimple = allsimple and simple if keywords: - args.append("%s=%s" % (name, value)) + args.append(f"{name}={value}") else: args.append(value) if include_attributes and node._attributes: @@ -54,16 +54,16 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]: continue value, simple = _format(value, level) allsimple = allsimple and simple - args.append("%s=%s" % (name, value)) + args.append(f"{name}={value}") if allsimple and len(args) <= 3: - return "%s(%s)" % (node.__class__.__name__, ", ".join(args)), not args - return "%s(%s%s)" % (node.__class__.__name__, prefix, sep.join(args)), False + return "{}({})".format(node.__class__.__name__, ", ".join(args)), not args + return f"{node.__class__.__name__}({prefix}{sep.join(args)})", False elif isinstance(node, list): if not node: return "[]", True - return "[%s%s]" % (prefix, sep.join(_format(x, level)[0] for x in node)), False + return f"[{prefix}{sep.join(_format(x, level)[0] for x in node)}]", False return repr(node), True if all(cls.__name__ != "AST" for cls in node.__class__.__mro__): - raise TypeError("expected AST, got %r" % node.__class__.__name__) + raise TypeError(f"expected AST, got {node.__class__.__name__!r}") return _format(node)[0] diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index 41338c29bdd9eb..44a58581686a4f 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -6,7 +6,7 @@ import sysconfig import tempfile import tokenize -from typing import IO, Any, Dict, List, Optional, Set, Tuple +from typing import IO, Any from pegen.c_generator import CParserGenerator from pegen.grammar import Grammar @@ -18,11 +18,11 @@ MOD_DIR = pathlib.Path(__file__).resolve().parent -TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]] +TokenDefinitions = tuple[dict[int, str], dict[str, int], set[str]] Incomplete = Any # TODO: install `types-setuptools` and remove this alias -def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]: +def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> list[str]: flags = sysconfig.get_config_var(compiler_flags) py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist) if flags is None or py_flags_nodist is None: @@ -71,11 +71,11 @@ def fixup_build_ext(cmd: Incomplete) -> None: def compile_c_extension( generated_source_path: str, - build_dir: Optional[str] = None, + build_dir: str | None = None, verbose: bool = False, keep_asserts: bool = True, disable_optimization: bool = False, - library_dir: Optional[str] = None, + library_dir: str | None = None, ) -> pathlib.Path: """Compile the generated source for a parser generator into an extension module. @@ -93,11 +93,10 @@ def compile_c_extension( """ import setuptools.command.build_ext import setuptools.logging - - from setuptools import Extension, Distribution - from setuptools.modified import newer_group + from setuptools import Distribution, Extension from setuptools._distutils.ccompiler import new_compiler from setuptools._distutils.sysconfig import customize_compiler + from setuptools.modified import newer_group if verbose: setuptools.logging.set_threshold(logging.DEBUG) @@ -108,6 +107,8 @@ def compile_c_extension( extra_compile_args.append("-DPy_BUILD_CORE_MODULE") # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c extra_compile_args.append("-D_Py_TEST_PEGEN") + if sys.platform == "win32" and sysconfig.get_config_var("Py_GIL_DISABLED"): + extra_compile_args.append("-DPy_GIL_DISABLED") extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST") if keep_asserts: extra_compile_args.append("-UNDEBUG") @@ -239,7 +240,7 @@ def compile_c_extension( def build_parser( grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False -) -> Tuple[Grammar, Parser, Tokenizer]: +) -> tuple[Grammar, Parser, Tokenizer]: with open(grammar_file) as file: tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer) parser = GrammarParser(tokenizer, verbose=verbose_parser) @@ -290,7 +291,7 @@ def build_c_generator( keep_asserts_in_extension: bool = True, skip_actions: bool = False, ) -> ParserGenerator: - with open(tokens_file, "r") as tok_file: + with open(tokens_file) as tok_file: all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) with open(output_file, "w") as file: gen: ParserGenerator = CParserGenerator( @@ -331,7 +332,7 @@ def build_c_parser_and_generator( verbose_c_extension: bool = False, keep_asserts_in_extension: bool = True, skip_actions: bool = False, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: """Generate rules, C parser, tokenizer, parser generator for a given grammar Args: @@ -371,7 +372,7 @@ def build_python_parser_and_generator( verbose_tokenizer: bool = False, verbose_parser: bool = False, skip_actions: bool = False, -) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: +) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]: """Generate rules, python parser, tokenizer, parser generator for a given grammar Args: diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 2be85a163b4043..fa75174ea0d59d 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -1,9 +1,10 @@ import ast import os.path import re +from collections.abc import Callable from dataclasses import dataclass, field from enum import Enum -from typing import IO, Any, Callable, Dict, List, Optional, Set, Text, Tuple +from typing import IO, Any from pegen import grammar from pegen.grammar import ( @@ -44,7 +45,7 @@ # define MAXSTACK 4000 # endif #else -# define MAXSTACK 4000 +# define MAXSTACK 6000 #endif """ @@ -86,13 +87,13 @@ class NodeTypes(Enum): @dataclass class FunctionCall: function: str - arguments: List[Any] = field(default_factory=list) - assigned_variable: Optional[str] = None - assigned_variable_type: Optional[str] = None - return_type: Optional[str] = None - nodetype: Optional[NodeTypes] = None + arguments: list[Any] = field(default_factory=list) + assigned_variable: str | None = None + assigned_variable_type: str | None = None + return_type: str | None = None + nodetype: NodeTypes | None = None force_true: bool = False - comment: Optional[str] = None + comment: str | None = None def __str__(self) -> str: parts = [] @@ -124,14 +125,14 @@ class CCallMakerVisitor(GrammarVisitor): def __init__( self, parser_generator: ParserGenerator, - exact_tokens: Dict[str, int], - non_exact_tokens: Set[str], + exact_tokens: dict[str, int], + non_exact_tokens: set[str], ): self.gen = parser_generator self.exact_tokens = exact_tokens self.non_exact_tokens = non_exact_tokens - self.cache: Dict[str, str] = {} - self.cleanup_statements: List[str] = [] + self.cache: dict[str, str] = {} + self.cleanup_statements: list[str] = [] def keyword_helper(self, keyword: str) -> FunctionCall: return FunctionCall( @@ -167,7 +168,7 @@ def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall: ) return FunctionCall( assigned_variable=f"{name.lower()}_var", - function=f"_PyPegen_expect_token", + function="_PyPegen_expect_token", arguments=["p", name], nodetype=NodeTypes.GENERIC_TOKEN, return_type="Token *", @@ -199,7 +200,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall: type = self.exact_tokens[val] return FunctionCall( assigned_variable="_literal", - function=f"_PyPegen_expect_token", + function="_PyPegen_expect_token", arguments=["p", type], nodetype=NodeTypes.GENERIC_TOKEN, return_type="Token *", @@ -214,33 +215,47 @@ def visit_NamedItem(self, node: NamedItem) -> FunctionCall: call.assigned_variable_type = node.type return call + def assert_no_undefined_behavior( + self, call: FunctionCall, wrapper: str, expected_rtype: str | None, + ) -> None: + if call.return_type != expected_rtype: + raise RuntimeError( + f"{call.function} return type is incompatible with {wrapper}: " + f"expect: {expected_rtype}, actual: {call.return_type}" + ) + def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall: call = self.generate_call(node.node) - if call.nodetype == NodeTypes.NAME_TOKEN: - return FunctionCall( - function=f"_PyPegen_lookahead_with_name", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + comment = None + if call.nodetype is NodeTypes.NAME_TOKEN: + function = "_PyPegen_lookahead_for_expr" + self.assert_no_undefined_behavior(call, function, "expr_ty") + elif call.nodetype is NodeTypes.STRING_TOKEN: + # _PyPegen_string_token() returns 'void *' instead of 'Token *'; + # in addition, the overall function call would return 'expr_ty'. + assert call.function == "_PyPegen_string_token" + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype == NodeTypes.SOFT_KEYWORD: - return FunctionCall( - function=f"_PyPegen_lookahead_with_string", - arguments=[positive, call.function, *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead_with_string" + self.assert_no_undefined_behavior(call, function, "expr_ty") elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}: - return FunctionCall( - function=f"_PyPegen_lookahead_with_int", - arguments=[positive, call.function, *call.arguments], - return_type="int", - comment=f"token={node.node}", - ) + function = "_PyPegen_lookahead_with_int" + self.assert_no_undefined_behavior(call, function, "Token *") + comment = f"token={node.node}" + elif call.return_type == "expr_ty": + function = "_PyPegen_lookahead_for_expr" + elif call.return_type == "stmt_ty": + function = "_PyPegen_lookahead_for_stmt" else: - return FunctionCall( - function=f"_PyPegen_lookahead", - arguments=[positive, f"(void *(*)(Parser *)) {call.function}", *call.arguments], - return_type="int", - ) + function = "_PyPegen_lookahead" + self.assert_no_undefined_behavior(call, function, None) + return FunctionCall( + function=function, + arguments=[positive, call.function, *call.arguments], + return_type="int", + comment=comment, + ) def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall: return self.lookahead_call_helper(node, 1) @@ -257,7 +272,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall: type = self.exact_tokens[val] return FunctionCall( assigned_variable="_literal", - function=f"_PyPegen_expect_forced_token", + function="_PyPegen_expect_forced_token", arguments=["p", type, f'"{val}"'], nodetype=NodeTypes.GENERIC_TOKEN, return_type="Token *", @@ -269,7 +284,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall: call.comment = None return FunctionCall( assigned_variable="_literal", - function=f"_PyPegen_expect_forced_result", + function="_PyPegen_expect_forced_result", arguments=["p", str(call), f'"{node.node.rhs!s}"'], return_type="void *", comment=f"forced_token=({node.node.rhs!s})", @@ -292,7 +307,7 @@ def _generate_artificial_rule_call( node: Any, prefix: str, rule_generation_func: Callable[[], str], - return_type: Optional[str] = None, + return_type: str | None = None, ) -> FunctionCall: node_str = f"{node}" key = f"{prefix}_{node_str}" @@ -363,10 +378,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): def __init__( self, grammar: grammar.Grammar, - tokens: Dict[int, str], - exact_tokens: Dict[str, int], - non_exact_tokens: Set[str], - file: Optional[IO[Text]], + tokens: dict[int, str], + exact_tokens: dict[str, int], + non_exact_tokens: set[str], + file: IO[str] | None, debug: bool = False, skip_actions: bool = False, ): @@ -377,7 +392,7 @@ def __init__( self._varname_counter = 0 self.debug = debug self.skip_actions = skip_actions - self.cleanup_statements: List[str] = [] + self.cleanup_statements: list[str] = [] def add_level(self) -> None: self.print("if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) {") @@ -413,12 +428,12 @@ def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None: self.print(f"if ({error_var}) {{") with self.indent(): self.print(f"goto {goto_target};") - self.print(f"}}") + self.print("}") def out_of_memory_return( self, expr: str, - cleanup_code: Optional[str] = None, + cleanup_code: str | None = None, ) -> None: self.print(f"if ({expr}) {{") with self.indent(): @@ -427,14 +442,14 @@ def out_of_memory_return( self.print("p->error_indicator = 1;") self.print("PyErr_NoMemory();") self.add_return("NULL") - self.print(f"}}") + self.print("}") def out_of_memory_goto(self, expr: str, goto_target: str) -> None: self.print(f"if ({expr}) {{") with self.indent(): self.print("PyErr_NoMemory();") self.print(f"goto {goto_target};") - self.print(f"}}") + self.print("}") def generate(self, filename: str) -> None: self.collect_rules() @@ -477,8 +492,8 @@ def generate(self, filename: str) -> None: if trailer: self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename)) - def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]: - groups: Dict[int, List[Tuple[str, int]]] = {} + def _group_keywords_by_length(self) -> dict[int, list[tuple[str, int]]]: + groups: dict[int, list[tuple[str, int]]] = {} for keyword_str, keyword_type in self.keywords.items(): length = len(keyword_str) if length in groups: @@ -570,10 +585,10 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: self.print("if (_raw == NULL || p->mark <= _resmark)") with self.indent(): self.print("break;") - self.print(f"_resmark = p->mark;") + self.print("_resmark = p->mark;") self.print("_res = _raw;") self.print("}") - self.print(f"p->mark = _resmark;") + self.print("p->mark = _resmark;") self.add_return("_res") self.print("}") self.print(f"static {result_type}") @@ -629,7 +644,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None: if memoize: self.print("int _start_mark = p->mark;") self.print("void **_children = PyMem_Malloc(sizeof(void *));") - self.out_of_memory_return(f"!_children") + self.out_of_memory_return("!_children") self.print("Py_ssize_t _children_capacity = 1;") self.print("Py_ssize_t _n = 0;") if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts): @@ -647,7 +662,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None: self.add_return("NULL") self.print("}") self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);") - self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);") + self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);") self.print("for (Py_ssize_t i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);") self.print("PyMem_Free(_children);") if memoize and node.name: @@ -701,7 +716,7 @@ def visit_NamedItem(self, node: NamedItem) -> None: self.print(call) def visit_Rhs( - self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str] + self, node: Rhs, is_loop: bool, is_gather: bool, rulename: str | None ) -> None: if is_loop: assert len(node.alts) == 1 @@ -720,7 +735,7 @@ def join_conditions(self, keyword: str, node: Any) -> None: self.visit(item) self.print(")") - def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None: + def emit_action(self, node: Alt, cleanup_code: str | None = None) -> None: self.print(f"_res = {node.action};") self.print("if (_res == NULL && PyErr_Occurred()) {") @@ -762,7 +777,7 @@ def emit_default_action(self, is_gather: bool, node: Alt) -> None: def emit_dummy_action(self) -> None: self.print("_res = _PyPegen_dummy_name(p);") - def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None: + def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: str | None) -> None: self.join_conditions(keyword="if", node=node) self.print("{") # We have parsed successfully all the conditions for the option. @@ -782,10 +797,10 @@ def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) self.emit_default_action(is_gather, node) # As the current option has parsed correctly, do not continue with the rest. - self.print(f"goto done;") + self.print("goto done;") self.print("}") - def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None: + def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: str | None) -> None: # Condition of the main body of the alternative self.join_conditions(keyword="while", node=node) self.print("{") @@ -809,7 +824,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) - self.print( "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));" ) - self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);") + self.out_of_memory_return("!_new_children", cleanup_code="PyMem_Free(_children);") self.print("_children = _new_children;") self.print("}") self.print("_children[_n++] = _res;") @@ -817,7 +832,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) - self.print("}") def visit_Alt( - self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str] + self, node: Alt, is_loop: bool, is_gather: bool, rulename: str | None ) -> None: if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"): self.print(f"if (p->call_invalid_rules) {{ // {node}") @@ -861,7 +876,7 @@ def visit_Alt( self.print("}") self.print("}") - def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]: + def collect_vars(self, node: Alt) -> dict[str | None, str | None]: types = {} with self.local_variable_context(): for item in node.items: @@ -869,7 +884,7 @@ def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]: types[name] = type return types - def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]: + def add_var(self, node: NamedItem) -> tuple[str | None, str | None]: call = self.callmakervisitor.generate_call(node.item) name = node.name if node.name else call.assigned_variable if name is not None: diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py index 6d794ffa4bfa8b..f6b83c0ca31e3f 100755 --- a/Tools/peg_generator/pegen/first_sets.py +++ b/Tools/peg_generator/pegen/first_sets.py @@ -3,7 +3,6 @@ import argparse import pprint import sys -from typing import Dict, Set from pegen.build import build_parser from pegen.grammar import ( @@ -33,20 +32,20 @@ class FirstSetCalculator(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule]) -> None: + def __init__(self, rules: dict[str, Rule]) -> None: self.rules = rules self.nullables = compute_nullables(rules) - self.first_sets: Dict[str, Set[str]] = dict() - self.in_process: Set[str] = set() + self.first_sets: dict[str, set[str]] = dict() + self.in_process: set[str] = set() - def calculate(self) -> Dict[str, Set[str]]: + def calculate(self) -> dict[str, set[str]]: for name, rule in self.rules.items(): self.visit(rule) return self.first_sets - def visit_Alt(self, item: Alt) -> Set[str]: - result: Set[str] = set() - to_remove: Set[str] = set() + def visit_Alt(self, item: Alt) -> set[str]: + result: set[str] = set() + to_remove: set[str] = set() for other in item.items: new_terminals = self.visit(other) if isinstance(other.item, NegativeLookahead): @@ -71,34 +70,34 @@ def visit_Alt(self, item: Alt) -> Set[str]: return result - def visit_Cut(self, item: Cut) -> Set[str]: + def visit_Cut(self, item: Cut) -> set[str]: return set() - def visit_Group(self, item: Group) -> Set[str]: + def visit_Group(self, item: Group) -> set[str]: return self.visit(item.rhs) - def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]: + def visit_PositiveLookahead(self, item: Lookahead) -> set[str]: return self.visit(item.node) - def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]: + def visit_NegativeLookahead(self, item: NegativeLookahead) -> set[str]: return self.visit(item.node) - def visit_NamedItem(self, item: NamedItem) -> Set[str]: + def visit_NamedItem(self, item: NamedItem) -> set[str]: return self.visit(item.item) - def visit_Opt(self, item: Opt) -> Set[str]: + def visit_Opt(self, item: Opt) -> set[str]: return self.visit(item.node) - def visit_Gather(self, item: Gather) -> Set[str]: + def visit_Gather(self, item: Gather) -> set[str]: return self.visit(item.node) - def visit_Repeat0(self, item: Repeat0) -> Set[str]: + def visit_Repeat0(self, item: Repeat0) -> set[str]: return self.visit(item.node) - def visit_Repeat1(self, item: Repeat1) -> Set[str]: + def visit_Repeat1(self, item: Repeat1) -> set[str]: return self.visit(item.node) - def visit_NameLeaf(self, item: NameLeaf) -> Set[str]: + def visit_NameLeaf(self, item: NameLeaf) -> set[str]: if item.value not in self.rules: return {item.value} @@ -110,16 +109,16 @@ def visit_NameLeaf(self, item: NameLeaf) -> Set[str]: return self.first_sets[item.value] - def visit_StringLeaf(self, item: StringLeaf) -> Set[str]: + def visit_StringLeaf(self, item: StringLeaf) -> set[str]: return {item.value} - def visit_Rhs(self, item: Rhs) -> Set[str]: - result: Set[str] = set() + def visit_Rhs(self, item: Rhs) -> set[str]: + result: set[str] = set() for alt in item.alts: result |= self.visit(alt) return result - def visit_Rule(self, item: Rule) -> Set[str]: + def visit_Rule(self, item: Rule) -> set[str]: if item.name in self.in_process: return set() elif item.name not in self.first_sets: @@ -138,7 +137,7 @@ def main() -> None: try: grammar, parser, tokenizer = build_parser(args.grammar_file) except Exception as err: - print("ERROR: Failed to parse grammar file", file=sys.stderr) + print("ERROR: Failed to parse grammar file", err, file=sys.stderr) sys.exit(1) firs_sets = FirstSetCalculator(grammar.rules).calculate() diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py index 1ee9d100b61f49..cca8584a632071 100644 --- a/Tools/peg_generator/pegen/grammar.py +++ b/Tools/peg_generator/pegen/grammar.py @@ -1,15 +1,7 @@ from __future__ import annotations -from typing import ( - AbstractSet, - Any, - Iterable, - Iterator, - List, - Optional, - Tuple, - Union, -) +from collections.abc import Iterable, Iterator, Set +from typing import Any class GrammarError(Exception): @@ -34,7 +26,7 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any: class Grammar: - def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]): + def __init__(self, rules: Iterable[Rule], metas: Iterable[tuple[str, str | None]]): # Check if there are repeated rules in "rules" all_rules = {} for rule in rules: @@ -66,7 +58,7 @@ def __iter__(self) -> Iterator[Rule]: class Rule: - def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None): + def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None): self.name = name self.type = type self.rhs = rhs @@ -141,9 +133,9 @@ def __repr__(self) -> str: class Rhs: - def __init__(self, alts: List[Alt]): + def __init__(self, alts: list[Alt]): self.alts = alts - self.memo: Optional[Tuple[Optional[str], str]] = None + self.memo: tuple[str | None, str] | None = None def __str__(self) -> str: return " | ".join(str(alt) for alt in self.alts) @@ -151,7 +143,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"Rhs({self.alts!r})" - def __iter__(self) -> Iterator[List[Alt]]: + def __iter__(self) -> Iterator[list[Alt]]: yield self.alts @property @@ -165,7 +157,7 @@ def can_be_inlined(self) -> bool: class Alt: - def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None): + def __init__(self, items: list[NamedItem], *, icut: int = -1, action: str | None = None): self.items = items self.icut = icut self.action = action @@ -185,12 +177,12 @@ def __repr__(self) -> str: args.append(f"action={self.action!r}") return f"Alt({', '.join(args)})" - def __iter__(self) -> Iterator[List[NamedItem]]: + def __iter__(self) -> Iterator[list[NamedItem]]: yield self.items class NamedItem: - def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None): + def __init__(self, name: str | None, item: Item, type: str | None = None): self.name = name self.item = item self.type = type @@ -271,7 +263,7 @@ class Repeat: def __init__(self, node: Plain): self.node = node - self.memo: Optional[Tuple[Optional[str], str]] = None + self.memo: tuple[str | None, str] | None = None def __iter__(self) -> Iterator[Plain]: yield self.node @@ -334,12 +326,12 @@ def __init__(self) -> None: pass def __repr__(self) -> str: - return f"Cut()" + return "Cut()" def __str__(self) -> str: - return f"~" + return "~" - def __iter__(self) -> Iterator[Tuple[str, str]]: + def __iter__(self) -> Iterator[tuple[str, str]]: yield from () def __eq__(self, other: object) -> bool: @@ -347,15 +339,15 @@ def __eq__(self, other: object) -> bool: return NotImplemented return True - def initial_names(self) -> AbstractSet[str]: + def initial_names(self) -> Set[str]: return set() -Plain = Union[Leaf, Group] -Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut] -RuleName = Tuple[str, Optional[str]] -MetaTuple = Tuple[str, Optional[str]] -MetaList = List[MetaTuple] -RuleList = List[Rule] -NamedItemList = List[NamedItem] -LookaheadOrCut = Union[Lookahead, Cut] +Plain = Leaf | Group +Item = Plain | Opt | Repeat | Forced | Lookahead | Rhs | Cut +RuleName = tuple[str, str | None] +MetaTuple = tuple[str, str | None] +MetaList = list[MetaTuple] +RuleList = list[Rule] +NamedItemList = list[NamedItem] +LookaheadOrCut = Lookahead | Cut diff --git a/Tools/peg_generator/pegen/grammar_visualizer.py b/Tools/peg_generator/pegen/grammar_visualizer.py index 11f784f45b66b8..eadb6987389b88 100644 --- a/Tools/peg_generator/pegen/grammar_visualizer.py +++ b/Tools/peg_generator/pegen/grammar_visualizer.py @@ -1,6 +1,7 @@ import argparse import sys -from typing import Any, Callable, Iterator +from collections.abc import Callable, Iterator +from typing import Any from pegen.build import build_parser from pegen.grammar import Grammar, Rule @@ -52,7 +53,7 @@ def main() -> None: try: grammar, parser, tokenizer = build_parser(args.filename) except Exception as err: - print("ERROR: Failed to parse grammar file", file=sys.stderr) + print("ERROR: Failed to parse grammar file", err, file=sys.stderr) sys.exit(1) visitor = ASTGrammarPrinter() diff --git a/Tools/peg_generator/pegen/parser.py b/Tools/peg_generator/pegen/parser.py index a987d30a9d6438..806003c8350c03 100644 --- a/Tools/peg_generator/pegen/parser.py +++ b/Tools/peg_generator/pegen/parser.py @@ -5,7 +5,8 @@ import tokenize import traceback from abc import abstractmethod -from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast +from collections.abc import Callable +from typing import Any, ClassVar, TypeVar, cast from pegen.tokenizer import Mark, Tokenizer, exact_token_types @@ -74,12 +75,12 @@ def memoize_wrapper(self: "Parser", *args: object) -> Any: def memoize_left_rec( - method: Callable[["Parser"], Optional[T]] -) -> Callable[["Parser"], Optional[T]]: + method: Callable[["Parser"], T | None] +) -> Callable[["Parser"], T | None]: """Memoize a left-recursive symbol method.""" method_name = method.__name__ - def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]: + def memoize_left_rec_wrapper(self: "Parser") -> T | None: mark = self._mark() key = mark, method_name, () # Fast path: cache hit, and not verbose. @@ -160,15 +161,15 @@ def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]: class Parser: """Parsing base class.""" - KEYWORDS: ClassVar[Tuple[str, ...]] + KEYWORDS: ClassVar[tuple[str, ...]] - SOFT_KEYWORDS: ClassVar[Tuple[str, ...]] + SOFT_KEYWORDS: ClassVar[tuple[str, ...]] def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False): self._tokenizer = tokenizer self._verbose = verbose self._level = 0 - self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {} + self._cache: dict[tuple[Mark, str, tuple[Any, ...]], tuple[Any, Mark]] = {} # Integer tracking whether we are in a left recursive rule or not. Can be useful # for error reporting. self.in_recursive_rule = 0 @@ -185,28 +186,28 @@ def showpeek(self) -> str: return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}" @memoize - def name(self) -> Optional[tokenize.TokenInfo]: + def name(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.NAME and tok.string not in self.KEYWORDS: return self._tokenizer.getnext() return None @memoize - def number(self) -> Optional[tokenize.TokenInfo]: + def number(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.NUMBER: return self._tokenizer.getnext() return None @memoize - def string(self) -> Optional[tokenize.TokenInfo]: + def string(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.STRING: return self._tokenizer.getnext() return None @memoize - def fstring_start(self) -> Optional[tokenize.TokenInfo]: + def fstring_start(self) -> tokenize.TokenInfo | None: FSTRING_START = getattr(token, "FSTRING_START", None) if not FSTRING_START: return None @@ -216,7 +217,7 @@ def fstring_start(self) -> Optional[tokenize.TokenInfo]: return None @memoize - def fstring_middle(self) -> Optional[tokenize.TokenInfo]: + def fstring_middle(self) -> tokenize.TokenInfo | None: FSTRING_MIDDLE = getattr(token, "FSTRING_MIDDLE", None) if not FSTRING_MIDDLE: return None @@ -226,7 +227,7 @@ def fstring_middle(self) -> Optional[tokenize.TokenInfo]: return None @memoize - def fstring_end(self) -> Optional[tokenize.TokenInfo]: + def fstring_end(self) -> tokenize.TokenInfo | None: FSTRING_END = getattr(token, "FSTRING_END", None) if not FSTRING_END: return None @@ -236,28 +237,28 @@ def fstring_end(self) -> Optional[tokenize.TokenInfo]: return None @memoize - def op(self) -> Optional[tokenize.TokenInfo]: + def op(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.OP: return self._tokenizer.getnext() return None @memoize - def type_comment(self) -> Optional[tokenize.TokenInfo]: + def type_comment(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.TYPE_COMMENT: return self._tokenizer.getnext() return None @memoize - def soft_keyword(self) -> Optional[tokenize.TokenInfo]: + def soft_keyword(self) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS: return self._tokenizer.getnext() return None @memoize - def expect(self, type: str) -> Optional[tokenize.TokenInfo]: + def expect(self, type: str) -> tokenize.TokenInfo | None: tok = self._tokenizer.peek() if tok.string == type: return self._tokenizer.getnext() @@ -271,7 +272,7 @@ def expect(self, type: str) -> Optional[tokenize.TokenInfo]: return self._tokenizer.getnext() return None - def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]: + def expect_forced(self, res: Any, expectation: str) -> tokenize.TokenInfo | None: if res is None: raise self.make_syntax_error(f"expected {expectation}") return res @@ -293,7 +294,7 @@ def make_syntax_error(self, message: str, filename: str = "<unknown>") -> Syntax return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line)) -def simple_parser_main(parser_class: Type[Parser]) -> None: +def simple_parser_main(parser_class: type[Parser]) -> None: argparser = argparse.ArgumentParser() argparser.add_argument( "-v", @@ -330,7 +331,7 @@ def simple_parser_main(parser_class: Type[Parser]) -> None: endpos = 0 else: endpos = file.tell() - except IOError: + except OSError: endpos = 0 finally: if file is not sys.stdin: diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py index 6ce0649aefe7ff..43054c258d2a05 100644 --- a/Tools/peg_generator/pegen/parser_generator.py +++ b/Tools/peg_generator/pegen/parser_generator.py @@ -1,22 +1,10 @@ -import sys import ast import contextlib import re +import sys from abc import abstractmethod -from typing import ( - IO, - AbstractSet, - Any, - Dict, - Iterable, - Iterator, - List, - Optional, - Set, - Text, - Tuple, - Union, -) +from collections.abc import Iterable, Iterator, Set +from typing import IO, Any from pegen import sccutils from pegen.grammar import ( @@ -44,8 +32,8 @@ class RuleCollectorVisitor(GrammarVisitor): """Visitor that invokes a provided callmaker visitor with just the NamedItem nodes""" - def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None: - self.rulses = rules + def __init__(self, rules: dict[str, Rule], callmakervisitor: GrammarVisitor) -> None: + self.rules = rules self.callmaker = callmakervisitor def visit_Rule(self, rule: Rule) -> None: @@ -56,9 +44,9 @@ def visit_NamedItem(self, item: NamedItem) -> None: class KeywordCollectorVisitor(GrammarVisitor): - """Visitor that collects all the keywods and soft keywords in the Grammar""" + """Visitor that collects all the keywords and soft keywords in the Grammar""" - def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]): + def __init__(self, gen: "ParserGenerator", keywords: dict[str, int], soft_keywords: set[str]): self.generator = gen self.keywords = keywords self.soft_keywords = soft_keywords @@ -73,7 +61,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> None: class RuleCheckingVisitor(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule], tokens: Set[str]): + def __init__(self, rules: dict[str, Rule], tokens: set[str]): self.rules = rules self.tokens = tokens # If python < 3.12 add the virtual fstring tokens @@ -81,6 +69,11 @@ def __init__(self, rules: Dict[str, Rule], tokens: Set[str]): self.tokens.add("FSTRING_START") self.tokens.add("FSTRING_END") self.tokens.add("FSTRING_MIDDLE") + # If python < 3.14 add the virtual tstring tokens + if sys.version_info < (3, 14, 0, 'beta', 1): + self.tokens.add("TSTRING_START") + self.tokens.add("TSTRING_END") + self.tokens.add("TSTRING_MIDDLE") def visit_NameLeaf(self, node: NameLeaf) -> None: if node.value not in self.rules and node.value not in self.tokens: @@ -95,11 +88,11 @@ def visit_NamedItem(self, node: NamedItem) -> None: class ParserGenerator: callmakervisitor: GrammarVisitor - def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]): + def __init__(self, grammar: Grammar, tokens: set[str], file: IO[str] | None): self.grammar = grammar self.tokens = tokens - self.keywords: Dict[str, int] = {} - self.soft_keywords: Set[str] = set() + self.keywords: dict[str, int] = {} + self.soft_keywords: set[str] = set() self.rules = grammar.rules self.validate_rule_names() if "trailer" not in grammar.metas and "start" not in self.rules: @@ -112,8 +105,8 @@ def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]) self.first_graph, self.first_sccs = compute_left_recursives(self.rules) self.counter = 0 # For name_rule()/name_loop() self.keyword_counter = 499 # For keyword_type() - self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules - self._local_variable_stack: List[List[str]] = [] + self.all_rules: dict[str, Rule] = self.rules.copy() # Rules + temporal rules + self._local_variable_stack: list[list[str]] = [] def validate_rule_names(self) -> None: for rule in self.rules: @@ -127,7 +120,7 @@ def local_variable_context(self) -> Iterator[None]: self._local_variable_stack.pop() @property - def local_variable_names(self) -> List[str]: + def local_variable_names(self) -> list[str]: return self._local_variable_stack[-1] @abstractmethod @@ -159,7 +152,7 @@ def collect_rules(self) -> None: keyword_collector.visit(rule) rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor) - done: Set[str] = set() + done: set[str] = set() while True: computed_rules = list(self.all_rules) todo = [i for i in computed_rules if i not in done] @@ -224,10 +217,10 @@ def dedupe(self, name: str) -> str: class NullableVisitor(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule]) -> None: + def __init__(self, rules: dict[str, Rule]) -> None: self.rules = rules - self.visited: Set[Any] = set() - self.nullables: Set[Union[Rule, NamedItem]] = set() + self.visited: set[Any] = set() + self.nullables: set[Rule | NamedItem] = set() def visit_Rule(self, rule: Rule) -> bool: if rule in self.visited: @@ -289,7 +282,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> bool: return not node.value -def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]: +def compute_nullables(rules: dict[str, Rule]) -> set[Any]: """Compute which rules in a grammar are nullable. Thanks to TatSu (tatsu/leftrec.py) for inspiration. @@ -301,12 +294,12 @@ def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]: class InitialNamesVisitor(GrammarVisitor): - def __init__(self, rules: Dict[str, Rule]) -> None: + def __init__(self, rules: dict[str, Rule]) -> None: self.rules = rules self.nullables = compute_nullables(rules) - def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]: - names: Set[str] = set() + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> set[Any]: + names: set[str] = set() for value in node: if isinstance(value, list): for item in value: @@ -315,33 +308,33 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[A names |= self.visit(value, *args, **kwargs) return names - def visit_Alt(self, alt: Alt) -> Set[Any]: - names: Set[str] = set() + def visit_Alt(self, alt: Alt) -> set[Any]: + names: set[str] = set() for item in alt.items: names |= self.visit(item) if item not in self.nullables: break return names - def visit_Forced(self, force: Forced) -> Set[Any]: + def visit_Forced(self, force: Forced) -> set[Any]: return set() - def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]: + def visit_LookAhead(self, lookahead: Lookahead) -> set[Any]: return set() - def visit_Cut(self, cut: Cut) -> Set[Any]: + def visit_Cut(self, cut: Cut) -> set[Any]: return set() - def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]: + def visit_NameLeaf(self, node: NameLeaf) -> set[Any]: return {node.value} - def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]: + def visit_StringLeaf(self, node: StringLeaf) -> set[Any]: return set() def compute_left_recursives( - rules: Dict[str, Rule] -) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]: + rules: dict[str, Rule] +) -> tuple[dict[str, Set[str]], list[Set[str]]]: graph = make_first_graph(rules) sccs = list(sccutils.strongly_connected_components(graph.keys(), graph)) for scc in sccs: @@ -369,7 +362,7 @@ def compute_left_recursives( return graph, sccs -def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: +def make_first_graph(rules: dict[str, Rule]) -> dict[str, Set[str]]: """Compute the graph of left-invocations. There's an edge from A to B if A may invoke B at its initial @@ -379,7 +372,7 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: """ initial_name_visitor = InitialNamesVisitor(rules) graph = {} - vertices: Set[str] = set() + vertices: set[str] = set() for rulename, rhs in rules.items(): graph[rulename] = names = initial_name_visitor.visit(rhs) vertices |= names diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 4bb26480ebc0af..e69ea4b5f4e14c 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -1,6 +1,7 @@ import os.path import token -from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple +from collections.abc import Callable, Sequence +from typing import IO, Any from pegen import grammar from pegen.grammar import ( @@ -74,10 +75,10 @@ def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool: def visit_Opt(self, node: Opt) -> bool: return self.visit(node.node) - def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]: + def visit_Repeat(self, node: Repeat0) -> tuple[str, str]: return self.visit(node.node) - def visit_Gather(self, node: Gather) -> Tuple[str, str]: + def visit_Gather(self, node: Gather) -> tuple[str, str]: return self.visit(node.node) def visit_Group(self, node: Group) -> bool: @@ -93,9 +94,9 @@ def visit_Forced(self, node: Forced) -> bool: class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator - self.cache: Dict[str, Tuple[str, str]] = {} + self.cache: dict[str, tuple[str, str]] = {} - def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: + def visit_NameLeaf(self, node: NameLeaf) -> tuple[str | None, str]: name = node.value if name == "SOFT_KEYWORD": return "soft_keyword", "self.soft_keyword()" @@ -108,31 +109,31 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: return "_" + name.lower(), f"self.expect({name!r})" return name, f"self.{name}()" - def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: + def visit_StringLeaf(self, node: StringLeaf) -> tuple[str, str]: return "literal", f"self.expect({node.value})" - def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: + def visit_NamedItem(self, node: NamedItem) -> tuple[str | None, str]: name, call = self.visit(node.item) if node.name: name = node.name return name, call - def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]: + def lookahead_call_helper(self, node: Lookahead) -> tuple[str, str]: name, call = self.visit(node.node) head, tail = call.split("(", 1) assert tail[-1] == ")" tail = tail[:-1] return head, tail - def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]: + def visit_PositiveLookahead(self, node: PositiveLookahead) -> tuple[None, str]: head, tail = self.lookahead_call_helper(node) return None, f"self.positive_lookahead({head}, {tail})" - def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]: + def visit_NegativeLookahead(self, node: NegativeLookahead) -> tuple[None, str]: head, tail = self.lookahead_call_helper(node) return None, f"self.negative_lookahead({head}, {tail})" - def visit_Opt(self, node: Opt) -> Tuple[str, str]: + def visit_Opt(self, node: Opt) -> tuple[str, str]: name, call = self.visit(node.node) # Note trailing comma (the call may already have one comma # at the end, for example when rules have both repeat0 and optional @@ -148,7 +149,7 @@ def _generate_artificial_rule_call( prefix: str, call_by_name_func: Callable[[str], str], rule_generation_func: Callable[[], str], - ) -> Tuple[str, str]: + ) -> tuple[str, str]: node_str = f"{node}" key = f"{prefix}_{node_str}" if key in self.cache: @@ -159,7 +160,7 @@ def _generate_artificial_rule_call( self.cache[key] = name, call return self.cache[key] - def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: + def visit_Rhs(self, node: Rhs) -> tuple[str, str]: if len(node.alts) == 1 and len(node.alts[0].items) == 1: return self.visit(node.alts[0].items[0]) @@ -170,7 +171,7 @@ def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_rhs(node), ) - def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: + def visit_Repeat0(self, node: Repeat0) -> tuple[str, str]: return self._generate_artificial_rule_call( node, "repeat0", @@ -178,7 +179,7 @@ def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False), ) - def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: + def visit_Repeat1(self, node: Repeat1) -> tuple[str, str]: return self._generate_artificial_rule_call( node, "repeat1", @@ -186,7 +187,7 @@ def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True), ) - def visit_Gather(self, node: Gather) -> Tuple[str, str]: + def visit_Gather(self, node: Gather) -> tuple[str, str]: return self._generate_artificial_rule_call( node, "gather", @@ -194,13 +195,13 @@ def visit_Gather(self, node: Gather) -> Tuple[str, str]: lambda: self.gen.artificial_rule_from_gather(node), ) - def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: + def visit_Group(self, node: Group) -> tuple[str | None, str]: return self.visit(node.rhs) - def visit_Cut(self, node: Cut) -> Tuple[str, str]: + def visit_Cut(self, node: Cut) -> tuple[str, str]: return "cut", "True" - def visit_Forced(self, node: Forced) -> Tuple[str, str]: + def visit_Forced(self, node: Forced) -> tuple[str, str]: if isinstance(node.node, Group): _, val = self.visit(node.node.rhs) return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')" @@ -215,10 +216,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor): def __init__( self, grammar: grammar.Grammar, - file: Optional[IO[Text]], - tokens: Set[str] = set(token.tok_name.values()), - location_formatting: Optional[str] = None, - unreachable_formatting: Optional[str] = None, + file: IO[str] | None, + tokens: set[str] = set(token.tok_name.values()), + location_formatting: str | None = None, + unreachable_formatting: str | None = None, ): tokens.add("SOFT_KEYWORD") super().__init__(grammar, tokens, file) @@ -355,7 +356,7 @@ def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None: if is_loop: self.print(f"children.append({action})") - self.print(f"mark = self._mark()") + self.print("mark = self._mark()") else: if "UNREACHABLE" in action: action = action.replace("UNREACHABLE", self.unreachable_formatting) diff --git a/Tools/peg_generator/pegen/sccutils.py b/Tools/peg_generator/pegen/sccutils.py index da4c9331625dd9..51f618a14d936b 100644 --- a/Tools/peg_generator/pegen/sccutils.py +++ b/Tools/peg_generator/pegen/sccutils.py @@ -1,11 +1,11 @@ # Adapted from mypy (mypy/build.py) under the MIT license. -from typing import * +from collections.abc import Iterable, Iterator, Set def strongly_connected_components( - vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]] -) -> Iterator[AbstractSet[str]]: + vertices: Set[str], edges: dict[str, Set[str]] +) -> Iterator[Set[str]]: """Compute Strongly Connected Components of a directed graph. Args: @@ -20,12 +20,12 @@ def strongly_connected_components( From https://code.activestate.com/recipes/578507-strongly-connected-components-of-a-directed-graph/. """ - identified: Set[str] = set() - stack: List[str] = [] - index: Dict[str, int] = {} - boundaries: List[int] = [] + identified: set[str] = set() + stack: list[str] = [] + index: dict[str, int] = {} + boundaries: list[int] = [] - def dfs(v: str) -> Iterator[Set[str]]: + def dfs(v: str) -> Iterator[set[str]]: index[v] = len(stack) stack.append(v) boundaries.append(index[v]) @@ -50,8 +50,8 @@ def dfs(v: str) -> Iterator[Set[str]]: def topsort( - data: Dict[AbstractSet[str], Set[AbstractSet[str]]] -) -> Iterable[AbstractSet[AbstractSet[str]]]: + data: dict[Set[str], set[Set[str]]] +) -> Iterable[Set[Set[str]]]: """Topological sort. Args: @@ -94,12 +94,12 @@ def topsort( break yield ready data = {item: (dep - ready) for item, dep in data.items() if item not in ready} - assert not data, "A cyclic dependency exists amongst %r" % data + assert not data, f"A cyclic dependency exists amongst {data}" def find_cycles_in_scc( - graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str -) -> Iterable[List[str]]: + graph: dict[str, Set[str]], scc: Set[str], start: str +) -> Iterable[list[str]]: """Find cycles in SCC emanating from start. Yields lists of the form ['A', 'B', 'C', 'A'], which means there's @@ -117,7 +117,7 @@ def find_cycles_in_scc( assert start in graph # Recursive helper that yields cycles. - def dfs(node: str, path: List[str]) -> Iterator[List[str]]: + def dfs(node: str, path: list[str]) -> Iterator[list[str]]: if node in path: yield path + [node] return diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py index 0e85b844ef152b..46f6c7fec1bda0 100644 --- a/Tools/peg_generator/pegen/testutil.py +++ b/Tools/peg_generator/pegen/testutil.py @@ -6,7 +6,7 @@ import textwrap import token import tokenize -from typing import IO, Any, Dict, Final, Optional, Type, cast +from typing import IO, Any, Final, cast from pegen.build import compile_c_extension from pegen.c_generator import CParserGenerator @@ -23,19 +23,19 @@ } -def generate_parser(grammar: Grammar) -> Type[Parser]: +def generate_parser(grammar: Grammar) -> type[Parser]: # Generate a parser. out = io.StringIO() genr = PythonParserGenerator(grammar, out) genr.generate("<string>") # Load the generated parser class. - ns: Dict[str, Any] = {} + ns: dict[str, Any] = {} exec(out.getvalue(), ns) return ns["GeneratedParser"] -def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any: +def run_parser(file: IO[bytes], parser_class: type[Parser], *, verbose: bool = False) -> Any: # Run a parser on a file (stream). tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore[arg-type] # typeshed issue #3515 parser = parser_class(tokenizer, verbose=verbose) @@ -46,7 +46,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F def parse_string( - source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False + source: str, parser_class: type[Parser], *, dedent: bool = True, verbose: bool = False ) -> Any: # Run the parser on a string. if dedent: @@ -55,7 +55,7 @@ def parse_string( return run_parser(file, parser_class, verbose=verbose) # type: ignore[arg-type] # typeshed issue #3515 -def make_parser(source: str) -> Type[Parser]: +def make_parser(source: str) -> type[Parser]: # Combine parse_string() and generate_parser(). grammar = parse_string(source, GrammarParser) return generate_parser(grammar) @@ -86,7 +86,7 @@ def generate_parser_c_extension( grammar: Grammar, path: pathlib.PurePath, debug: bool = False, - library_dir: Optional[str] = None, + library_dir: str | None = None, ) -> Any: """Generate a parser c extension for the given grammar in the given path diff --git a/Tools/peg_generator/pegen/tokenizer.py b/Tools/peg_generator/pegen/tokenizer.py index 7ee49e1432b77a..fba5bac9517895 100644 --- a/Tools/peg_generator/pegen/tokenizer.py +++ b/Tools/peg_generator/pegen/tokenizer.py @@ -1,6 +1,6 @@ import token import tokenize -from typing import Dict, Iterator, List +from collections.abc import Iterator Mark = int # NewType('Mark', int) @@ -8,7 +8,11 @@ def shorttok(tok: tokenize.TokenInfo) -> str: - return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}" + formatted = ( + f"{tok.start[0]}.{tok.start[1]}: " + f"{token.tok_name[tok.type]}:{tok.string!r}" + ) + return f"{formatted:<25.25}" class Tokenizer: @@ -17,7 +21,7 @@ class Tokenizer: This is pretty tied to Python's syntax. """ - _tokens: List[tokenize.TokenInfo] + _tokens: list[tokenize.TokenInfo] def __init__( self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False @@ -26,7 +30,7 @@ def __init__( self._tokens = [] self._index = 0 self._verbose = verbose - self._lines: Dict[int, str] = {} + self._lines: dict[int, str] = {} self._path = path if verbose: self.report(False, False) @@ -72,7 +76,7 @@ def get_last_non_whitespace_token(self) -> tokenize.TokenInfo: break return tok - def get_lines(self, line_numbers: List[int]) -> List[str]: + def get_lines(self, line_numbers: list[int]) -> list[str]: """Retrieve source lines corresponding to line numbers.""" if self._lines: lines = self._lines diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py index 4699d5712d9522..635eb398b41808 100644 --- a/Tools/peg_generator/pegen/validator.py +++ b/Tools/peg_generator/pegen/validator.py @@ -1,5 +1,3 @@ -from typing import Optional - from pegen import grammar from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule @@ -11,7 +9,7 @@ class ValidationError(Exception): class GrammarValidator(GrammarVisitor): def __init__(self, grammar: grammar.Grammar) -> None: self.grammar = grammar - self.rulename: Optional[str] = None + self.rulename: str | None = None def validate_rule(self, rulename: str, node: Rule) -> None: self.rulename = rulename diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 5bf180bb30a310..73236767374378 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -1,7 +1,7 @@ # Requirements file for external linters and checks we run on # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI -mypy==1.15 +mypy==1.17.1 # needed for peg_generator: -types-psutil==6.0.0.20240901 -types-setuptools==74.0.0.20240831 +types-psutil==7.0.0.20250801 +types-setuptools==80.9.0.20250801 diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt index 66898885c0a412..e5deac497fbe3f 100644 --- a/Tools/requirements-hypothesis.txt +++ b/Tools/requirements-hypothesis.txt @@ -1,4 +1,4 @@ # Requirements file for hypothesis that # we use to run our property-based tests in CI. -hypothesis==6.111.2 +hypothesis==6.135.26 diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 68cfad3f92cdc7..905af9dcfd802e 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -492,7 +492,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]: ): (trace_too_long, attempts), Doc( "Trace too short", - "A potential trace is abandoned because it it too short.", + "A potential trace is abandoned because it is too short.", ): (trace_too_short, attempts), Doc( "Inner loop found", "A trace is truncated because it has an inner loop" diff --git a/Tools/ssl/multissltests.py b/Tools/ssl/multissltests.py index b1a5df91901fc6..ab9840c1c5252d 100755 --- a/Tools/ssl/multissltests.py +++ b/Tools/ssl/multissltests.py @@ -44,14 +44,15 @@ OPENSSL_OLD_VERSIONS = [ "1.1.1w", + "3.1.8", ] OPENSSL_RECENT_VERSIONS = [ - "3.0.16", - "3.1.8", - "3.2.4", - "3.3.3", - "3.4.1", + "3.0.18", + "3.2.6", + "3.3.5", + "3.4.3", + "3.5.4", # See make_ssl_data.py for notes on adding a new version. ] @@ -70,9 +71,8 @@ parser = argparse.ArgumentParser( prog='multissl', description=( - "Run CPython tests with multiple OpenSSL and LibreSSL " - "versions." - ) + "Run CPython tests with multiple cryptography libraries/versions." + ), ) parser.add_argument( '--debug', @@ -294,7 +294,7 @@ def _unpack_src(self): raise ValueError(member.name, base) member.name = member.name[len(base):].lstrip('/') log.info("Unpacking files to {}".format(self.build_dir)) - tf.extractall(self.build_dir, members) + tf.extractall(self.build_dir, members, filter='data') def _build_src(self, config_args=()): """Now build openssl""" diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 21224e490b8160..c10af91faae17c 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -46,4 +46,9 @@ race:list_inplace_repeat_lock_held # PyObject_Realloc internally does memcpy which isn't atomic so can race # with non-locking reads. See #132070 -race:PyObject_Realloc \ No newline at end of file +race:PyObject_Realloc + +# gh-133467. Some of these could be hard to trigger. +race_top:update_one_slot +race_top:set_tp_bases +race_top:type_set_bases_unlocked diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index 889ae8fc869b8a..d4cca68c3e3e71 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -43,7 +43,7 @@ # When changing UCD version please update # * Doc/library/stdtypes.rst, and # * Doc/library/unicodedata.rst -# * Doc/reference/lexical_analysis.rst (two occurrences) +# * Doc/reference/lexical_analysis.rst (three occurrences) UNIDATA_VERSION = "16.0.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" diff --git a/Tools/wasm/.ruff.toml b/Tools/wasm/.ruff.toml new file mode 100644 index 00000000000000..3d8e59fa3f22c4 --- /dev/null +++ b/Tools/wasm/.ruff.toml @@ -0,0 +1,25 @@ +extend = "../../.ruff.toml" # Inherit the project-wide settings + +[format] +preview = true +docstring-code-format = true + +[lint] +select = [ + "C4", # flake8-comprehensions + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "PYI", # flake8-pyi + "RUF100", # Ban unused `# noqa` comments + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] +ignore = [ + "E501", # Line too long +] diff --git a/Tools/wasm/README.md b/Tools/wasm/README.md index 232321c515721e..91354b2e008041 100644 --- a/Tools/wasm/README.md +++ b/Tools/wasm/README.md @@ -22,7 +22,7 @@ https://github.com/psf/webassembly for more information. ### Build To cross compile to the ``wasm32-emscripten`` platform you need -[the Emscripten compiler toolchain](https://emscripten.org/), +[the Emscripten compiler toolchain](https://emscripten.org/), a Python interpreter, and an installation of Node version 18 or newer. Emscripten version 4.0.2 is recommended; newer versions may also work, but all official testing is performed with that version. All commands below are relative @@ -84,13 +84,24 @@ make a node application that "embeds" the interpreter instead of acting like the CLI you will need to write your own alternative to `node_entry.mjs`. +### Running tests + +After building, you can run the full test suite with: +```shell +./cross-build/wasm32-emscripten/build/python/python.sh -m test -uall +``` +You can run the browser smoke test with: +```shell +./Tools/wasm/emscripten/browser_test/run_test.sh +``` + ### The Web Example -When building for Emscripten, the web example will be built automatically. It is -in the ``web_example`` directory. To run the web example, ``cd`` into the +When building for Emscripten, the web example will be built automatically. It +is in the ``web_example`` directory. To run the web example, ``cd`` into the ``web_example`` directory, then run ``python server.py``. This will start a web -server; you can then visit ``http://localhost:8000/python.html`` in a browser to -see a simple REPL example. +server; you can then visit ``http://localhost:8000/`` in a browser to see a +simple REPL example. The web example relies on a bug fix in Emscripten version 3.1.73 so if you build with earlier versions of Emscripten it may not work. The web example uses @@ -215,8 +226,8 @@ await createEmscriptenModule({ e.g. ``ctypes``, ``readline``, ``ssl``, and more. - Shared extension modules are not implemented yet. All extension modules are statically linked into the main binary. The experimental configure - option ``--enable-wasm-dynamic-linking`` enables dynamic extensions - supports. It's currently known to crash in combination with threading. + option ``--enable-wasm-dynamic-linking`` enables dynamic extension + support. It's currently known to crash in combination with threading. - glibc extensions for date and time formatting are not available. - ``locales`` module is affected by musl libc issues, [gh-90548](https://github.com/python/cpython/issues/90548). diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index 849bd5de44eb7b..c88e9edba6d230 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -3,15 +3,16 @@ import argparse import contextlib import functools +import hashlib import os import shutil import subprocess import sys import sysconfig import tempfile -from urllib.request import urlopen from pathlib import Path from textwrap import dedent +from urllib.request import urlopen try: from os import process_cpu_count as cpu_count @@ -32,7 +33,7 @@ PREFIX_DIR = CROSS_BUILD_DIR / HOST_TRIPLE / "prefix" LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local" -LOCAL_SETUP_MARKER = "# Generated by Tools/wasm/emscripten.py\n".encode("utf-8") +LOCAL_SETUP_MARKER = b"# Generated by Tools/wasm/emscripten.py\n" def updated_env(updates={}): @@ -44,7 +45,9 @@ def updated_env(updates={}): # https://reproducible-builds.org/docs/source-date-epoch/ git_epoch_cmd = ["git", "log", "-1", "--pretty=%ct"] try: - epoch = subprocess.check_output(git_epoch_cmd, encoding="utf-8").strip() + epoch = subprocess.check_output( + git_epoch_cmd, encoding="utf-8" + ).strip() env_defaults["SOURCE_DATE_EPOCH"] = epoch except subprocess.CalledProcessError: pass # Might be building from a tarball. @@ -78,7 +81,11 @@ def wrapper(context): terminal_width = 80 print("⎯" * terminal_width) print("📁", working_dir) - if clean_ok and getattr(context, "clean", False) and working_dir.exists(): + if ( + clean_ok + and getattr(context, "clean", False) + and working_dir.exists() + ): print("🚮 Deleting directory (--clean)...") shutil.rmtree(working_dir) @@ -127,7 +134,9 @@ def build_python_path(): if not binary.is_file(): binary = binary.with_suffix(".exe") if not binary.is_file(): - raise FileNotFoundError("Unable to find `python(.exe)` in " f"{NATIVE_BUILD_DIR}") + raise FileNotFoundError( + f"Unable to find `python(.exe)` in {NATIVE_BUILD_DIR}" + ) return binary @@ -157,26 +166,75 @@ def make_build_python(context, working_dir): cmd = [ binary, "-c", - "import sys; " "print(f'{sys.version_info.major}.{sys.version_info.minor}')", + "import sys; " + "print(f'{sys.version_info.major}.{sys.version_info.minor}')", ] version = subprocess.check_output(cmd, encoding="utf-8").strip() print(f"🎉 {binary} {version}") -@subdir(HOST_BUILD_DIR, clean_ok=True) -def make_emscripten_libffi(context, working_dir): - shutil.rmtree(working_dir / "libffi-3.4.6", ignore_errors=True) - with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp_file: - with urlopen( - "https://github.com/libffi/libffi/releases/download/v3.4.6/libffi-3.4.6.tar.gz" - ) as response: +def check_shasum(file: str, expected_shasum: str): + with open(file, "rb") as f: + digest = hashlib.file_digest(f, "sha256") + if digest.hexdigest() != expected_shasum: + raise RuntimeError(f"Unexpected shasum for {file}") + + +def download_and_unpack(working_dir: Path, url: str, expected_shasum: str): + with tempfile.NamedTemporaryFile( + suffix=".tar.gz", delete_on_close=False + ) as tmp_file: + with urlopen(url) as response: shutil.copyfileobj(response, tmp_file) + tmp_file.close() + check_shasum(tmp_file.name, expected_shasum) shutil.unpack_archive(tmp_file.name, working_dir) + + +@subdir(HOST_BUILD_DIR, clean_ok=True) +def make_emscripten_libffi(context, working_dir): + ver = "3.4.6" + libffi_dir = working_dir / f"libffi-{ver}" + shutil.rmtree(libffi_dir, ignore_errors=True) + download_and_unpack( + working_dir, + f"https://github.com/libffi/libffi/releases/download/v{ver}/libffi-{ver}.tar.gz", + "b0dea9df23c863a7a50e825440f3ebffabd65df1497108e5d437747843895a4e", + ) call( [EMSCRIPTEN_DIR / "make_libffi.sh"], env=updated_env({"PREFIX": PREFIX_DIR}), - cwd=working_dir / "libffi-3.4.6", + cwd=libffi_dir, + quiet=context.quiet, + ) + + +@subdir(HOST_BUILD_DIR, clean_ok=True) +def make_mpdec(context, working_dir): + ver = "4.0.1" + mpdec_dir = working_dir / f"mpdecimal-{ver}" + shutil.rmtree(mpdec_dir, ignore_errors=True) + download_and_unpack( + working_dir, + f"https://www.bytereef.org/software/mpdecimal/releases/mpdecimal-{ver}.tar.gz", + "96d33abb4bb0070c7be0fed4246cd38416188325f820468214471938545b1ac8", + ) + call( + [ + "emconfigure", + mpdec_dir / "configure", + "CFLAGS=-fPIC", + "--prefix", + PREFIX_DIR, + "--disable-shared", + ], + cwd=mpdec_dir, + quiet=context.quiet, + ) + call( + ["make", "install"], + cwd=mpdec_dir, quiet=context.quiet, ) @@ -184,17 +242,15 @@ def make_emscripten_libffi(context, working_dir): @subdir(HOST_DIR, clean_ok=True) def configure_emscripten_python(context, working_dir): """Configure the emscripten/host build.""" - config_site = os.fsdecode( - CHECKOUT / "Tools" / "wasm" / "config.site-wasm32-emscripten" - ) + config_site = os.fsdecode(EMSCRIPTEN_DIR / "config.site-wasm32-emscripten") emscripten_build_dir = working_dir.relative_to(CHECKOUT) python_build_dir = NATIVE_BUILD_DIR / "build" lib_dirs = list(python_build_dir.glob("lib.*")) - assert ( - len(lib_dirs) == 1 - ), f"Expected a single lib.* directory in {python_build_dir}" + assert len(lib_dirs) == 1, ( + f"Expected a single lib.* directory in {python_build_dir}" + ) lib_dir = os.fsdecode(lib_dirs[0]) pydebug = lib_dir.endswith("-pydebug") python_version = lib_dir.removesuffix("-pydebug").rpartition("-")[-1] @@ -205,6 +261,17 @@ def configure_emscripten_python(context, working_dir): sysconfig_data += "-pydebug" host_runner = context.host_runner + if node_version := os.environ.get("PYTHON_NODE_VERSION", None): + res = subprocess.run( + [ + "bash", + "-c", + f"source ~/.nvm/nvm.sh && nvm which {node_version}", + ], + text=True, + capture_output=True, + ) + host_runner = res.stdout.strip() pkg_config_path_dir = (PREFIX_DIR / "lib/pkgconfig/").resolve() env_additions = { "CONFIG_SITE": config_site, @@ -237,7 +304,9 @@ def configure_emscripten_python(context, working_dir): quiet=context.quiet, ) - shutil.copy(EMSCRIPTEN_DIR / "node_entry.mjs", working_dir / "node_entry.mjs") + shutil.copy( + EMSCRIPTEN_DIR / "node_entry.mjs", working_dir / "node_entry.mjs" + ) node_entry = working_dir / "node_entry.mjs" exec_script = working_dir / "python.sh" @@ -262,10 +331,20 @@ def configure_emscripten_python(context, working_dir): REALPATH=abs_path fi + # Before node 24, --experimental-wasm-jspi uses different API, + # After node 24 JSPI is on by default. + ARGS=$({host_runner} -e "$(cat <<"EOF" + const major_version = Number(process.version.split(".")[0].slice(1)); + if (major_version === 24) {{ + process.stdout.write("--experimental-wasm-jspi"); + }} + EOF + )") + # We compute our own path, not following symlinks and pass it in so that # node_entry.mjs can set sys.executable correctly. # Intentionally allow word splitting on NODEFLAGS. - exec {host_runner} $NODEFLAGS {node_entry} --this-program="$($REALPATH "$0")" "$@" + exec {host_runner} $NODEFLAGS $ARGS {node_entry} --this-program="$($REALPATH "$0")" "$@" """ ) ) @@ -293,6 +372,7 @@ def build_all(context): configure_build_python, make_build_python, make_emscripten_libffi, + make_mpdec, configure_emscripten_python, make_emscripten_python, ] @@ -319,10 +399,15 @@ def main(): subcommands = parser.add_subparsers(dest="subcommand") build = subcommands.add_parser("build", help="Build everything") configure_build = subcommands.add_parser( - "configure-build-python", help="Run `configure` for the " "build Python" + "configure-build-python", help="Run `configure` for the build Python" + ) + make_mpdec_cmd = subcommands.add_parser( + "make-mpdec", + help="Clone mpdec repo, configure and build it for emscripten", ) make_libffi_cmd = subcommands.add_parser( - "make-libffi", help="Clone libffi repo, configure and build it for emscripten" + "make-libffi", + help="Clone libffi repo, configure and build it for emscripten", ) make_build = subcommands.add_parser( "make-build-python", help="Run `make` for the build Python" @@ -341,9 +426,11 @@ def main(): build, configure_build, make_libffi_cmd, + make_mpdec_cmd, make_build, configure_host, make_host, + clean, ): subcommand.add_argument( "--quiet", @@ -378,6 +465,7 @@ def main(): dispatch = { "make-libffi": make_emscripten_libffi, + "make-mpdec": make_mpdec, "configure-build-python": configure_build_python, "make-build-python": make_build_python, "configure-host": configure_emscripten_python, @@ -388,7 +476,11 @@ def main(): if not context.subcommand: # No command provided, display help and exit - print("Expected one of", ", ".join(sorted(dispatch.keys())), file=sys.stderr) + print( + "Expected one of", + ", ".join(sorted(dispatch.keys())), + file=sys.stderr, + ) parser.print_help(sys.stderr) sys.exit(1) dispatch[context.subcommand](context) diff --git a/Tools/wasm/emscripten/browser_test/.gitignore b/Tools/wasm/emscripten/browser_test/.gitignore new file mode 100644 index 00000000000000..3d54eab82a0d71 --- /dev/null +++ b/Tools/wasm/emscripten/browser_test/.gitignore @@ -0,0 +1,4 @@ +node_modules +test-results +playwright-report +test_log.txt diff --git a/Tools/wasm/emscripten/browser_test/index.spec.ts b/Tools/wasm/emscripten/browser_test/index.spec.ts new file mode 100644 index 00000000000000..2594b73022bcc1 --- /dev/null +++ b/Tools/wasm/emscripten/browser_test/index.spec.ts @@ -0,0 +1,15 @@ +import { test, expect } from '@playwright/test'; + +test('has title', async ({ page }) => { + await page.goto('/'); + + await expect(page).toHaveTitle("Emscripten PyRepl Example"); + const xterm = await page.locator('css=#terminal'); + await expect(xterm).toHaveText(/Python.*on emscripten.*Type.*for more information/); + const xtermInput = await page.getByRole('textbox'); + await xtermInput.pressSequentially(`def f():\nprint("hello", "emscripten repl!")\n\n`); + await xtermInput.pressSequentially(`f()\n`); + await expect(xterm).toHaveText(/hello emscripten repl!/); + +}); + diff --git a/Tools/wasm/emscripten/browser_test/package-lock.json b/Tools/wasm/emscripten/browser_test/package-lock.json new file mode 100644 index 00000000000000..044e3c19ce15f7 --- /dev/null +++ b/Tools/wasm/emscripten/browser_test/package-lock.json @@ -0,0 +1,672 @@ +{ + "name": "browser_test", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "browser_test", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "@playwright/test": "^1.54.1", + "@types/node": "^24.1.0", + "http-server": "^14.1.1", + "playwright": "^1.54.1" + } + }, + "node_modules/@playwright/test": { + "version": "1.54.1", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.54.1.tgz", + "integrity": "sha512-FS8hQ12acieG2dYSksmLOF7BNxnVf2afRJdCuM1eMSxj6QTSE6G4InGF7oApGgDb65MX7AwMVlIkpru0yZA4Xw==", + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.54.1" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@types/node": { + "version": "24.1.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.1.0.tgz", + "integrity": "sha512-ut5FthK5moxFKH2T1CUOC6ctR67rQRvvHdFLCD2Ql6KXmMuCrjsSsRI9UsLCm9M18BMwClv4pn327UvB7eeO1w==", + "license": "MIT", + "dependencies": { + "undici-types": "~7.8.0" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/async": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz", + "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", + "license": "MIT" + }, + "node_modules/basic-auth": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/basic-auth/-/basic-auth-2.0.1.tgz", + "integrity": "sha512-NF+epuEdnUYVlGuhaxbbq+dvJttwLnGY+YixlXlME5KpQ5W3CnXA5cVTneY3SPbPDRkcjMbifrwmFYcClgOZeg==", + "license": "MIT", + "dependencies": { + "safe-buffer": "5.1.2" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "license": "MIT" + }, + "node_modules/corser": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/corser/-/corser-2.0.1.tgz", + "integrity": "sha512-utCYNzRSQIZNPIcGZdQc92UVJYAhtGAteCFg0yRaFm8f0P+CPtyGyHXJcGXnffjCybUCEx3FQ2G7U3/o9eIkVQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/debug": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "license": "MIT" + }, + "node_modules/follow-redirects": { + "version": "1.15.9", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", + "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/html-encoding-sniffer": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-3.0.0.tgz", + "integrity": "sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==", + "license": "MIT", + "dependencies": { + "whatwg-encoding": "^2.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "license": "MIT", + "dependencies": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/http-server": { + "version": "14.1.1", + "resolved": "https://registry.npmjs.org/http-server/-/http-server-14.1.1.tgz", + "integrity": "sha512-+cbxadF40UXd9T01zUHgA+rlo2Bg1Srer4+B4NwIHdaGxAGGv59nYRnGGDJ9LBk7alpS0US+J+bLLdQOOkJq4A==", + "license": "MIT", + "dependencies": { + "basic-auth": "^2.0.1", + "chalk": "^4.1.2", + "corser": "^2.0.1", + "he": "^1.2.0", + "html-encoding-sniffer": "^3.0.0", + "http-proxy": "^1.18.1", + "mime": "^1.6.0", + "minimist": "^1.2.6", + "opener": "^1.5.1", + "portfinder": "^1.0.28", + "secure-compare": "3.0.1", + "union": "~0.5.0", + "url-join": "^4.0.1" + }, + "bin": { + "http-server": "bin/http-server" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/opener": { + "version": "1.5.2", + "resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz", + "integrity": "sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==", + "license": "(WTFPL OR MIT)", + "bin": { + "opener": "bin/opener-bin.js" + } + }, + "node_modules/playwright": { + "version": "1.54.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.54.1.tgz", + "integrity": "sha512-peWpSwIBmSLi6aW2auvrUtf2DqY16YYcCMO8rTVx486jKmDTJg7UAhyrraP98GB8BoPURZP8+nxO7TSd4cPr5g==", + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.54.1" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.54.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.54.1.tgz", + "integrity": "sha512-Nbjs2zjj0htNhzgiy5wu+3w09YetDx5pkrpI/kZotDlDUaYk0HVA5xrBVPdow4SAUIlhgKcJeJg4GRKW6xHusA==", + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/portfinder": { + "version": "1.0.37", + "resolved": "https://registry.npmjs.org/portfinder/-/portfinder-1.0.37.tgz", + "integrity": "sha512-yuGIEjDAYnnOex9ddMnKZEMFE0CcGo6zbfzDklkmT1m5z734ss6JMzN9rNB3+RR7iS+F10D4/BVIaXOyh8PQKw==", + "license": "MIT", + "dependencies": { + "async": "^3.2.6", + "debug": "^4.3.6" + }, + "engines": { + "node": ">= 10.12" + } + }, + "node_modules/qs": { + "version": "6.14.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.0.tgz", + "integrity": "sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==", + "license": "MIT" + }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, + "node_modules/secure-compare": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/secure-compare/-/secure-compare-3.0.1.tgz", + "integrity": "sha512-AckIIV90rPDcBcglUwXPF3kg0P0qmPsPXAj6BBEENQE1p5yA1xfmDJzfi1Tappj37Pv2mVbKpL3Z1T+Nn7k1Qw==", + "license": "MIT" + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/undici-types": { + "version": "7.8.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz", + "integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==", + "license": "MIT" + }, + "node_modules/union": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/union/-/union-0.5.0.tgz", + "integrity": "sha512-N6uOhuW6zO95P3Mel2I2zMsbsanvvtgn6jVqJv4vbVcz/JN0OkL9suomjQGmWtxJQXOCqUJvquc1sMeNz/IwlA==", + "dependencies": { + "qs": "^6.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/url-join": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", + "integrity": "sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==", + "license": "MIT" + }, + "node_modules/whatwg-encoding": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-2.0.0.tgz", + "integrity": "sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=12" + } + } + } +} diff --git a/Tools/wasm/emscripten/browser_test/package.json b/Tools/wasm/emscripten/browser_test/package.json new file mode 100644 index 00000000000000..3320d4cccd0594 --- /dev/null +++ b/Tools/wasm/emscripten/browser_test/package.json @@ -0,0 +1,18 @@ +{ + "name": "browser_test", + "version": "1.0.0", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "description": "", + "dependencies": { + "@playwright/test": "^1.54.1", + "@types/node": "^24.1.0", + "http-server": "^14.1.1", + "playwright": "^1.54.1" + } +} diff --git a/Tools/wasm/emscripten/browser_test/playwright.config.ts b/Tools/wasm/emscripten/browser_test/playwright.config.ts new file mode 100644 index 00000000000000..81d53ce11cb050 --- /dev/null +++ b/Tools/wasm/emscripten/browser_test/playwright.config.ts @@ -0,0 +1,22 @@ +import { defineConfig, devices } from '@playwright/test'; + +export default defineConfig({ + testDir: '.', + forbidOnly: true, + retries: 2, + reporter: process.env.CI ? 'dot' : 'html', + use: { + baseURL: 'http://localhost:8787', + trace: 'on-first-retry', + }, + projects: [ + { + name: 'chromium', + use: { ...devices['Desktop Chrome'] }, + }, + ], + webServer: { + command: 'npx http-server ../../../../cross-build/wasm32-emscripten/build/python/web_example_pyrepl_jspi/ -p 8787', + url: 'http://localhost:8787', + }, +}); diff --git a/Tools/wasm/emscripten/browser_test/run_test.sh b/Tools/wasm/emscripten/browser_test/run_test.sh new file mode 100755 index 00000000000000..9166e0d740585e --- /dev/null +++ b/Tools/wasm/emscripten/browser_test/run_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -euo pipefail +cd "$(dirname "$0")" +rm -f test_log.txt +echo "Installing node packages" | tee test_log.txt +npm ci >> test_log.txt 2>&1 +echo "Installing playwright browsers" | tee test_log.txt +npx playwright install 2>> test_log.txt +echo "Running tests" | tee test_log.txt +CI=1 npx playwright test | tee test_log.txt diff --git a/Tools/wasm/config.site-wasm32-emscripten b/Tools/wasm/emscripten/config.site-wasm32-emscripten similarity index 97% rename from Tools/wasm/config.site-wasm32-emscripten rename to Tools/wasm/emscripten/config.site-wasm32-emscripten index 1471546a5eec17..9f98e3f3c3bb1f 100644 --- a/Tools/wasm/config.site-wasm32-emscripten +++ b/Tools/wasm/emscripten/config.site-wasm32-emscripten @@ -1,6 +1,6 @@ # config.site override for cross compiling to wasm32-emscripten platform # -# CONFIG_SITE=Tools/wasm/config.site-wasm32-emscripten \ +# CONFIG_SITE=Tools/wasm/emscripten/config.site-wasm32-emscripten \ # emconfigure ./configure --host=wasm32-unknown-emscripten --build=... # # Written by Christian Heimes <christian@python.org> @@ -69,7 +69,6 @@ ac_cv_func_posix_fallocate=no # Syscalls that resulted in a segfault ac_cv_func_utimensat=no -ac_cv_header_sys_ioctl_h=no # sockets are supported, but only AF_INET / AF_INET6 in non-blocking mode. # Disable AF_UNIX and AF_PACKET support, see socketmodule.h. diff --git a/Tools/wasm/emscripten/prepare_external_wasm.py b/Tools/wasm/emscripten/prepare_external_wasm.py new file mode 100644 index 00000000000000..1b0a9de4b1fe8d --- /dev/null +++ b/Tools/wasm/emscripten/prepare_external_wasm.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +import argparse +import sys +from pathlib import Path + +JS_TEMPLATE = """ +#include "emscripten.h" + +EM_JS(void, {function_name}, (void), {{ + return new WebAssembly.Module(hexStringToUTF8Array("{hex_string}")); +}} +function hexStringToUTF8Array(hex) {{ + const bytes = []; + for (let i = 0; i < hex.length; i += 2) {{ + bytes.push(parseInt(hex.substr(i, 2), 16)); + }} + return new Uint8Array(bytes); +}}); +""" + + +def prepare_wasm(input_file, output_file, function_name): + # Read the compiled WASM as binary and convert to hex + wasm_bytes = Path(input_file).read_bytes() + + hex_string = "".join(f"{byte:02x}" for byte in wasm_bytes) + + # Generate JavaScript module + js_content = JS_TEMPLATE.format( + function_name=function_name, hex_string=hex_string + ) + Path(output_file).write_text(js_content) + + print(f"Successfully compiled {input_file} and generated {output_file}") + return 0 + + +def main(): + parser = argparse.ArgumentParser( + description="Compile WebAssembly text files using wasm-as" + ) + parser.add_argument("input_file", help="Input .wat file to compile") + parser.add_argument("output_file", help="Output file name") + parser.add_argument("function_name", help="Name of the export function") + + args = parser.parse_args() + + return prepare_wasm(args.input_file, args.output_file, args.function_name) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Tools/wasm/emscripten/web_example/wasm_assets.py b/Tools/wasm/emscripten/wasm_assets.py similarity index 95% rename from Tools/wasm/emscripten/web_example/wasm_assets.py rename to Tools/wasm/emscripten/wasm_assets.py index deeb9229a4412b..384790872353b2 100755 --- a/Tools/wasm/emscripten/web_example/wasm_assets.py +++ b/Tools/wasm/emscripten/wasm_assets.py @@ -15,10 +15,9 @@ import sys import sysconfig import zipfile -from typing import Dict # source directory -SRCDIR = pathlib.Path(__file__).parents[4].absolute() +SRCDIR = pathlib.Path(__file__).parents[3].absolute() SRCDIR_LIB = SRCDIR / "Lib" @@ -27,7 +26,9 @@ WASM_STDLIB_ZIP = ( WASM_LIB / f"python{sys.version_info.major}{sys.version_info.minor}.zip" ) -WASM_STDLIB = WASM_LIB / f"python{sys.version_info.major}.{sys.version_info.minor}" +WASM_STDLIB = ( + WASM_LIB / f"python{sys.version_info.major}.{sys.version_info.minor}" +) WASM_DYNLOAD = WASM_STDLIB / "lib-dynload" @@ -58,7 +59,6 @@ # socket.create_connection() raises an exception: # "BlockingIOError: [Errno 26] Operation in progress". OMIT_NETWORKING_FILES = ( - "email/", "ftplib.py", "http/", "imaplib.py", @@ -84,7 +84,6 @@ "_json": ["json/"], "_multiprocessing": ["concurrent/futures/process.py", "multiprocessing/"], "pyexpat": ["xml/", "xmlrpc/"], - "readline": ["rlcompleter.py"], "_sqlite3": ["sqlite3/"], "_ssl": ["ssl.py"], "_tkinter": ["idlelib/", "tkinter/", "turtle.py", "turtledemo/"], @@ -134,7 +133,7 @@ def filterfunc(filename: str) -> bool: pzf.writepy(entry, filterfunc=filterfunc) -def detect_extension_modules(args: argparse.Namespace) -> Dict[str, bool]: +def detect_extension_modules(args: argparse.Namespace) -> dict[str, bool]: modules = {} # disabled by Modules/Setup.local ? @@ -149,7 +148,7 @@ def detect_extension_modules(args: argparse.Namespace) -> Dict[str, bool]: # disabled by configure? with open(args.sysconfig_data) as f: data = f.read() - loc: Dict[str, Dict[str, str]] = {} + loc: dict[str, dict[str, str]] = {} exec(data, globals(), loc) for key, value in loc["build_time_vars"].items(): diff --git a/Tools/wasm/emscripten/web_example/python.html b/Tools/wasm/emscripten/web_example/index.html similarity index 52% rename from Tools/wasm/emscripten/web_example/python.html rename to Tools/wasm/emscripten/web_example/index.html index 078f86eb764419..9c89c9c0ed3bf5 100644 --- a/Tools/wasm/emscripten/web_example/python.html +++ b/Tools/wasm/emscripten/web_example/index.html @@ -1,31 +1,39 @@ <!doctype html> <html lang="en"> <head> - <meta charset="UTF-8" /> - <meta http-equiv="X-UA-Compatible" content="IE=edge" /> - <meta name="viewport" content="width=device-width, initial-scale=1.0" /> - <meta name="author" content="Katie Bell" /> - <meta name="description" content="Simple REPL for Python WASM" /> + <meta charset="UTF-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <meta name="author" content="Katie Bell, Adam Hartz"> + <meta name="description" content="Simple REPL for Python WASM"> <title>wasm-python terminal</title> <link rel="stylesheet" href="https://unpkg.com/xterm@4.18.0/css/xterm.css" crossorigin integrity="sha384-4eEEn/eZgVHkElpKAzzPx/Kow/dTSgFk1BNe+uHdjHa+NkZJDh5Vqkq31+y7Eycd" - /> + > <style> body { font-family: arial; max-width: 800px; margin: 0 auto; } - #code { + #editor { + padding: 5px; + border: 1px solid black; width: 100%; - height: 180px; + height: 300px; } #info { padding-top: 20px; } + .error { + border: 1px solid red; + background-color: #ffd9d9; + padding: 5px; + margin-top: 20px; + } .button-container { display: flex; justify-content: end; @@ -41,8 +49,14 @@ src="https://unpkg.com/xterm@4.18.0/lib/xterm.js" crossorigin integrity="sha384-yYdNmem1ioP5Onm7RpXutin5A8TimLheLNQ6tnMi01/ZpxXdAwIm2t4fJMx1Djs+" - /> + ></script> + <script + src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.43.1/ace.js" + crossorigin + integrity="sha512-kmA5vhcxOkZI0ReiKJMGNb8/KKbgbExIlnt6aXuPtl86AgHBEi6OHHOz2wsTazBDGZKxe7fmiE+pIuZJQks4+A==" + ></script> <script type="module"> + const _magic_ctrlc_string = "__WASM_REPL_CTRLC_" + (Date.now()) + "__"; class WorkerManager { constructor( workerURL, @@ -132,11 +146,14 @@ class WasmTerminal { constructor() { - this.inputBuffer = new BufferQueue(); - this.input = ""; - this.resolveInput = null; - this.activeInput = false; - this.inputStartCursor = null; + try { + this.history = JSON.parse(sessionStorage.getItem('__python_wasm_repl.history')); + this.historyBuffer = this.history.slice(); + } catch(e) { + this.history = []; + this.historyBuffer = []; + } + this.reset(); this.xterm = new Terminal({ scrollback: 10000, @@ -155,6 +172,18 @@ this.xterm.onData(this.handleTermData); } + reset() { + this.inputBuffer = new BufferQueue(); + this.input = ""; + this.resolveInput = null; + this.activeInput = false; + this.inputStartCursor = null; + + this.cursorPosition = 0; + this.historyIndex = -1; + this.beforeHistoryNav = ""; + } + open(container) { this.xterm.open(container); } @@ -186,9 +215,34 @@ if (!(ord === 0x1b || ord == 0x7f || ord < 32)) { this.inputBuffer.addData(data); } - // TODO: Handle ANSI escape sequences + // TODO: Handle more escape sequences? } else if (ord === 0x1b) { // Handle special characters + switch (data.slice(1)) { + case "[A": // up + this.historyBack(); + break; + case "[B": // down + this.historyForward(); + break; + case "[C": // right + this.cursorRight(); + break; + case "[D": // left + this.cursorLeft(); + break; + case "[H": // home key + this.cursorHome(true); + break; + case "[F": // end key + this.cursorEnd(true); + break; + case "[3~": // delete key + this.deleteAtCursor(); + break; + default: + break; + } } else if (ord < 32 || ord === 0x7f) { switch (data) { case "\x0c": // CTRL+L @@ -201,8 +255,18 @@ this.input + this.writeLine("\n"), ); this.input = ""; + this.cursorPosition = 0; this.activeInput = false; break; + case "\x03": // CTRL+C + this.input = ""; + this.cursorPosition = 0; + this.historyIndex = -1; + this.resolveInput(_magic_ctrlc_string + "\n"); + break; + case "\x09": // TAB + this.handleTab(); + break; case "\x7F": // BACKSPACE case "\x08": // CTRL+H this.handleCursorErase(true); @@ -211,14 +275,20 @@ // Send empty input if (this.input === "") { this.resolveInput(""); + this.cursorPosition = 0; this.activeInput = false; } } } else { this.handleCursorInsert(data); + this.updateHistory(); } }; + clearLine() { + this.xterm.write("\x1b[K"); + } + writeLine(line) { this.xterm.write(line.slice(0, -1)); this.xterm.write("\r\n"); @@ -226,8 +296,36 @@ } handleCursorInsert(data) { - this.input += data; + const trailing = this.input.slice(this.cursorPosition); + this.input = + this.input.slice(0, this.cursorPosition) + + data + + trailing; + this.cursorPosition += data.length; this.xterm.write(data); + if (trailing.length !== 0) { + this.xterm.write(trailing); + this.xterm.write("\x1b[" + trailing.length + "D"); + } + this.updateHistory(); + } + + handleTab() { + // handle tabs: from the current position, add spaces until + // this.cursorPosition is a multiple of 4. + const prefix = this.input.slice(0, this.cursorPosition); + const suffix = this.input.slice(this.cursorPosition); + const count = 4 - (this.cursorPosition % 4); + const toAdd = " ".repeat(count); + this.input = prefix + toAdd + suffix; + this.cursorHome(false); + this.clearLine(); + this.xterm.write(this.input); + if (suffix) { + this.xterm.write("\x1b[" + suffix.length + "D"); + } + this.cursorPosition += count; + this.updateHistory(); } handleCursorErase() { @@ -238,9 +336,113 @@ ) { return; } - this.input = this.input.slice(0, -1); - this.xterm.write("\x1B[D"); - this.xterm.write("\x1B[P"); + const trailing = this.input.slice(this.cursorPosition); + this.input = + this.input.slice(0, this.cursorPosition - 1) + trailing; + this.cursorLeft(); + this.clearLine(); + if (trailing.length !== 0) { + this.xterm.write(trailing); + this.xterm.write("\x1b[" + trailing.length + "D"); + } + this.updateHistory(); + } + + deleteAtCursor() { + if (this.cursorPosition < this.input.length) { + const trailing = this.input.slice( + this.cursorPosition + 1, + ); + this.input = + this.input.slice(0, this.cursorPosition) + trailing; + this.clearLine(); + if (trailing.length !== 0) { + this.xterm.write(trailing); + this.xterm.write("\x1b[" + trailing.length + "D"); + } + this.updateHistory(); + } + } + + cursorRight() { + if (this.cursorPosition < this.input.length) { + this.cursorPosition += 1; + this.xterm.write("\x1b[C"); + } + } + + cursorLeft() { + if (this.cursorPosition > 0) { + this.cursorPosition -= 1; + this.xterm.write("\x1b[D"); + } + } + + cursorHome(updatePosition) { + if (this.cursorPosition > 0) { + this.xterm.write("\x1b[" + this.cursorPosition + "D"); + if (updatePosition) { + this.cursorPosition = 0; + } + } + } + + cursorEnd() { + if (this.cursorPosition < this.input.length) { + this.xterm.write( + "\x1b[" + + (this.input.length - this.cursorPosition) + + "C", + ); + this.cursorPosition = this.input.length; + } + } + + updateHistory() { + if (this.historyIndex !== -1) { + this.historyBuffer[this.historyIndex] = this.input; + } else { + this.beforeHistoryNav = this.input; + } + } + + historyBack() { + if (this.history.length === 0) { + return; + } else if (this.historyIndex === -1) { + // we're not currently navigating the history; store + // the current command and then look at the end of our + // history buffer + this.beforeHistoryNav = this.input; + this.historyIndex = this.history.length - 1; + } else if (this.historyIndex > 0) { + this.historyIndex -= 1; + } + this.input = this.historyBuffer[this.historyIndex]; + this.cursorHome(false); + this.clearLine(); + this.xterm.write(this.input); + this.cursorPosition = this.input.length; + } + + historyForward() { + if (this.history.length === 0 || this.historyIndex === -1) { + // we're not currently navigating the history; NOP. + return; + } else if (this.historyIndex < this.history.length - 1) { + this.historyIndex += 1; + this.input = this.historyBuffer[this.historyIndex]; + } else if (this.historyIndex == this.history.length - 1) { + // we're coming back from the last history value; reset + // the input to whatever it was when we started going + // through the history + this.input = this.beforeHistoryNav; + this.historyIndex = -1; + } + this.cursorHome(false); + this.clearLine(); + this.xterm.write(this.input); + this.cursorPosition = this.input.length; } prompt = async () => { @@ -263,12 +465,29 @@ // Hack to ensure cursor input start doesn't end up after user input setTimeout(() => { this.handleCursorInsert( - this.inputBuffer.nextLine(), + this.inputBuffer.nextLine() ); }, 1); } return new Promise((resolve, reject) => { this.resolveInput = (value) => { + if ( + value.replace(/\s/g, "").length != 0 && + value != _magic_ctrlc_string + "\n" + ) { + if (this.historyIndex !== -1) { + this.historyBuffer[this.historyIndex] = + this.history[this.historyIndex]; + } + this.history.push(value.slice(0, -1)); + this.historyBuffer.push(value.slice(0, -1)); + this.historyIndex = -1; + this.cursorPosition = 0; + try { + sessionStorage.setItem('__python_wasm_repl.history', JSON.stringify(this.history)); + } catch(e) { + } + } resolve(value); }; }); @@ -327,8 +546,6 @@ const stopButton = document.getElementById("stop"); const clearButton = document.getElementById("clear"); - const codeBox = document.getElementById("codebox"); - window.onload = () => { const terminal = new WasmTerminal(); terminal.open(document.getElementById("terminal")); @@ -362,8 +579,9 @@ runButton.addEventListener("click", (e) => { terminal.clear(); + terminal.reset(); // reset the history programRunning(true); - const code = codeBox.value; + const code = editor.getValue(); pythonWorkerManager.run({ args: ["main.py"], files: { "main.py": code }, @@ -372,10 +590,28 @@ replButton.addEventListener("click", (e) => { terminal.clear(); + terminal.reset(); // reset the history + const REPL = ` +class WASMREPLKeyboardInterrupt(KeyboardInterrupt): + pass + +import sys +import code +import builtins + +def _interrupt_aware_input(prompt=''): + line = builtins.input(prompt) + if line.strip() == "${_magic_ctrlc_string}": + raise KeyboardInterrupt() + return line + +cprt = 'Type "help", "copyright", "credits" or "license" for more information.' +banner = f'Python {sys.version} on {sys.platform}\\n{cprt}' + +code.interact(banner=banner, readfunc=_interrupt_aware_input, exitmsg='') +`; programRunning(true); - // Need to use "-i -" to force interactive mode. - // Looks like isatty always returns false in emscripten - pythonWorkerManager.run({ args: ["-i", "-"], files: {} }); + pythonWorkerManager.run({ args: ["-c", REPL], files: {} }); }); stopButton.addEventListener("click", (e) => { @@ -395,6 +631,7 @@ const finishedCallback = () => { programRunning(false); + pythonWorkerManager.reset(); }; const pythonWorkerManager = new WorkerManager( @@ -404,30 +641,63 @@ finishedCallback, ); }; + var editor; + document.addEventListener("DOMContentLoaded", () => { + editor = ace.edit("editor"); + editor.session.setMode("ace/mode/python"); + }); </script> </head> <body> - <h1>Simple REPL for Python WASM</h1> - <textarea id="codebox" cols="108" rows="16"> -print('Welcome to WASM!') -</textarea - > - <div class="button-container"> - <button id="run" disabled>Run</button> - <button id="repl" disabled>Start REPL</button> - <button id="stop" disabled>Stop</button> - <button id="clear" disabled>Clear</button> + <div id="repldemo"> + <h1>Simple REPL for Python WASM</h1> + <div id="editor">print('Welcome to WASM!')</div> + <div class="button-container"> + <button id="run" disabled>Run code</button> + <button id="repl" disabled>Start REPL</button> + <button id="stop" disabled>Stop</button> + <button id="clear" disabled>Clear</button> + </div> + <div id="terminal"></div> + <div id="info"> + The simple REPL provides a limited Python experience in the + browser. + <a + href="https://github.com/python/cpython/blob/main/Tools/wasm/README.md" + > + Tools/wasm/README.md + </a> + contains a list of known limitations and issues. Networking, + subprocesses, and threading are not available. + </div> </div> - <div id="terminal"></div> - <div id="info"> - The simple REPL provides a limited Python experience in the browser. - <a - href="https://github.com/python/cpython/blob/main/Tools/wasm/README.md" - > - Tools/wasm/README.md - </a> - contains a list of known limitations and issues. Networking, - subprocesses, and threading are not available. + <div id="buffererror" class="error" style="display: none"> + <p> + <code>SharedArrayBuffer</code>, which is required for this demo, + is not available in your browser environment. One common cause + of this failure is loading <code>index.html</code> directly in + your browser instead of using <code>server.py</code> as + described in + <a + href="https://github.com/python/cpython/blob/main/Tools/wasm/README.md#the-web-example" + > + Tools/wasm/README.md + </a>. + </p> + <p> + For more details about security requirements for + <code>SharedArrayBuffer</code>, see + <a + href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SharedArrayBuffer#security_requirements" + >this MDN page</a + >. + </p> + <script> + if (typeof SharedArrayBuffer === 'undefined') { + document.getElementById('repldemo').style.display = 'none'; + document.getElementById('buffererror').style.display = 'block'; + } + </script> </div> </body> </html> diff --git a/Tools/wasm/emscripten/web_example/server.py b/Tools/wasm/emscripten/web_example/server.py index 768e6f84e07798..f2e6ed56c6bcff 100755 --- a/Tools/wasm/emscripten/web_example/server.py +++ b/Tools/wasm/emscripten/web_example/server.py @@ -6,10 +6,16 @@ description="Start a local webserver with a Python terminal." ) parser.add_argument( - "--port", type=int, default=8000, help="port for the http server to listen on" + "--port", + type=int, + default=8000, + help="port for the http server to listen on", ) parser.add_argument( - "--bind", type=str, default="127.0.0.1", help="Bind address (empty for all)" + "--bind", + type=str, + default="127.0.0.1", + help="Bind address (empty for all)", ) diff --git a/Tools/wasm/emscripten/web_example_pyrepl_jspi/index.html b/Tools/wasm/emscripten/web_example_pyrepl_jspi/index.html new file mode 100644 index 00000000000000..cc7413b5caa982 --- /dev/null +++ b/Tools/wasm/emscripten/web_example_pyrepl_jspi/index.html @@ -0,0 +1,35 @@ +<!doctype html> +<html> + <head> + <title>Emscripten PyRepl Example</title> + <link + rel="stylesheet" + href="https://unpkg.com/xterm@4.18.0/css/xterm.css" + /> + <style> + body { + background-color: #300a24; + } + + .xterm-dom-renderer-owner-1 .xterm-fg-3 { + color: #c4a000 !important; + } + + .xterm-dom-renderer-owner-1 .xterm-fg-6 { + color: #2aa1b3 !important; + } + + .xterm-dom-renderer-owner-1 .xterm-fg-12 { + color: #1054a6 !important; + } + + .xterm-dom-renderer-owner-1 .xterm-fg-13 { + color: #a347ba !important; + } + </style> + </head> + <body> + <div id="terminal"></div> + <script type="module" src="src.mjs"></script> + </body> +</html> diff --git a/Tools/wasm/emscripten/web_example_pyrepl_jspi/src.mjs b/Tools/wasm/emscripten/web_example_pyrepl_jspi/src.mjs new file mode 100644 index 00000000000000..5642372c9d2472 --- /dev/null +++ b/Tools/wasm/emscripten/web_example_pyrepl_jspi/src.mjs @@ -0,0 +1,194 @@ +// Much of this is adapted from here: +// https://github.com/mame/xterm-pty/blob/main/emscripten-pty.js +// Thanks to xterm-pty for making this possible! + +import createEmscriptenModule from "./python.mjs"; +import { openpty } from "https://unpkg.com/xterm-pty/index.mjs"; +import "https://unpkg.com/@xterm/xterm/lib/xterm.js"; + +var term = new Terminal(); +term.open(document.getElementById("terminal")); +const { master, slave: PTY } = openpty(); +term.loadAddon(master); +globalThis.PTY = PTY; + +async function setupStdlib(Module) { + const versionInt = Module.HEAPU32[Module._Py_Version >>> 2]; + const major = (versionInt >>> 24) & 0xff; + const minor = (versionInt >>> 16) & 0xff; + // Prevent complaints about not finding exec-prefix by making a lib-dynload directory + Module.FS.mkdirTree(`/lib/python${major}.${minor}/lib-dynload/`); + const resp = await fetch(`python${major}.${minor}.zip`); + const stdlibBuffer = await resp.arrayBuffer(); + Module.FS.writeFile( + `/lib/python${major}${minor}.zip`, + new Uint8Array(stdlibBuffer), + { canOwn: true }, + ); +} + +const tty_ops = { + ioctl_tcgets: () => { + const termios = PTY.ioctl("TCGETS"); + const data = { + c_iflag: termios.iflag, + c_oflag: termios.oflag, + c_cflag: termios.cflag, + c_lflag: termios.lflag, + c_cc: termios.cc, + }; + return data; + }, + + ioctl_tcsets: (_tty, _optional_actions, data) => { + PTY.ioctl("TCSETS", { + iflag: data.c_iflag, + oflag: data.c_oflag, + cflag: data.c_cflag, + lflag: data.c_lflag, + cc: data.c_cc, + }); + return 0; + }, + + ioctl_tiocgwinsz: () => PTY.ioctl("TIOCGWINSZ").reverse(), + + get_char: () => { + throw new Error("Should not happen"); + }, + put_char: () => { + throw new Error("Should not happen"); + }, + + fsync: () => {}, +}; + +const POLLIN = 1; +const POLLOUT = 4; + +const waitResult = { + READY: 0, + SIGNAL: 1, + TIMEOUT: 2, +}; + +function onReadable() { + var handle; + var promise = new Promise((resolve) => { + handle = PTY.onReadable(() => resolve(waitResult.READY)); + }); + return [promise, handle]; +} + +function onSignal() { + // TODO: signal handling + var handle = { dispose() {} }; + var promise = new Promise((resolve) => {}); + return [promise, handle]; +} + +function onTimeout(timeout) { + var id; + var promise = new Promise((resolve) => { + if (timeout > 0) { + id = setTimeout(resolve, timeout, waitResult.TIMEOUT); + } + }); + var handle = { + dispose() { + if (id) { + clearTimeout(id); + } + }, + }; + return [promise, handle]; +} + +async function waitForReadable(timeout) { + let p1, p2, p3; + let h1, h2, h3; + try { + [p1, h1] = onReadable(); + [p2, h2] = onTimeout(timeout); + [p3, h3] = onSignal(); + return await Promise.race([p1, p2, p3]); + } finally { + h1.dispose(); + h2.dispose(); + h3.dispose(); + } +} + +const FIONREAD = 0x541b; + +const tty_stream_ops = { + async readAsync(stream, buffer, offset, length, pos /* ignored */) { + let readBytes = PTY.read(length); + if (length && !readBytes.length) { + const status = await waitForReadable(-1); + if (status === waitResult.READY) { + readBytes = PTY.read(length); + } else { + throw new Error("Not implemented"); + } + } + buffer.set(readBytes, offset); + return readBytes.length; + }, + + write: (stream, buffer, offset, length) => { + // Note: default `buffer` is for some reason `HEAP8` (signed), while we want unsigned `HEAPU8`. + buffer = new Uint8Array( + buffer.buffer, + buffer.byteOffset, + buffer.byteLength, + ); + const toWrite = Array.from(buffer.subarray(offset, offset + length)); + PTY.write(toWrite); + return length; + }, + + async pollAsync(stream, timeout) { + if (!PTY.readable && timeout) { + await waitForReadable(timeout); + } + return (PTY.readable ? POLLIN : 0) | (PTY.writable ? POLLOUT : 0); + }, + ioctl(stream, request, varargs) { + if (request === FIONREAD) { + const res = PTY.fromLdiscToUpperBuffer.length; + Module.HEAPU32[varargs / 4] = res; + return 0; + } + throw new Error("Unimplemented ioctl request"); + }, +}; + +async function setupStdio(Module) { + Object.assign(Module.TTY.default_tty_ops, tty_ops); + Object.assign(Module.TTY.stream_ops, tty_stream_ops); +} + +const emscriptenSettings = { + async preRun(Module) { + Module.addRunDependency("pre-run"); + Module.ENV.TERM = "xterm-256color"; + // Uncomment next line to turn on tracing (messages go to browser console). + // Module.ENV.PYREPL_TRACE = "1"; + + // Leak module so we can try to show traceback if we crash on startup + globalThis.Module = Module; + await Promise.all([setupStdlib(Module), setupStdio(Module)]); + Module.removeRunDependency("pre-run"); + }, +}; + +try { + await createEmscriptenModule(emscriptenSettings); +} catch (e) { + // Show JavaScript exception and traceback + console.warn(e); + // Show Python exception and traceback + Module.__Py_DumpTraceback(2, Module._PyGILState_GetThisThreadState()); + process.exit(1); +} diff --git a/Tools/wasm/mypy.ini b/Tools/wasm/mypy.ini deleted file mode 100644 index 4de0a30c260f5f..00000000000000 --- a/Tools/wasm/mypy.ini +++ /dev/null @@ -1,11 +0,0 @@ -[mypy] -files = Tools/wasm/wasm_*.py -pretty = True -show_traceback = True - -# Make sure the wasm can be run using Python 3.8: -python_version = 3.8 - -# Be strict... -strict = True -enable_error_code = truthy-bool,ignore-without-code diff --git a/Tools/wasm/wasi-env b/Tools/wasm/wasi-env index 4c5078a1f675e2..08d4f499baa85c 100755 --- a/Tools/wasm/wasi-env +++ b/Tools/wasm/wasi-env @@ -1,7 +1,8 @@ #!/bin/sh set -e -# NOTE: to be removed once no longer used in https://github.com/python/buildmaster-config/blob/main/master/custom/factories.py . +# NOTE: to be removed once no longer used in https://github.com/python/buildmaster-config/blob/main/master/custom/factories.py ; +# expected in Python 3.18 as 3.13 is when `wasi.py` was introduced. # function usage() { diff --git a/Tools/wasm/wasi.py b/Tools/wasm/wasi.py index a742043e4be1d2..af55e03d10f754 100644 --- a/Tools/wasm/wasi.py +++ b/Tools/wasm/wasi.py @@ -1,367 +1,12 @@ -#!/usr/bin/env python3 +if __name__ == "__main__": + import pathlib + import runpy + import sys -import argparse -import contextlib -import functools -import os -try: - from os import process_cpu_count as cpu_count -except ImportError: - from os import cpu_count -import pathlib -import shutil -import subprocess -import sys -import sysconfig -import tempfile - - -CHECKOUT = pathlib.Path(__file__).parent.parent.parent - -CROSS_BUILD_DIR = CHECKOUT / "cross-build" -BUILD_DIR = CROSS_BUILD_DIR / "build" - -LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local" -LOCAL_SETUP_MARKER = "# Generated by Tools/wasm/wasi.py\n".encode("utf-8") - -WASMTIME_VAR_NAME = "WASMTIME" -WASMTIME_HOST_RUNNER_VAR = f"{{{WASMTIME_VAR_NAME}}}" - - -def updated_env(updates={}): - """Create a new dict representing the environment to use. - - The changes made to the execution environment are printed out. - """ - env_defaults = {} - # https://reproducible-builds.org/docs/source-date-epoch/ - git_epoch_cmd = ["git", "log", "-1", "--pretty=%ct"] - try: - epoch = subprocess.check_output(git_epoch_cmd, encoding="utf-8").strip() - env_defaults["SOURCE_DATE_EPOCH"] = epoch - except subprocess.CalledProcessError: - pass # Might be building from a tarball. - # This layering lets SOURCE_DATE_EPOCH from os.environ takes precedence. - environment = env_defaults | os.environ | updates - - env_diff = {} - for key, value in environment.items(): - if os.environ.get(key) != value: - env_diff[key] = value - - print("🌎 Environment changes:") - for key in sorted(env_diff.keys()): - print(f" {key}={env_diff[key]}") - - return environment - - -def subdir(working_dir, *, clean_ok=False): - """Decorator to change to a working directory.""" - def decorator(func): - @functools.wraps(func) - def wrapper(context): - nonlocal working_dir - - if callable(working_dir): - working_dir = working_dir(context) - try: - tput_output = subprocess.check_output(["tput", "cols"], - encoding="utf-8") - except subprocess.CalledProcessError: - terminal_width = 80 - else: - terminal_width = int(tput_output.strip()) - print("⎯" * terminal_width) - print("📁", working_dir) - if (clean_ok and getattr(context, "clean", False) and - working_dir.exists()): - print(f"🚮 Deleting directory (--clean)...") - shutil.rmtree(working_dir) - - working_dir.mkdir(parents=True, exist_ok=True) - - with contextlib.chdir(working_dir): - return func(context, working_dir) - - return wrapper - - return decorator - - -def call(command, *, quiet, **kwargs): - """Execute a command. - - If 'quiet' is true, then redirect stdout and stderr to a temporary file. - """ - print("❯", " ".join(map(str, command))) - if not quiet: - stdout = None - stderr = None - else: - stdout = tempfile.NamedTemporaryFile("w", encoding="utf-8", - delete=False, - prefix="cpython-wasi-", - suffix=".log") - stderr = subprocess.STDOUT - print(f"📝 Logging output to {stdout.name} (--quiet)...") - - subprocess.check_call(command, **kwargs, stdout=stdout, stderr=stderr) - - -def build_platform(): - """The name of the build/host platform.""" - # Can also be found via `config.guess`.` - return sysconfig.get_config_var("BUILD_GNU_TYPE") - - -def build_python_path(): - """The path to the build Python binary.""" - binary = BUILD_DIR / "python" - if not binary.is_file(): - binary = binary.with_suffix(".exe") - if not binary.is_file(): - raise FileNotFoundError("Unable to find `python(.exe)` in " - f"{BUILD_DIR}") - - return binary - - -@subdir(BUILD_DIR, clean_ok=True) -def configure_build_python(context, working_dir): - """Configure the build/host Python.""" - if LOCAL_SETUP.exists(): - print(f"👍 {LOCAL_SETUP} exists ...") - else: - print(f"📝 Touching {LOCAL_SETUP} ...") - LOCAL_SETUP.write_bytes(LOCAL_SETUP_MARKER) - - configure = [os.path.relpath(CHECKOUT / 'configure', working_dir)] - if context.args: - configure.extend(context.args) - - call(configure, quiet=context.quiet) - - -@subdir(BUILD_DIR) -def make_build_python(context, working_dir): - """Make/build the build Python.""" - call(["make", "--jobs", str(cpu_count()), "all"], - quiet=context.quiet) - - binary = build_python_path() - cmd = [binary, "-c", - "import sys; " - "print(f'{sys.version_info.major}.{sys.version_info.minor}')"] - version = subprocess.check_output(cmd, encoding="utf-8").strip() - - print(f"🎉 {binary} {version}") - - -def find_wasi_sdk(): - """Find the path to wasi-sdk.""" - if wasi_sdk_path := os.environ.get("WASI_SDK_PATH"): - return pathlib.Path(wasi_sdk_path) - elif (default_path := pathlib.Path("/opt/wasi-sdk")).exists(): - return default_path - - -def wasi_sdk_env(context): - """Calculate environment variables for building with wasi-sdk.""" - wasi_sdk_path = context.wasi_sdk_path - sysroot = wasi_sdk_path / "share" / "wasi-sysroot" - env = {"CC": "clang", "CPP": "clang-cpp", "CXX": "clang++", - "AR": "llvm-ar", "RANLIB": "ranlib"} - - for env_var, binary_name in list(env.items()): - env[env_var] = os.fsdecode(wasi_sdk_path / "bin" / binary_name) - - if wasi_sdk_path != pathlib.Path("/opt/wasi-sdk"): - for compiler in ["CC", "CPP", "CXX"]: - env[compiler] += f" --sysroot={sysroot}" - - env["PKG_CONFIG_PATH"] = "" - env["PKG_CONFIG_LIBDIR"] = os.pathsep.join( - map(os.fsdecode, - [sysroot / "lib" / "pkgconfig", - sysroot / "share" / "pkgconfig"])) - env["PKG_CONFIG_SYSROOT_DIR"] = os.fsdecode(sysroot) - - env["WASI_SDK_PATH"] = os.fsdecode(wasi_sdk_path) - env["WASI_SYSROOT"] = os.fsdecode(sysroot) - - env["PATH"] = os.pathsep.join([os.fsdecode(wasi_sdk_path / "bin"), - os.environ["PATH"]]) - - return env - - -@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple, clean_ok=True) -def configure_wasi_python(context, working_dir): - """Configure the WASI/host build.""" - if not context.wasi_sdk_path or not context.wasi_sdk_path.exists(): - raise ValueError("WASI-SDK not found; " - "download from " - "https://github.com/WebAssembly/wasi-sdk and/or " - "specify via $WASI_SDK_PATH or --wasi-sdk") - - config_site = os.fsdecode(CHECKOUT / "Tools" / "wasm" / "config.site-wasm32-wasi") - - wasi_build_dir = working_dir.relative_to(CHECKOUT) - - python_build_dir = BUILD_DIR / "build" - lib_dirs = list(python_build_dir.glob("lib.*")) - assert len(lib_dirs) == 1, f"Expected a single lib.* directory in {python_build_dir}" - lib_dir = os.fsdecode(lib_dirs[0]) - pydebug = lib_dir.endswith("-pydebug") - python_version = lib_dir.removesuffix("-pydebug").rpartition("-")[-1] - sysconfig_data = f"{wasi_build_dir}/build/lib.wasi-wasm32-{python_version}" - if pydebug: - sysconfig_data += "-pydebug" - - # Use PYTHONPATH to include sysconfig data which must be anchored to the - # WASI guest's `/` directory. - args = {"GUEST_DIR": "/", - "HOST_DIR": CHECKOUT, - "ENV_VAR_NAME": "PYTHONPATH", - "ENV_VAR_VALUE": f"/{sysconfig_data}", - "PYTHON_WASM": working_dir / "python.wasm"} - # Check dynamically for wasmtime in case it was specified manually via - # `--host-runner`. - if WASMTIME_HOST_RUNNER_VAR in context.host_runner: - if wasmtime := shutil.which("wasmtime"): - args[WASMTIME_VAR_NAME] = wasmtime - else: - raise FileNotFoundError("wasmtime not found; download from " - "https://github.com/bytecodealliance/wasmtime") - host_runner = context.host_runner.format_map(args) - env_additions = {"CONFIG_SITE": config_site, "HOSTRUNNER": host_runner} - build_python = os.fsdecode(build_python_path()) - # The path to `configure` MUST be relative, else `python.wasm` is unable - # to find the stdlib due to Python not recognizing that it's being - # executed from within a checkout. - configure = [os.path.relpath(CHECKOUT / 'configure', working_dir), - f"--host={context.host_triple}", - f"--build={build_platform()}", - f"--with-build-python={build_python}"] - if pydebug: - configure.append("--with-pydebug") - if context.args: - configure.extend(context.args) - call(configure, - env=updated_env(env_additions | wasi_sdk_env(context)), - quiet=context.quiet) - - python_wasm = working_dir / "python.wasm" - exec_script = working_dir / "python.sh" - with exec_script.open("w", encoding="utf-8") as file: - file.write(f'#!/bin/sh\nexec {host_runner} {python_wasm} "$@"\n') - exec_script.chmod(0o755) - print(f"🏃‍♀️ Created {exec_script} ... ") - sys.stdout.flush() - - -@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple) -def make_wasi_python(context, working_dir): - """Run `make` for the WASI/host build.""" - call(["make", "--jobs", str(cpu_count()), "all"], - env=updated_env(), - quiet=context.quiet) - - exec_script = working_dir / "python.sh" - subprocess.check_call([exec_script, "--version"]) print( - f"🎉 Use '{exec_script.relative_to(context.init_dir)}' " - "to run CPython in wasm runtime" + "⚠️ WARNING: This script is deprecated and slated for removal in Python 3.20; " + "execute the `wasi/` directory instead (i.e. `python Tools/wasm/wasi`)\n", + file=sys.stderr, ) - -def build_all(context): - """Build everything.""" - steps = [configure_build_python, make_build_python, configure_wasi_python, - make_wasi_python] - for step in steps: - step(context) - -def clean_contents(context): - """Delete all files created by this script.""" - if CROSS_BUILD_DIR.exists(): - print(f"🧹 Deleting {CROSS_BUILD_DIR} ...") - shutil.rmtree(CROSS_BUILD_DIR) - - if LOCAL_SETUP.exists(): - with LOCAL_SETUP.open("rb") as file: - if file.read(len(LOCAL_SETUP_MARKER)) == LOCAL_SETUP_MARKER: - print(f"🧹 Deleting generated {LOCAL_SETUP} ...") - - -def main(): - default_host_runner = (f"{WASMTIME_HOST_RUNNER_VAR} run " - # Make sure the stack size will work for a pydebug - # build. - # Use 16 MiB stack. - "--wasm max-wasm-stack=16777216 " - # Enable thread support; causes use of preview1. - #"--wasm threads=y --wasi threads=y " - # Map the checkout to / to load the stdlib from /Lib. - "--dir {HOST_DIR}::{GUEST_DIR} " - # Set PYTHONPATH to the sysconfig data. - "--env {ENV_VAR_NAME}={ENV_VAR_VALUE}") - - parser = argparse.ArgumentParser() - subcommands = parser.add_subparsers(dest="subcommand") - build = subcommands.add_parser("build", help="Build everything") - configure_build = subcommands.add_parser("configure-build-python", - help="Run `configure` for the " - "build Python") - make_build = subcommands.add_parser("make-build-python", - help="Run `make` for the build Python") - configure_host = subcommands.add_parser("configure-host", - help="Run `configure` for the " - "host/WASI (pydebug builds " - "are inferred from the build " - "Python)") - make_host = subcommands.add_parser("make-host", - help="Run `make` for the host/WASI") - clean = subcommands.add_parser("clean", help="Delete files and directories " - "created by this script") - for subcommand in build, configure_build, make_build, configure_host, make_host: - subcommand.add_argument("--quiet", action="store_true", default=False, - dest="quiet", - help="Redirect output from subprocesses to a log file") - for subcommand in configure_build, configure_host: - subcommand.add_argument("--clean", action="store_true", default=False, - dest="clean", - help="Delete any relevant directories before building") - for subcommand in build, configure_build, configure_host: - subcommand.add_argument("args", nargs="*", - help="Extra arguments to pass to `configure`") - for subcommand in build, configure_host: - subcommand.add_argument("--wasi-sdk", type=pathlib.Path, - dest="wasi_sdk_path", - default=find_wasi_sdk(), - help="Path to wasi-sdk; defaults to " - "$WASI_SDK_PATH or /opt/wasi-sdk") - subcommand.add_argument("--host-runner", action="store", - default=default_host_runner, dest="host_runner", - help="Command template for running the WASI host " - "(default designed for wasmtime 14 or newer: " - f"`{default_host_runner}`)") - for subcommand in build, configure_host, make_host: - subcommand.add_argument("--host-triple", action="store", default="wasm32-wasip1", - help="The target triple for the WASI host build") - - context = parser.parse_args() - context.init_dir = pathlib.Path().absolute() - - dispatch = {"configure-build-python": configure_build_python, - "make-build-python": make_build_python, - "configure-host": configure_wasi_python, - "make-host": make_wasi_python, - "build": build_all, - "clean": clean_contents} - dispatch[context.subcommand](context) - - -if __name__ == "__main__": - main() + runpy.run_path(pathlib.Path(__file__).parent / "wasi", run_name="__main__") diff --git a/Tools/wasm/wasi/__main__.py b/Tools/wasm/wasi/__main__.py new file mode 100644 index 00000000000000..b57bcaca924380 --- /dev/null +++ b/Tools/wasm/wasi/__main__.py @@ -0,0 +1,549 @@ +#!/usr/bin/env python3 + +import argparse +import contextlib +import functools +import os + +try: + from os import process_cpu_count as cpu_count +except ImportError: + from os import cpu_count +import pathlib +import shutil +import subprocess +import sys +import sysconfig +import tempfile + +CHECKOUT = pathlib.Path(__file__).parent.parent.parent.parent +assert (CHECKOUT / "configure").is_file(), ( + "Please update the location of the file" +) + +CROSS_BUILD_DIR = CHECKOUT / "cross-build" +# Build platform can also be found via `config.guess`. +BUILD_DIR = CROSS_BUILD_DIR / sysconfig.get_config_var("BUILD_GNU_TYPE") + +LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local" +LOCAL_SETUP_MARKER = ( + b"# Generated by Tools/wasm/wasi .\n" + b"# Required to statically build extension modules." +) + +WASI_SDK_VERSION = 24 + +WASMTIME_VAR_NAME = "WASMTIME" +WASMTIME_HOST_RUNNER_VAR = f"{{{WASMTIME_VAR_NAME}}}" + + +def separator(): + """Print a separator line across the terminal width.""" + try: + tput_output = subprocess.check_output( + ["tput", "cols"], encoding="utf-8" + ) + except subprocess.CalledProcessError: + terminal_width = 80 + else: + terminal_width = int(tput_output.strip()) + print("⎯" * terminal_width) + + +def log(emoji, message, *, spacing=None): + """Print a notification with an emoji. + + If 'spacing' is None, calculate the spacing based on the number of code points + in the emoji as terminals "eat" a space when the emoji has multiple code points. + """ + if spacing is None: + spacing = " " if len(emoji) == 1 else " " + print("".join([emoji, spacing, message])) + + +def updated_env(updates={}): + """Create a new dict representing the environment to use. + + The changes made to the execution environment are printed out. + """ + env_defaults = {} + # https://reproducible-builds.org/docs/source-date-epoch/ + git_epoch_cmd = ["git", "log", "-1", "--pretty=%ct"] + try: + epoch = subprocess.check_output( + git_epoch_cmd, encoding="utf-8" + ).strip() + env_defaults["SOURCE_DATE_EPOCH"] = epoch + except subprocess.CalledProcessError: + pass # Might be building from a tarball. + # This layering lets SOURCE_DATE_EPOCH from os.environ takes precedence. + environment = env_defaults | os.environ | updates + + env_diff = {} + for key, value in environment.items(): + if os.environ.get(key) != value: + env_diff[key] = value + + env_vars = ( + f"\n {key}={item}" for key, item in sorted(env_diff.items()) + ) + log("🌎", f"Environment changes:{''.join(env_vars)}") + + return environment + + +def subdir(working_dir, *, clean_ok=False): + """Decorator to change to a working directory.""" + + def decorator(func): + @functools.wraps(func) + def wrapper(context): + nonlocal working_dir + + if callable(working_dir): + working_dir = working_dir(context) + separator() + log("📁", os.fsdecode(working_dir)) + if ( + clean_ok + and getattr(context, "clean", False) + and working_dir.exists() + ): + log("🚮", "Deleting directory (--clean)...") + shutil.rmtree(working_dir) + + working_dir.mkdir(parents=True, exist_ok=True) + + with contextlib.chdir(working_dir): + return func(context, working_dir) + + return wrapper + + return decorator + + +def call(command, *, context=None, quiet=False, logdir=None, **kwargs): + """Execute a command. + + If 'quiet' is true, then redirect stdout and stderr to a temporary file. + """ + if context is not None: + quiet = context.quiet + logdir = context.logdir + elif quiet and logdir is None: + raise ValueError("When quiet is True, logdir must be specified") + + log("❯", " ".join(map(str, command)), spacing=" ") + if not quiet: + stdout = None + stderr = None + else: + stdout = tempfile.NamedTemporaryFile( + "w", + encoding="utf-8", + delete=False, + dir=logdir, + prefix="cpython-wasi-", + suffix=".log", + ) + stderr = subprocess.STDOUT + log("📝", f"Logging output to {stdout.name} (--quiet)...") + + subprocess.check_call(command, **kwargs, stdout=stdout, stderr=stderr) + + +def build_python_path(): + """The path to the build Python binary.""" + binary = BUILD_DIR / "python" + if not binary.is_file(): + binary = binary.with_suffix(".exe") + if not binary.is_file(): + raise FileNotFoundError( + f"Unable to find `python(.exe)` in {BUILD_DIR}" + ) + + return binary + + +def build_python_is_pydebug(): + """Find out if the build Python is a pydebug build.""" + test = "import sys, test.support; sys.exit(test.support.Py_DEBUG)" + result = subprocess.run( + [build_python_path(), "-c", test], + capture_output=True, + ) + return bool(result.returncode) + + +@subdir(BUILD_DIR, clean_ok=True) +def configure_build_python(context, working_dir): + """Configure the build/host Python.""" + if LOCAL_SETUP.exists(): + if LOCAL_SETUP.read_bytes() == LOCAL_SETUP_MARKER: + log("👍", f"{LOCAL_SETUP} exists ...") + else: + log("⚠️", f"{LOCAL_SETUP} exists, but has unexpected contents") + else: + log("📝", f"Creating {LOCAL_SETUP} ...") + LOCAL_SETUP.write_bytes(LOCAL_SETUP_MARKER) + + configure = [os.path.relpath(CHECKOUT / "configure", working_dir)] + if context.args: + configure.extend(context.args) + + call(configure, context=context) + + +@subdir(BUILD_DIR) +def make_build_python(context, working_dir): + """Make/build the build Python.""" + call(["make", "--jobs", str(cpu_count()), "all"], context=context) + + binary = build_python_path() + cmd = [ + binary, + "-c", + "import sys; " + "print(f'{sys.version_info.major}.{sys.version_info.minor}')", + ] + version = subprocess.check_output(cmd, encoding="utf-8").strip() + + log("🎉", f"{binary} {version}") + + +def find_wasi_sdk(): + """Find the path to the WASI SDK.""" + wasi_sdk_path = None + + if wasi_sdk_path_env_var := os.environ.get("WASI_SDK_PATH"): + wasi_sdk_path = pathlib.Path(wasi_sdk_path_env_var) + else: + opt_path = pathlib.Path("/opt") + # WASI SDK versions have a ``.0`` suffix, but it's a constant; the WASI SDK team + # has said they don't plan to ever do a point release and all of their Git tags + # lack the ``.0`` suffix. + # Starting with WASI SDK 23, the tarballs went from containing a directory named + # ``wasi-sdk-{WASI_SDK_VERSION}.0`` to e.g. + # ``wasi-sdk-{WASI_SDK_VERSION}.0-x86_64-linux``. + potential_sdks = [ + path + for path in opt_path.glob(f"wasi-sdk-{WASI_SDK_VERSION}.0*") + if path.is_dir() + ] + if len(potential_sdks) == 1: + wasi_sdk_path = potential_sdks[0] + elif (default_path := opt_path / "wasi-sdk").is_dir(): + wasi_sdk_path = default_path + + # Starting with WASI SDK 25, a VERSION file is included in the root + # of the SDK directory that we can read to warn folks when they are using + # an unsupported version. + if wasi_sdk_path and (version_file := wasi_sdk_path / "VERSION").is_file(): + version_details = version_file.read_text(encoding="utf-8") + found_version = version_details.splitlines()[0] + # Make sure there's a trailing dot to avoid false positives if somehow the + # supported version is a prefix of the found version (e.g. `25` and `2567`). + if not found_version.startswith(f"{WASI_SDK_VERSION}."): + major_version = found_version.partition(".")[0] + log( + "⚠️", + f" Found WASI SDK {major_version}, " + f"but WASI SDK {WASI_SDK_VERSION} is the supported version", + ) + + return wasi_sdk_path + + +def wasi_sdk_env(context): + """Calculate environment variables for building with wasi-sdk.""" + wasi_sdk_path = context.wasi_sdk_path + sysroot = wasi_sdk_path / "share" / "wasi-sysroot" + env = { + "CC": "clang", + "CPP": "clang-cpp", + "CXX": "clang++", + "AR": "llvm-ar", + "RANLIB": "ranlib", + } + + for env_var, binary_name in list(env.items()): + env[env_var] = os.fsdecode(wasi_sdk_path / "bin" / binary_name) + + if not wasi_sdk_path.name.startswith("wasi-sdk"): + for compiler in ["CC", "CPP", "CXX"]: + env[compiler] += f" --sysroot={sysroot}" + + env["PKG_CONFIG_PATH"] = "" + env["PKG_CONFIG_LIBDIR"] = os.pathsep.join( + map( + os.fsdecode, + [sysroot / "lib" / "pkgconfig", sysroot / "share" / "pkgconfig"], + ) + ) + env["PKG_CONFIG_SYSROOT_DIR"] = os.fsdecode(sysroot) + + env["WASI_SDK_PATH"] = os.fsdecode(wasi_sdk_path) + env["WASI_SYSROOT"] = os.fsdecode(sysroot) + + env["PATH"] = os.pathsep.join([ + os.fsdecode(wasi_sdk_path / "bin"), + os.environ["PATH"], + ]) + + return env + + +@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple, clean_ok=True) +def configure_wasi_python(context, working_dir): + """Configure the WASI/host build.""" + if not context.wasi_sdk_path or not context.wasi_sdk_path.exists(): + raise ValueError( + "WASI-SDK not found; " + "download from " + "https://github.com/WebAssembly/wasi-sdk and/or " + "specify via $WASI_SDK_PATH or --wasi-sdk" + ) + + config_site = os.fsdecode( + CHECKOUT / "Tools" / "wasm" / "wasi" / "config.site-wasm32-wasi" + ) + + wasi_build_dir = working_dir.relative_to(CHECKOUT) + + python_build_dir = BUILD_DIR / "build" + lib_dirs = list(python_build_dir.glob("lib.*")) + assert len(lib_dirs) == 1, ( + f"Expected a single lib.* directory in {python_build_dir}" + ) + lib_dir = os.fsdecode(lib_dirs[0]) + python_version = lib_dir.rpartition("-")[-1] + sysconfig_data_dir = ( + f"{wasi_build_dir}/build/lib.wasi-wasm32-{python_version}" + ) + + # Use PYTHONPATH to include sysconfig data which must be anchored to the + # WASI guest's `/` directory. + args = { + "GUEST_DIR": "/", + "HOST_DIR": CHECKOUT, + "ENV_VAR_NAME": "PYTHONPATH", + "ENV_VAR_VALUE": f"/{sysconfig_data_dir}", + "PYTHON_WASM": working_dir / "python.wasm", + } + # Check dynamically for wasmtime in case it was specified manually via + # `--host-runner`. + if WASMTIME_HOST_RUNNER_VAR in context.host_runner: + if wasmtime := shutil.which("wasmtime"): + args[WASMTIME_VAR_NAME] = wasmtime + else: + raise FileNotFoundError( + "wasmtime not found; download from " + "https://github.com/bytecodealliance/wasmtime" + ) + host_runner = context.host_runner.format_map(args) + env_additions = {"CONFIG_SITE": config_site, "HOSTRUNNER": host_runner} + build_python = os.fsdecode(build_python_path()) + # The path to `configure` MUST be relative, else `python.wasm` is unable + # to find the stdlib due to Python not recognizing that it's being + # executed from within a checkout. + configure = [ + os.path.relpath(CHECKOUT / "configure", working_dir), + f"--host={context.host_triple}", + f"--build={BUILD_DIR.name}", + f"--with-build-python={build_python}", + ] + if build_python_is_pydebug(): + configure.append("--with-pydebug") + if context.args: + configure.extend(context.args) + call( + configure, + env=updated_env(env_additions | wasi_sdk_env(context)), + context=context, + ) + + python_wasm = working_dir / "python.wasm" + exec_script = working_dir / "python.sh" + with exec_script.open("w", encoding="utf-8") as file: + file.write(f'#!/bin/sh\nexec {host_runner} {python_wasm} "$@"\n') + exec_script.chmod(0o755) + log("🏃", f"Created {exec_script} (--host-runner)... ") + sys.stdout.flush() + + +@subdir(lambda context: CROSS_BUILD_DIR / context.host_triple) +def make_wasi_python(context, working_dir): + """Run `make` for the WASI/host build.""" + call( + ["make", "--jobs", str(cpu_count()), "all"], + env=updated_env(), + context=context, + ) + + exec_script = working_dir / "python.sh" + call([exec_script, "--version"], quiet=False) + log( + "🎉", + f"Use `{exec_script.relative_to(context.init_dir)}` " + "to run CPython w/ the WASI host specified by --host-runner", + ) + + +def clean_contents(context): + """Delete all files created by this script.""" + if CROSS_BUILD_DIR.exists(): + log("🧹", f"Deleting {CROSS_BUILD_DIR} ...") + shutil.rmtree(CROSS_BUILD_DIR) + + if LOCAL_SETUP.exists(): + if LOCAL_SETUP.read_bytes() == LOCAL_SETUP_MARKER: + log("🧹", f"Deleting generated {LOCAL_SETUP} ...") + + +def build_steps(*steps): + """Construct a command from other steps.""" + + def builder(context): + for step in steps: + step(context) + + return builder + + +def main(): + default_host_triple = "wasm32-wasip1" + default_wasi_sdk = find_wasi_sdk() + default_host_runner = ( + f"{WASMTIME_HOST_RUNNER_VAR} run " + # Make sure the stack size will work for a pydebug + # build. + # Use 16 MiB stack. + "--wasm max-wasm-stack=16777216 " + # Enable thread support; causes use of preview1. + # "--wasm threads=y --wasi threads=y " + # Map the checkout to / to load the stdlib from /Lib. + "--dir {HOST_DIR}::{GUEST_DIR} " + # Set PYTHONPATH to the sysconfig data. + "--env {ENV_VAR_NAME}={ENV_VAR_VALUE}" + ) + default_logdir = pathlib.Path(tempfile.gettempdir()) + + parser = argparse.ArgumentParser() + subcommands = parser.add_subparsers(dest="subcommand") + build = subcommands.add_parser("build", help="Build everything") + configure_build = subcommands.add_parser( + "configure-build-python", help="Run `configure` for the build Python" + ) + make_build = subcommands.add_parser( + "make-build-python", help="Run `make` for the build Python" + ) + build_python = subcommands.add_parser( + "build-python", help="Build the build Python" + ) + configure_host = subcommands.add_parser( + "configure-host", + help="Run `configure` for the " + "host/WASI (pydebug builds " + "are inferred from the build " + "Python)", + ) + make_host = subcommands.add_parser( + "make-host", help="Run `make` for the host/WASI" + ) + build_host = subcommands.add_parser( + "build-host", help="Build the host/WASI Python" + ) + subcommands.add_parser( + "clean", help="Delete files and directories created by this script" + ) + for subcommand in ( + build, + configure_build, + make_build, + build_python, + configure_host, + make_host, + build_host, + ): + subcommand.add_argument( + "--quiet", + action="store_true", + default=False, + dest="quiet", + help="Redirect output from subprocesses to a log file", + ) + subcommand.add_argument( + "--logdir", + type=pathlib.Path, + default=default_logdir, + help=f"Directory to store log files; defaults to {default_logdir}", + ) + for subcommand in ( + configure_build, + configure_host, + build_python, + build_host, + ): + subcommand.add_argument( + "--clean", + action="store_true", + default=False, + dest="clean", + help="Delete any relevant directories before building", + ) + for subcommand in ( + build, + configure_build, + configure_host, + build_python, + build_host, + ): + subcommand.add_argument( + "args", nargs="*", help="Extra arguments to pass to `configure`" + ) + for subcommand in build, configure_host, build_host: + subcommand.add_argument( + "--wasi-sdk", + type=pathlib.Path, + dest="wasi_sdk_path", + default=default_wasi_sdk, + help=f"Path to the WASI SDK; defaults to {default_wasi_sdk}", + ) + subcommand.add_argument( + "--host-runner", + action="store", + default=default_host_runner, + dest="host_runner", + help="Command template for running the WASI host; defaults to " + f"`{default_host_runner}`", + ) + for subcommand in build, configure_host, make_host, build_host: + subcommand.add_argument( + "--host-triple", + action="store", + default=default_host_triple, + help="The target triple for the WASI host build; " + f"defaults to {default_host_triple}", + ) + + context = parser.parse_args() + context.init_dir = pathlib.Path().absolute() + + build_build_python = build_steps(configure_build_python, make_build_python) + build_wasi_python = build_steps(configure_wasi_python, make_wasi_python) + + dispatch = { + "configure-build-python": configure_build_python, + "make-build-python": make_build_python, + "build-python": build_build_python, + "configure-host": configure_wasi_python, + "make-host": make_wasi_python, + "build-host": build_wasi_python, + "build": build_steps(build_build_python, build_wasi_python), + "clean": clean_contents, + } + dispatch[context.subcommand](context) + + +if __name__ == "__main__": + main() diff --git a/Tools/wasm/config.site-wasm32-wasi b/Tools/wasm/wasi/config.site-wasm32-wasi similarity index 100% rename from Tools/wasm/config.site-wasm32-wasi rename to Tools/wasm/wasi/config.site-wasm32-wasi diff --git a/Tools/wasm/wasm_build.py b/Tools/wasm/wasm_build.py index bcb80212362b71..3f1bde3853fce7 100755 --- a/Tools/wasm/wasm_build.py +++ b/Tools/wasm/wasm_build.py @@ -21,9 +21,10 @@ ./Tools/wasm/wasm_builder.py --clean build build """ + import argparse -import enum import dataclasses +import enum import logging import os import pathlib @@ -38,18 +39,12 @@ import time import warnings import webbrowser +from collections.abc import Callable, Iterable # for Python 3.8 from typing import ( - cast, Any, - Callable, - Dict, - Iterable, - List, - Optional, - Tuple, - Union, + cast, ) logger = logging.getLogger("wasm_build") @@ -67,7 +62,9 @@ # path to Emscripten SDK config file. # auto-detect's EMSDK in /opt/emsdk without ". emsdk_env.sh". -EM_CONFIG = pathlib.Path(os.environ.setdefault("EM_CONFIG", "/opt/emsdk/.emscripten")) +EM_CONFIG = pathlib.Path( + os.environ.setdefault("EM_CONFIG", "/opt/emsdk/.emscripten") +) EMSDK_MIN_VERSION = (3, 1, 19) EMSDK_BROKEN_VERSION = { (3, 1, 14): "https://github.com/emscripten-core/emscripten/issues/17338", @@ -119,7 +116,7 @@ def parse_emconfig( emconfig: pathlib.Path = EM_CONFIG, -) -> Tuple[pathlib.Path, pathlib.Path]: +) -> tuple[pathlib.Path, pathlib.Path]: """Parse EM_CONFIG file and lookup EMSCRIPTEN_ROOT and NODE_JS. The ".emscripten" config file is a Python snippet that uses "EM_CONFIG" @@ -131,7 +128,7 @@ def parse_emconfig( with open(emconfig, encoding="utf-8") as f: code = f.read() # EM_CONFIG file is a Python snippet - local: Dict[str, Any] = {} + local: dict[str, Any] = {} exec(code, globals(), local) emscripten_root = pathlib.Path(local["EMSCRIPTEN_ROOT"]) node_js = pathlib.Path(local["NODE_JS"]) @@ -189,16 +186,16 @@ class Platform: name: str pythonexe: str - config_site: Optional[pathlib.PurePath] - configure_wrapper: Optional[pathlib.Path] - make_wrapper: Optional[pathlib.PurePath] - environ: Dict[str, Any] + config_site: pathlib.PurePath | None + configure_wrapper: pathlib.Path | None + make_wrapper: pathlib.PurePath | None + environ: dict[str, Any] check: Callable[[], None] # Used for build_emports(). - ports: Optional[pathlib.PurePath] - cc: Optional[pathlib.PurePath] + ports: pathlib.PurePath | None + cc: pathlib.PurePath | None - def getenv(self, profile: "BuildProfile") -> Dict[str, Any]: + def getenv(self, profile: "BuildProfile") -> dict[str, Any]: return self.environ.copy() @@ -261,8 +258,7 @@ def _check_emscripten() -> None: # git / upstream / tot-upstream installation version = version[:-4] version_tuple = cast( - Tuple[int, int, int], - tuple(int(v) for v in version.split(".")) + tuple[int, int, int], tuple(int(v) for v in version.split(".")) ) if version_tuple < EMSDK_MIN_VERSION: raise ConditionError( @@ -386,7 +382,7 @@ def get_extra_paths(self) -> Iterable[pathlib.PurePath]: return [] @property - def emport_args(self) -> List[str]: + def emport_args(self) -> list[str]: """Host-specific port args (Emscripten).""" cls = type(self) if self is cls.wasm64_emscripten: @@ -397,7 +393,7 @@ def emport_args(self) -> List[str]: return [] @property - def embuilder_args(self) -> List[str]: + def embuilder_args(self) -> list[str]: """Host-specific embuilder args (Emscripten).""" cls = type(self) if self is cls.wasm64_emscripten: @@ -420,7 +416,7 @@ def is_browser(self) -> bool: return self in {cls.browser, cls.browser_debug} @property - def emport_args(self) -> List[str]: + def emport_args(self) -> list[str]: """Target-specific port args.""" cls = type(self) if self in {cls.browser_debug, cls.node_debug}: @@ -446,9 +442,9 @@ class BuildProfile: name: str support_level: SupportLevel host: Host - target: Union[EmscriptenTarget, None] = None - dynamic_linking: Union[bool, None] = None - pthreads: Union[bool, None] = None + target: EmscriptenTarget | None = None + dynamic_linking: bool | None = None + pthreads: bool | None = None default_testopts: str = "-j2" @property @@ -472,7 +468,7 @@ def makefile(self) -> pathlib.Path: return self.builddir / "Makefile" @property - def configure_cmd(self) -> List[str]: + def configure_cmd(self) -> list[str]: """Generate configure command""" # use relative path, so WASI tests can find lib prefix. # pathlib.Path.relative_to() does not work here. @@ -507,7 +503,7 @@ def configure_cmd(self) -> List[str]: return cmd @property - def make_cmd(self) -> List[str]: + def make_cmd(self) -> list[str]: """Generate make command""" cmd = ["make"] platform = self.host.platform @@ -515,10 +511,10 @@ def make_cmd(self) -> List[str]: cmd.insert(0, os.fspath(platform.make_wrapper)) return cmd - def getenv(self) -> Dict[str, Any]: + def getenv(self) -> dict[str, Any]: """Generate environ dict for platform""" env = os.environ.copy() - if hasattr(os, 'process_cpu_count'): + if hasattr(os, "process_cpu_count"): cpu_count = os.process_cpu_count() else: cpu_count = os.cpu_count() @@ -529,7 +525,7 @@ def getenv(self) -> Dict[str, Any]: env.pop(key, None) elif key == "PATH": # list of path items, prefix with extra paths - new_path: List[pathlib.PurePath] = [] + new_path: list[pathlib.PurePath] = [] new_path.extend(self.host.get_extra_paths()) new_path.extend(value) env[key] = os.pathsep.join(os.fspath(p) for p in new_path) @@ -547,7 +543,7 @@ def _run_cmd( self, cmd: Iterable[str], args: Iterable[str] = (), - cwd: Optional[pathlib.Path] = None, + cwd: pathlib.Path | None = None, ) -> int: cmd = list(cmd) cmd.extend(args) @@ -585,7 +581,7 @@ def run_pythoninfo(self, *args: str) -> int: self._check_execute() return self.run_make("pythoninfo", *args) - def run_test(self, target: str, testopts: Optional[str] = None) -> int: + def run_test(self, target: str, testopts: str | None = None) -> int: """Run buildbottests""" self._check_execute() if testopts is None: @@ -596,7 +592,9 @@ def run_py(self, *args: str) -> int: """Run Python with hostrunner""" self._check_execute() return self.run_make( - "--eval", f"run: all; $(HOSTRUNNER) ./$(PYTHON) {shlex.join(args)}", "run" + "--eval", + f"run: all; $(HOSTRUNNER) ./$(PYTHON) {shlex.join(args)}", + "run", ) def run_browser(self, bind: str = "127.0.0.1", port: int = 8000) -> None: @@ -666,9 +664,12 @@ def build_emports(self, force: bool = False) -> None: # Pre-build libbz2, libsqlite3, libz, and some system libs. ports_cmd.extend(["-sUSE_ZLIB", "-sUSE_BZIP2", "-sUSE_SQLITE3"]) # Multi-threaded sqlite3 has different suffix - embuilder_cmd.extend( - ["build", "bzip2", "sqlite3-mt" if self.pthreads else "sqlite3", "zlib"] - ) + embuilder_cmd.extend([ + "build", + "bzip2", + "sqlite3-mt" if self.pthreads else "sqlite3", + "zlib", + ]) self._run_cmd(embuilder_cmd, cwd=SRCDIR) @@ -816,8 +817,8 @@ def build_emports(self, force: bool = False) -> None: ) # Don't list broken and experimental variants in help -platforms_choices = list(p.name for p in _profiles) + ["cleanall"] -platforms_help = list(p.name for p in _profiles if p.support_level) + ["cleanall"] +platforms_choices = [p.name for p in _profiles] + ["cleanall"] +platforms_help = [p.name for p in _profiles if p.support_level] + ["cleanall"] parser.add_argument( "platform", metavar="PLATFORM", @@ -825,18 +826,18 @@ def build_emports(self, force: bool = False) -> None: choices=platforms_choices, ) -ops = dict( - build="auto build (build 'build' Python, emports, configure, compile)", - configure="run ./configure", - compile="run 'make all'", - pythoninfo="run 'make pythoninfo'", - test="run 'make buildbottest TESTOPTS=...' (supports parallel tests)", - hostrunnertest="run 'make hostrunnertest TESTOPTS=...'", - repl="start interactive REPL / webserver + browser session", - clean="run 'make clean'", - cleanall="remove all build directories", - emports="build Emscripten port with embuilder (only Emscripten)", -) +ops = { + "build": "auto build (build 'build' Python, emports, configure, compile)", + "configure": "run ./configure", + "compile": "run 'make all'", + "pythoninfo": "run 'make pythoninfo'", + "test": "run 'make buildbottest TESTOPTS=...' (supports parallel tests)", + "hostrunnertest": "run 'make hostrunnertest TESTOPTS=...'", + "repl": "start interactive REPL / webserver + browser session", + "clean": "run 'make clean'", + "cleanall": "remove all build directories", + "emports": "build Emscripten port with embuilder (only Emscripten)", +} ops_help = "\n".join(f"{op:16s} {help}" for op, help in ops.items()) parser.add_argument( "ops", diff --git a/configure b/configure index c51192f12c8223..645628fecf2df7 100755 --- a/configure +++ b/configure @@ -965,6 +965,7 @@ LDLIBRARY LIBRARY BUILDEXEEXT NO_AS_NEEDED +_Py_STACK_GROWS_DOWN MULTIARCH_CPPFLAGS PLATFORM_TRIPLET MULTIARCH @@ -1826,8 +1827,8 @@ Optional Features: no) --enable-profiling enable C-level code profiling with gprof (default is no) - --disable-gil enable experimental support for running without the - GIL (default is no) + --disable-gil enable support for running without the GIL (default + is no) --enable-pystats enable internal statistics gathering (default is no) --enable-optimizations enable expensive, stable optimizations (PGO, etc.) (default is no) @@ -3816,7 +3817,7 @@ fi -for ac_prog in python$PACKAGE_VERSION python3.13 python3.12 python3.11 python3.10 python3 python +for ac_prog in python$PACKAGE_VERSION python3.15 python3.14 python3.13 python3.12 python3.11 python3.10 python3 python do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4359,7 +4360,7 @@ then : yes) case $ac_sys_system in Darwin) enableval=/Library/Frameworks ;; - iOS) enableval=iOS/Frameworks/\$\(MULTIARCH\) ;; + iOS) enableval=Apple/iOS/Frameworks/\$\(MULTIARCH\) ;; *) as_fn_error $? "Unknown platform for framework build" "$LINENO" 5 esac esac @@ -4470,9 +4471,9 @@ then : prefix=$PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLNAMEPREFIX="@rpath/$PYTHONFRAMEWORKDIR" - RESSRCDIR=iOS/Resources + RESSRCDIR=Apple/iOS/Resources - ac_config_files="$ac_config_files iOS/Resources/Info.plist" + ac_config_files="$ac_config_files Apple/iOS/Resources/Info.plist" ;; *) @@ -7211,6 +7212,18 @@ if test x$MULTIARCH != x; then fi +# Guess C stack direction +case $host in #( + hppa*) : + _Py_STACK_GROWS_DOWN=0 ;; #( + *) : + _Py_STACK_GROWS_DOWN=1 ;; +esac + +printf "%s\n" "#define _Py_STACK_GROWS_DOWN $_Py_STACK_GROWS_DOWN" >>confdefs.h + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PEP 11 support tier" >&5 printf %s "checking for PEP 11 support tier... " >&6; } case $host/$ac_cv_cc_name in #( @@ -7254,6 +7267,8 @@ case $host/$ac_cv_cc_name in #( PY_SUPPORT_TIER=3 ;; #( x86_64-*-linux-android/clang) : PY_SUPPORT_TIER=3 ;; #( + wasm32-*-emscripten/emcc) : + PY_SUPPORT_TIER=3 ;; #( *) : PY_SUPPORT_TIER=0 @@ -9601,9 +9616,10 @@ fi as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" as_fn_append LINKFORSHARED " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" - as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback" + as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY" + as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,_PyGILState_GetThisThreadState,__Py_DumpTraceback" as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" + as_fn_append LINKFORSHARED " -sTEXTDECODER=2" if test "x$enable_wasm_dynamic_linking" = xyes then : @@ -10863,7 +10879,7 @@ then : else case e in #( e) as_fn_append CFLAGS_NODIST " $jit_flags" - REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host}" + REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\"" JIT_STENCILS_H="jit_stencils.h" if test "x$Py_DEBUG" = xtrue then : @@ -12979,13 +12995,6 @@ if test "$ac_cv_sizeof_off_t" -gt "$ac_cv_sizeof_long" -a \ else have_largefile_support="no" fi -case $ac_sys_system in #( - Emscripten) : - have_largefile_support="no" - ;; #( - *) : - ;; -esac if test "x$have_largefile_support" = xyes then : @@ -14052,6 +14061,7 @@ fi + have_uuid=missing for ac_header in uuid.h @@ -14061,6 +14071,7 @@ if test "x$ac_cv_header_uuid_h" = xyes then : printf "%s\n" "#define HAVE_UUID_H 1" >>confdefs.h + for ac_func in uuid_create uuid_enc_be do : as_ac_var=`printf "%s\n" "ac_cv_func_$ac_func" | sed "$as_sed_sh"` @@ -14070,7 +14081,9 @@ then : cat >>confdefs.h <<_ACEOF #define `printf "%s\n" "HAVE_$ac_func" | sed "$as_sed_cpp"` 1 _ACEOF - have_uuid=yes + + have_uuid=yes + ac_cv_have_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} @@ -14160,6 +14173,7 @@ if test "x$ac_cv_header_uuid_uuid_h" = xyes then : printf "%s\n" "#define HAVE_UUID_UUID_H 1" >>confdefs.h + ac_cv_have_uuid_uuid_h=yes py_check_lib_save_LIBS=$LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for uuid_generate_time in -luuid" >&5 printf %s "checking for uuid_generate_time in -luuid... " >&6; } @@ -14257,8 +14271,9 @@ fi printf "%s\n" "$ac_cv_lib_uuid_uuid_generate_time_safe" >&6; } if test "x$ac_cv_lib_uuid_uuid_generate_time_safe" = xyes then : - have_uuid=yes - printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h + + have_uuid=yes + ac_cv_have_uuid_generate_time_safe=yes fi @@ -14302,6 +14317,7 @@ if test "x$ac_cv_header_uuid_uuid_h" = xyes then : printf "%s\n" "#define HAVE_UUID_UUID_H 1" >>confdefs.h + ac_cv_have_uuid_uuid_h=yes py_check_lib_save_LIBS=$LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for uuid_generate_time in -luuid" >&5 printf %s "checking for uuid_generate_time in -luuid... " >&6; } @@ -14399,8 +14415,9 @@ fi printf "%s\n" "$ac_cv_lib_uuid_uuid_generate_time_safe" >&6; } if test "x$ac_cv_lib_uuid_uuid_generate_time_safe" = xyes then : - have_uuid=yes - printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h + + have_uuid=yes + ac_cv_have_uuid_generate_time_safe=yes fi @@ -14431,10 +14448,16 @@ else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } have_uuid=yes - printf "%s\n" "#define HAVE_UUID_H 1" >>confdefs.h - - printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h - + ac_cv_have_uuid_generate_time_safe=yes + # The uuid.h file to include may be <uuid.h> *or* <uuid/uuid.h>. + # Since pkg-config --cflags uuid may return -I/usr/include/uuid, + # it's possible to write '#include <uuid.h>' in _uuidmodule.c, + # assuming that the compiler flags are properly updated. + # + # Ideally, we should have defined HAVE_UUID_H if and only if + # #include <uuid.h> can be written, *without* assuming extra + # include path. + ac_cv_have_uuid_h=yes fi @@ -14455,6 +14478,7 @@ if test "x$ac_cv_func_uuid_generate_time" = xyes then : have_uuid=yes + ac_cv_have_uuid_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} @@ -14465,6 +14489,24 @@ fi done +fi + +if test "x$ac_cv_have_uuid_h" = xyes +then : + printf "%s\n" "#define HAVE_UUID_H 1" >>confdefs.h + +fi +if test "x$ac_cv_have_uuid_uuid_h" = xyes +then : + printf "%s\n" "#define HAVE_UUID_UUID_H 1" >>confdefs.h + +fi +if test "x$ac_cv_have_uuid_generate_time_safe" = xyes +then : + + printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h + + fi # gh-124228: While the libuuid library is available on NetBSD, it supports only UUID version 4. @@ -14480,6 +14522,164 @@ then : have_uuid=no fi +# gh-132710: The UUID node is fetched by using libuuid when possible +# and cached. While the node is constant within the same process, +# different interpreters may have different values as libuuid may +# randomize the node value if the latter cannot be deduced. +# +# Consumers may define HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC +# to indicate that libuuid is unstable and should not be relied +# upon to deduce the MAC address. + + +if test "$have_uuid" = "yes" -a "$HAVE_UUID_GENERATE_TIME_SAFE" = "1" +then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if uuid_generate_time_safe() node value is stable" >&5 +printf %s "checking if uuid_generate_time_safe() node value is stable... " >&6; } + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + # Be sure to add the extra include path if we used pkg-config + # as HAVE_UUID_H may be set even though <uuid.h> is only reachable + # by adding extra -I flags. + # + # If the following script does not compile, we simply assume that + # libuuid is missing. + CFLAGS="$CFLAGS $LIBUUID_CFLAGS" + LIBS="$LIBS $LIBUUID_LIBS" + if test "$cross_compiling" = yes +then : + + +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include <inttypes.h> // PRIu64 + #include <stdint.h> // uint64_t + #include <stdio.h> // fopen(), fclose() + + #ifdef HAVE_UUID_H + #include <uuid.h> + #else + #include <uuid/uuid.h> + #endif + + #define ERR 1 + int main(void) { + uuid_t uuid; // unsigned char[16] + (void)uuid_generate_time_safe(uuid); + uint64_t node = 0; + for (size_t i = 0; i < 6; i++) { + node |= (uint64_t)uuid[15 - i] << (8 * i); + } + FILE *fp = fopen("conftest.out", "w"); + if (fp == NULL) { + return ERR; + } + int rc = fprintf(fp, "%" PRIu64 "\n", node) >= 0; + rc |= fclose(fp); + return rc == 0 ? 0 : ERR; + } +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + + py_cv_uuid_node1=`cat conftest.out` + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + # Be sure to add the extra include path if we used pkg-config + # as HAVE_UUID_H may be set even though <uuid.h> is only reachable + # by adding extra -I flags. + # + # If the following script does not compile, we simply assume that + # libuuid is missing. + CFLAGS="$CFLAGS $LIBUUID_CFLAGS" + LIBS="$LIBS $LIBUUID_LIBS" + if test "$cross_compiling" = yes +then : + + +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include <inttypes.h> // PRIu64 + #include <stdint.h> // uint64_t + #include <stdio.h> // fopen(), fclose() + + #ifdef HAVE_UUID_H + #include <uuid.h> + #else + #include <uuid/uuid.h> + #endif + + #define ERR 1 + int main(void) { + uuid_t uuid; // unsigned char[16] + (void)uuid_generate_time_safe(uuid); + uint64_t node = 0; + for (size_t i = 0; i < 6; i++) { + node |= (uint64_t)uuid[15 - i] << (8 * i); + } + FILE *fp = fopen("conftest.out", "w"); + if (fp == NULL) { + return ERR; + } + int rc = fprintf(fp, "%" PRIu64 "\n", node) >= 0; + rc |= fclose(fp); + return rc == 0 ? 0 : ERR; + } +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + + py_cv_uuid_node2=`cat conftest.out` + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + + if test -n "$py_cv_uuid_node1" -a "$py_cv_uuid_node1" = "$py_cv_uuid_node2" + then + printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: stable" >&5 +printf "%s\n" "stable" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unstable" >&5 +printf "%s\n" "unstable" >&6; } + fi +fi + # 'Real Time' functions on Solaris # posix4 on Solaris 2.6 # pthread (first!) on Linux @@ -15558,7 +15758,7 @@ fi printf "%s\n" "$ac_cv_ffi_complex_double_supported" >&6; } if test "$ac_cv_ffi_complex_double_supported" = "yes"; then -printf "%s\n" "#define Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h +printf "%s\n" "#define _Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h fi @@ -15711,15 +15911,15 @@ LIBS=$save_LIBS else case e in #( - e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.15; consider using a system installed mpdecimal library." >&5 -printf "%s\n" "$as_me: WARNING: the bundled copy of libmpdecimal is scheduled for removal in Python 3.15; consider using a system installed mpdecimal library." >&2;} ;; + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: the bundled copy of libmpdec is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&5 +printf "%s\n" "$as_me: WARNING: the bundled copy of libmpdec is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library." >&2;} ;; esac fi if test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no" then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: no system libmpdecimal found; falling back to bundled libmpdecimal (deprecated and scheduled for removal in Python 3.15)" >&5 -printf "%s\n" "$as_me: WARNING: no system libmpdecimal found; falling back to bundled libmpdecimal (deprecated and scheduled for removal in Python 3.15)" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: no system libmpdec found; falling back to bundled libmpdec (deprecated and scheduled for removal in Python 3.16)" >&5 +printf "%s\n" "$as_me: WARNING: no system libmpdec found; falling back to bundled libmpdec (deprecated and scheduled for removal in Python 3.16)" >&2;} LIBMPDEC_CFLAGS="-I\$(srcdir)/Modules/_decimal/libmpdec" LIBMPDEC_LIBS="-lm \$(LIBMPDEC_A)" LIBMPDEC_INTERNAL="\$(LIBMPDEC_HEADERS) \$(LIBMPDEC_A)" @@ -18864,15 +19064,27 @@ printf "%s\n" "#define WITH_DTRACE 1" >>confdefs.h # linked into the binary. Correspondingly, dtrace(1) is missing the ELF # generation flag '-G'. We check for presence of this flag, rather than # hardcoding support by OS, in the interest of robustness. + # + # NetBSD DTrace requires the -x nolibs flag to avoid system library conflicts + # and uses header generation for testing instead of object generation. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DTrace probes require linking" >&5 printf %s "checking whether DTrace probes require linking... " >&6; } if test ${ac_cv_dtrace_link+y} then : printf %s "(cached) " >&6 else case e in #( - e) ac_cv_dtrace_link=no + e) + ac_cv_dtrace_link=no echo 'BEGIN{}' > conftest.d - "$DTRACE" $DFLAGS -G -s conftest.d -o conftest.o > /dev/null 2>&1 && \ + case $host in + *netbsd*) + DTRACE_TEST_FLAGS="-x nolibs -h" + ;; + *) + DTRACE_TEST_FLAGS="-G" + ;; + esac + "$DTRACE" $DFLAGS $DTRACE_TEST_FLAGS -s conftest.d -o conftest.o > /dev/null 2>&1 && \ ac_cv_dtrace_link=yes ;; esac @@ -18882,6 +19094,12 @@ printf "%s\n" "$ac_cv_dtrace_link" >&6; } if test "$ac_cv_dtrace_link" = "yes"; then DTRACE_OBJS="Python/pydtrace.o" fi + # Set NetBSD-specific DTrace flags in DFLAGS + case $host in + *netbsd*) + DFLAGS="$DFLAGS -x nolibs" + ;; + esac fi PLATFORM_HEADERS= @@ -18890,7 +19108,7 @@ PLATFORM_OBJS= case $ac_sys_system in #( Emscripten) : - as_fn_append PLATFORM_OBJS ' Python/emscripten_signal.o Python/emscripten_trampoline.o' + as_fn_append PLATFORM_OBJS ' Python/emscripten_signal.o Python/emscripten_trampoline.o Python/emscripten_trampoline_wasm.o Python/emscripten_syscalls.o' as_fn_append PLATFORM_HEADERS ' $(srcdir)/Include/internal/pycore_emscripten_signal.h $(srcdir)/Include/internal/pycore_emscripten_trampoline.h' ;; #( *) : @@ -19267,6 +19485,12 @@ if test "x$ac_cv_func_getlogin" = xyes then : printf "%s\n" "#define HAVE_GETLOGIN 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "getlogin_r" "ac_cv_func_getlogin_r" +if test "x$ac_cv_func_getlogin_r" = xyes +then : + printf "%s\n" "#define HAVE_GETLOGIN_R 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "getpeername" "ac_cv_func_getpeername" if test "x$ac_cv_func_getpeername" = xyes @@ -22451,11 +22675,301 @@ fi # Put the nasty error message in config.log where it belongs echo "$LIBZSTD_PKG_ERRORS" >&5 + + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CPPFLAGS="$CPPFLAGS $LIBZSTD_CFLAGS" + CFLAGS="$CFLAGS $LIBZSTD_CFLAGS" + LIBS="$LIBS $LIBZSTD_LIBS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing ZDICT_finalizeDictionary" >&5 +printf %s "checking for library containing ZDICT_finalizeDictionary... " >&6; } +if test ${ac_cv_search_ZDICT_finalizeDictionary+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char ZDICT_finalizeDictionary (void); +int +main (void) +{ +return ZDICT_finalizeDictionary (); + ; + return 0; +} +_ACEOF +for ac_lib in '' zstd +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_ZDICT_finalizeDictionary=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_ZDICT_finalizeDictionary+y} +then : + break +fi +done +if test ${ac_cv_search_ZDICT_finalizeDictionary+y} +then : + +else case e in #( + e) ac_cv_search_ZDICT_finalizeDictionary=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_ZDICT_finalizeDictionary" >&5 +printf "%s\n" "$ac_cv_search_ZDICT_finalizeDictionary" >&6; } +ac_res=$ac_cv_search_ZDICT_finalizeDictionary +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking ZSTD_VERSION_NUMBER >= 1.4.5" >&5 +printf %s "checking ZSTD_VERSION_NUMBER >= 1.4.5... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include "zstd.h" +int +main (void) +{ + + #if ZSTD_VERSION_NUMBER < 10405 + # error "zstd version is too old" + #endif + + ; + return 0; +} + +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + for ac_header in zstd.h zdict.h +do : + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" +then : + cat >>confdefs.h <<_ACEOF +#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1 +_ACEOF + have_libzstd=yes +else case e in #( + e) have_libzstd=no ;; +esac +fi + +done + +else case e in #( + e) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } have_libzstd=no + ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +else case e in #( + e) have_libzstd=no ;; +esac +fi + + if test "x$have_libzstd" = xyes +then : + + LIBZSTD_CFLAGS=${LIBZSTD_CFLAGS-""} + LIBZSTD_LIBS=${LIBZSTD_LIBS-"-lzstd"} + +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + + elif test $pkg_failed = untried; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } + + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CPPFLAGS="$CPPFLAGS $LIBZSTD_CFLAGS" + CFLAGS="$CFLAGS $LIBZSTD_CFLAGS" + LIBS="$LIBS $LIBZSTD_LIBS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing ZDICT_finalizeDictionary" >&5 +printf %s "checking for library containing ZDICT_finalizeDictionary... " >&6; } +if test ${ac_cv_search_ZDICT_finalizeDictionary+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char ZDICT_finalizeDictionary (void); +int +main (void) +{ +return ZDICT_finalizeDictionary (); + ; + return 0; +} +_ACEOF +for ac_lib in '' zstd +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_ZDICT_finalizeDictionary=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_ZDICT_finalizeDictionary+y} +then : + break +fi +done +if test ${ac_cv_search_ZDICT_finalizeDictionary+y} +then : + +else case e in #( + e) ac_cv_search_ZDICT_finalizeDictionary=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_ZDICT_finalizeDictionary" >&5 +printf "%s\n" "$ac_cv_search_ZDICT_finalizeDictionary" >&6; } +ac_res=$ac_cv_search_ZDICT_finalizeDictionary +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking ZSTD_VERSION_NUMBER >= 1.4.5" >&5 +printf %s "checking ZSTD_VERSION_NUMBER >= 1.4.5... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include "zstd.h" +int +main (void) +{ + + #if ZSTD_VERSION_NUMBER < 10405 + # error "zstd version is too old" + #endif + + ; + return 0; +} + +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + for ac_header in zstd.h zdict.h +do : + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" +then : + cat >>confdefs.h <<_ACEOF +#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1 +_ACEOF + have_libzstd=yes +else case e in #( + e) have_libzstd=no ;; +esac +fi + +done + +else case e in #( + e) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } have_libzstd=no + ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +else case e in #( + e) have_libzstd=no ;; +esac +fi + + if test "x$have_libzstd" = xyes +then : + + LIBZSTD_CFLAGS=${LIBZSTD_CFLAGS-""} + LIBZSTD_LIBS=${LIBZSTD_LIBS-"-lzstd"} + +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + + else LIBZSTD_CFLAGS=$pkg_cv_LIBZSTD_CFLAGS LIBZSTD_LIBS=$pkg_cv_LIBZSTD_LIBS @@ -23362,6 +23876,33 @@ fi +ac_fn_check_decl "$LINENO" "MAXLOGNAME" "ac_cv_have_decl_MAXLOGNAME" "#include <sys/param.h> +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_MAXLOGNAME" = xyes +then : + +printf "%s\n" "#define HAVE_MAXLOGNAME 1" >>confdefs.h + +fi + +ac_fn_check_decl "$LINENO" "UT_NAMESIZE" "ac_cv_have_decl_UT_NAMESIZE" "#include <utmp.h> +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_UT_NAMESIZE" = xyes +then : + ac_have_decl=1 +else case e in #( + e) ac_have_decl=0 ;; +esac +fi +printf "%s\n" "#define HAVE_DECL_UT_NAMESIZE $ac_have_decl" >>confdefs.h +if test $ac_have_decl = 1 +then : + +printf "%s\n" "#define HAVE_UT_NAMESIZE 1" >>confdefs.h + +fi + + # check for openpty, login_tty, and forkpty @@ -25469,8 +26010,8 @@ main (void) { unsigned int fpcr; - __asm__ __volatile__ ("fmove.l %%fpcr,%0" : "=g" (fpcr)); - __asm__ __volatile__ ("fmove.l %0,%%fpcr" : : "g" (fpcr)); + __asm__ __volatile__ ("fmove.l %%fpcr,%0" : "=dm" (fpcr)); + __asm__ __volatile__ ("fmove.l %0,%%fpcr" : : "dm" (fpcr)); ; return 0; @@ -27676,110 +28217,6 @@ printf "%s\n" "#define HAVE_STAT_TV_NSEC2 1" >>confdefs.h fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether year with century should be normalized for strftime" >&5 -printf %s "checking whether year with century should be normalized for strftime... " >&6; } -if test ${ac_cv_normalize_century+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) -if test "$cross_compiling" = yes -then : - ac_cv_normalize_century=yes -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include <time.h> -#include <string.h> - -int main(void) -{ - char year[5]; - struct tm date = { - .tm_year = -1801, - .tm_mon = 0, - .tm_mday = 1 - }; - if (strftime(year, sizeof(year), "%Y", &date) && !strcmp(year, "0099")) { - return 1; - } - return 0; -} - -_ACEOF -if ac_fn_c_try_run "$LINENO" -then : - ac_cv_normalize_century=yes -else case e in #( - e) ac_cv_normalize_century=no ;; -esac -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_normalize_century" >&5 -printf "%s\n" "$ac_cv_normalize_century" >&6; } -if test "$ac_cv_normalize_century" = yes -then - -printf "%s\n" "#define Py_NORMALIZE_CENTURY 1" >>confdefs.h - -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C99-compatible strftime specifiers are supported" >&5 -printf %s "checking whether C99-compatible strftime specifiers are supported... " >&6; } -if test ${ac_cv_strftime_c99_support+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) -if test "$cross_compiling" = yes -then : - ac_cv_strftime_c99_support= -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#include <time.h> -#include <string.h> - -int main(void) -{ - char full_date[11]; - struct tm date = { - .tm_year = 0, - .tm_mon = 0, - .tm_mday = 1 - }; - if (strftime(full_date, sizeof(full_date), "%F", &date) && !strcmp(full_date, "1900-01-01")) { - return 0; - } - return 1; -} - -_ACEOF -if ac_fn_c_try_run "$LINENO" -then : - ac_cv_strftime_c99_support=yes -else case e in #( - e) as_fn_error $? "Python requires C99-compatible strftime specifiers" "$LINENO" 5 ;; -esac -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_strftime_c99_support" >&5 -printf "%s\n" "$ac_cv_strftime_c99_support" >&6; } - have_curses=no have_panel=no @@ -29426,9 +29863,6 @@ printf "%s\n" "#define Py_REMOTE_DEBUG 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else - -printf "%s\n" "#define Py_REMOTE_DEBUG 0" >>confdefs.h - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi @@ -30689,9 +31123,7 @@ case $ac_sys_system in #( - py_cv_module_fcntl=n/a py_cv_module_readline=n/a - py_cv_module_termios=n/a py_cv_module_=n/a ;; #( @@ -32076,6 +32508,14 @@ LIBHACL_CFLAGS="${LIBHACL_FLAG_I} ${LIBHACL_FLAG_D} \$(PY_STDMODULE_CFLAGS) \$(C LIBHACL_LDFLAGS= # for now, no specific linker flags are needed +if test "$UNIVERSAL_ARCHS" = "universal2" -o \ + \( "$build_cpu" = "aarch64" -a "$build_vendor" = "apple" \) +then + use_hacl_universal2_impl=yes +else + use_hacl_universal2_impl=no +fi + # The SIMD files use aligned_alloc, which is not available on older versions of # Android. # The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. @@ -32121,7 +32561,7 @@ then : LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC128 1" >>confdefs.h # macOS universal2 builds *support* the -msse etc flags because they're @@ -32129,7 +32569,7 @@ printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h # isn't great, so it's disabled on ARM64. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for HACL* SIMD128 implementation" >&5 printf %s "checking for HACL* SIMD128 implementation... " >&6; } - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then LIBHACL_BLAKE2_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128_universal2.o" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: universal2" >&5 printf "%s\n" "universal2" >&6; } @@ -32197,7 +32637,7 @@ then : LIBHACL_SIMD256_FLAGS="-mavx2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h # macOS universal2 builds *support* the -mavx2 compiler flag because it's @@ -32206,7 +32646,7 @@ printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h # wrapped implementation if we're building for universal2. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for HACL* SIMD256 implementation" >&5 printf %s "checking for HACL* SIMD256 implementation... " >&6; } - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then LIBHACL_BLAKE2_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256_universal2.o" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: universal2" >&5 printf "%s\n" "universal2" >&6; } @@ -34790,7 +35230,7 @@ do "Mac/PythonLauncher/Makefile") CONFIG_FILES="$CONFIG_FILES Mac/PythonLauncher/Makefile" ;; "Mac/Resources/framework/Info.plist") CONFIG_FILES="$CONFIG_FILES Mac/Resources/framework/Info.plist" ;; "Mac/Resources/app/Info.plist") CONFIG_FILES="$CONFIG_FILES Mac/Resources/app/Info.plist" ;; - "iOS/Resources/Info.plist") CONFIG_FILES="$CONFIG_FILES iOS/Resources/Info.plist" ;; + "Apple/iOS/Resources/Info.plist") CONFIG_FILES="$CONFIG_FILES Apple/iOS/Resources/Info.plist" ;; "Makefile.pre") CONFIG_FILES="$CONFIG_FILES Makefile.pre" ;; "Misc/python.pc") CONFIG_FILES="$CONFIG_FILES Misc/python.pc" ;; "Misc/python-embed.pc") CONFIG_FILES="$CONFIG_FILES Misc/python-embed.pc" ;; diff --git a/configure.ac b/configure.ac index a7b2f62579b0e9..d81c76fc12230f 100644 --- a/configure.ac +++ b/configure.ac @@ -205,7 +205,7 @@ AC_SUBST([FREEZE_MODULE_DEPS]) AC_SUBST([PYTHON_FOR_BUILD_DEPS]) AC_CHECK_PROGS([PYTHON_FOR_REGEN], - [python$PACKAGE_VERSION python3.13 python3.12 python3.11 python3.10 python3 python], + [python$PACKAGE_VERSION python3.15 python3.14 python3.13 python3.12 python3.11 python3.10 python3 python], [python3]) AC_SUBST([PYTHON_FOR_REGEN]) @@ -559,7 +559,7 @@ AC_ARG_ENABLE([framework], yes) case $ac_sys_system in Darwin) enableval=/Library/Frameworks ;; - iOS) enableval=iOS/Frameworks/\$\(MULTIARCH\) ;; + iOS) enableval=Apple/iOS/Frameworks/\$\(MULTIARCH\) ;; *) AC_MSG_ERROR([Unknown platform for framework build]) esac esac @@ -666,9 +666,9 @@ AC_ARG_ENABLE([framework], prefix=$PYTHONFRAMEWORKPREFIX PYTHONFRAMEWORKINSTALLNAMEPREFIX="@rpath/$PYTHONFRAMEWORKDIR" - RESSRCDIR=iOS/Resources + RESSRCDIR=Apple/iOS/Resources - AC_CONFIG_FILES([iOS/Resources/Info.plist]) + AC_CONFIG_FILES([Apple/iOS/Resources/Info.plist]) ;; *) AC_MSG_ERROR([Unknown platform for framework build]) @@ -1202,33 +1202,42 @@ if test x$MULTIARCH != x; then fi AC_SUBST([MULTIARCH_CPPFLAGS]) +# Guess C stack direction +AS_CASE([$host], + [hppa*], [_Py_STACK_GROWS_DOWN=0], + [_Py_STACK_GROWS_DOWN=1]) +AC_DEFINE_UNQUOTED([_Py_STACK_GROWS_DOWN], [$_Py_STACK_GROWS_DOWN], + [Define to 1 if the machine stack grows down (default); 0 if it grows up.]) +AC_SUBST([_Py_STACK_GROWS_DOWN]) + dnl Support tiers according to https://peps.python.org/pep-0011/ dnl dnl NOTE: Windows support tiers are defined in PC/pyconfig.h. dnl AC_MSG_CHECKING([for PEP 11 support tier]) AS_CASE([$host/$ac_cv_cc_name], - [x86_64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=1], dnl Linux on AMD64, any vendor, glibc, gcc - [x86_64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on Intel, any version - [aarch64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on M1, any version - [i686-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 32bit Windows on Intel, MSVC - [x86_64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 64bit Windows on AMD64, MSVC - - [aarch64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux ARM64, glibc, gcc+clang - [aarch64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], - [powerpc64le-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux on PPC64 little endian, glibc, gcc - [wasm32-unknown-wasip1/clang], [PY_SUPPORT_TIER=2], dnl WebAssembly System Interface preview1, clang - [x86_64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], dnl Linux on AMD64, any vendor, glibc, clang - - [aarch64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=3], dnl Windows ARM64, MSVC - [armv7l-*-linux-gnueabihf/gcc], [PY_SUPPORT_TIER=3], dnl ARMv7 LE with hardware floats, any vendor, glibc, gcc - [powerpc64le-*-linux-gnu/clang], [PY_SUPPORT_TIER=3], dnl Linux on PPC64 little endian, glibc, clang - [s390x-*-linux-gnu/gcc], [PY_SUPPORT_TIER=3], dnl Linux on 64bit s390x (big endian), glibc, gcc - [x86_64-*-freebsd*/clang], [PY_SUPPORT_TIER=3], dnl FreeBSD on AMD64 - [aarch64-apple-ios*-simulator/clang], [PY_SUPPORT_TIER=3], dnl iOS Simulator on arm64 - [aarch64-apple-ios*/clang], [PY_SUPPORT_TIER=3], dnl iOS on ARM64 - [aarch64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on ARM64 - [x86_64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on AMD64 + [x86_64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=1], dnl Linux on AMD64, any vendor, glibc, gcc + [x86_64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on Intel, any version + [aarch64-apple-darwin*/clang], [PY_SUPPORT_TIER=1], dnl macOS on M1, any version + [i686-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 32bit Windows on Intel, MSVC + [x86_64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=1], dnl 64bit Windows on AMD64, MSVC + + [aarch64-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux ARM64, glibc, gcc+clang + [aarch64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], + [powerpc64le-*-linux-gnu/gcc], [PY_SUPPORT_TIER=2], dnl Linux on PPC64 little endian, glibc, gcc + [wasm32-unknown-wasip1/clang], [PY_SUPPORT_TIER=2], dnl WebAssembly System Interface preview1, clang + [x86_64-*-linux-gnu/clang], [PY_SUPPORT_TIER=2], dnl Linux on AMD64, any vendor, glibc, clang + + [aarch64-pc-windows-msvc/msvc], [PY_SUPPORT_TIER=3], dnl Windows ARM64, MSVC + [armv7l-*-linux-gnueabihf/gcc], [PY_SUPPORT_TIER=3], dnl ARMv7 LE with hardware floats, any vendor, glibc, gcc + [powerpc64le-*-linux-gnu/clang], [PY_SUPPORT_TIER=3], dnl Linux on PPC64 little endian, glibc, clang + [s390x-*-linux-gnu/gcc], [PY_SUPPORT_TIER=3], dnl Linux on 64bit s390x (big endian), glibc, gcc + [x86_64-*-freebsd*/clang], [PY_SUPPORT_TIER=3], dnl FreeBSD on AMD64 + [aarch64-apple-ios*-simulator/clang], [PY_SUPPORT_TIER=3], dnl iOS Simulator on arm64 + [aarch64-apple-ios*/clang], [PY_SUPPORT_TIER=3], dnl iOS on ARM64 + [aarch64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on ARM64 + [x86_64-*-linux-android/clang], [PY_SUPPORT_TIER=3], dnl Android on AMD64 + [wasm32-*-emscripten/emcc], [PY_SUPPORT_TIER=3], dnl Emscripten [PY_SUPPORT_TIER=0] ) @@ -1716,7 +1725,7 @@ ABI_THREAD="" # --disable-gil AC_MSG_CHECKING([for --disable-gil]) AC_ARG_ENABLE([gil], - [AS_HELP_STRING([--disable-gil], [enable experimental support for running without the GIL (default is no)])], + [AS_HELP_STRING([--disable-gil], [enable support for running without the GIL (default is no)])], [AS_VAR_IF([enable_gil], [yes], [disable_gil=no], [disable_gil=yes])], [disable_gil=no] ) AC_MSG_RESULT([$disable_gil]) @@ -2334,9 +2343,11 @@ AS_CASE([$ac_sys_system], dnl Include file system support AS_VAR_APPEND([LINKFORSHARED], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) - AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,_PyGILState_GetThisThreadState,__Py_DumpTraceback"]) AS_VAR_APPEND([LINKFORSHARED], [" -sSTACK_SIZE=5MB"]) + dnl Avoid bugs in JS fallback string decoding path + AS_VAR_APPEND([LINKFORSHARED], [" -sTEXTDECODER=2"]) AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) @@ -2776,7 +2787,7 @@ AS_VAR_IF([jit_flags], [], [AS_VAR_APPEND([CFLAGS_NODIST], [" $jit_flags"]) AS_VAR_SET([REGEN_JIT_COMMAND], - ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host}"]) + ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\""]) AS_VAR_SET([JIT_STENCILS_H], ["jit_stencils.h"]) AS_VAR_IF([Py_DEBUG], [true], @@ -3172,10 +3183,6 @@ if test "$ac_cv_sizeof_off_t" -gt "$ac_cv_sizeof_long" -a \ else have_largefile_support="no" fi -dnl LFS does not work with Emscripten 3.1 -AS_CASE([$ac_sys_system], - [Emscripten], [have_largefile_support="no"] -) AS_VAR_IF([have_largefile_support], [yes], [ AC_DEFINE([HAVE_LARGEFILE_SUPPORT], [1], [Defined to enable large file support when an off_t is bigger than a long @@ -3740,15 +3747,17 @@ dnl check for uuid dependencies AH_TEMPLATE([HAVE_UUID_H], [Define to 1 if you have the <uuid.h> header file.]) AH_TEMPLATE([HAVE_UUID_UUID_H], [Define to 1 if you have the <uuid/uuid.h> header file.]) AH_TEMPLATE([HAVE_UUID_GENERATE_TIME_SAFE], [Define if uuid_generate_time_safe() exists.]) +AH_TEMPLATE([HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC], [Define if uuid_generate_time_safe() is able to deduce a MAC address.]) have_uuid=missing dnl AIX provides support for RFC4122 (uuid) in libc.a starting with AIX 6.1 dnl (anno 2007). FreeBSD and OpenBSD provides support in libc as well. dnl Little-endian FreeBSD, OpenBSD and NetBSD needs encoding into an octet dnl stream in big-endian byte-order -AC_CHECK_HEADERS([uuid.h], - [AC_CHECK_FUNCS([uuid_create uuid_enc_be], - [have_uuid=yes +AC_CHECK_HEADERS([uuid.h], [ + AC_CHECK_FUNCS([uuid_create uuid_enc_be], [ + have_uuid=yes + ac_cv_have_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} ]) @@ -3758,19 +3767,29 @@ AS_VAR_IF([have_uuid], [missing], [ PKG_CHECK_MODULES( [LIBUUID], [uuid >= 2.20], [dnl linux-util's libuuid has uuid_generate_time_safe() since v2.20 (2011) - dnl and provides <uuid.h>. + dnl and provides <uuid.h> assuming specific include paths are given have_uuid=yes - AC_DEFINE([HAVE_UUID_H], [1]) - AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE], [1]) + ac_cv_have_uuid_generate_time_safe=yes + # The uuid.h file to include may be <uuid.h> *or* <uuid/uuid.h>. + # Since pkg-config --cflags uuid may return -I/usr/include/uuid, + # it's possible to write '#include <uuid.h>' in _uuidmodule.c, + # assuming that the compiler flags are properly updated. + # + # Ideally, we should have defined HAVE_UUID_H if and only if + # #include <uuid.h> can be written, *without* assuming extra + # include path. + ac_cv_have_uuid_h=yes ], [ WITH_SAVE_ENV([ CPPFLAGS="$CPPFLAGS $LIBUUID_CFLAGS" LIBS="$LIBS $LIBUUID_LIBS" AC_CHECK_HEADERS([uuid/uuid.h], [ + ac_cv_have_uuid_uuid_h=yes PY_CHECK_LIB([uuid], [uuid_generate_time], [have_uuid=yes]) - PY_CHECK_LIB([uuid], [uuid_generate_time_safe], - [have_uuid=yes - AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE], [1]) ]) ]) + PY_CHECK_LIB([uuid], [uuid_generate_time_safe], [ + have_uuid=yes + ac_cv_have_uuid_generate_time_safe=yes + ])]) AS_VAR_IF([have_uuid], [yes], [ LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-"-luuid"} @@ -3785,12 +3804,19 @@ AS_VAR_IF([have_uuid], [missing], [ AC_CHECK_HEADERS([uuid/uuid.h], [ AC_CHECK_FUNC([uuid_generate_time], [ have_uuid=yes + ac_cv_have_uuid_uuid_h=yes LIBUUID_CFLAGS=${LIBUUID_CFLAGS-""} LIBUUID_LIBS=${LIBUUID_LIBS-""} ]) ]) ]) +AS_VAR_IF([ac_cv_have_uuid_h], [yes], [AC_DEFINE([HAVE_UUID_H], [1])]) +AS_VAR_IF([ac_cv_have_uuid_uuid_h], [yes], [AC_DEFINE([HAVE_UUID_UUID_H], [1])]) +AS_VAR_IF([ac_cv_have_uuid_generate_time_safe], [yes], [ + AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE], [1]) +]) + # gh-124228: While the libuuid library is available on NetBSD, it supports only UUID version 4. # This restriction inhibits the proper generation of time-based UUIDs. if test "$ac_sys_system" = "NetBSD"; then @@ -3800,6 +3826,68 @@ fi AS_VAR_IF([have_uuid], [missing], [have_uuid=no]) +# gh-132710: The UUID node is fetched by using libuuid when possible +# and cached. While the node is constant within the same process, +# different interpreters may have different values as libuuid may +# randomize the node value if the latter cannot be deduced. +# +# Consumers may define HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC +# to indicate that libuuid is unstable and should not be relied +# upon to deduce the MAC address. +AC_DEFUN([PY_EXTRACT_UUID_GENERATE_TIME_SAFE_MAC], [WITH_SAVE_ENV([ + # Be sure to add the extra include path if we used pkg-config + # as HAVE_UUID_H may be set even though <uuid.h> is only reachable + # by adding extra -I flags. + # + # If the following script does not compile, we simply assume that + # libuuid is missing. + CFLAGS="$CFLAGS $LIBUUID_CFLAGS" + LIBS="$LIBS $LIBUUID_LIBS" + AC_RUN_IFELSE([AC_LANG_SOURCE([[ + #include <inttypes.h> // PRIu64 + #include <stdint.h> // uint64_t + #include <stdio.h> // fopen(), fclose() + + #ifdef HAVE_UUID_H + #include <uuid.h> + #else + #include <uuid/uuid.h> + #endif + + #define ERR 1 + int main(void) { + uuid_t uuid; // unsigned char[16] + (void)uuid_generate_time_safe(uuid); + uint64_t node = 0; + for (size_t i = 0; i < 6; i++) { + node |= (uint64_t)uuid[15 - i] << (8 * i); + } + FILE *fp = fopen("conftest.out", "w"); + if (fp == NULL) { + return ERR; + } + int rc = fprintf(fp, "%" PRIu64 "\n", node) >= 0; + rc |= fclose(fp); + return rc == 0 ? 0 : ERR; + }]])], [ + AS_VAR_SET([$1], [`cat conftest.out`]) + ], [], [] + )])]) + +if test "$have_uuid" = "yes" -a "$HAVE_UUID_GENERATE_TIME_SAFE" = "1" +then + AC_MSG_CHECKING([if uuid_generate_time_safe() node value is stable]) + PY_EXTRACT_UUID_GENERATE_TIME_SAFE_MAC([py_cv_uuid_node1]) + PY_EXTRACT_UUID_GENERATE_TIME_SAFE_MAC([py_cv_uuid_node2]) + if test -n "$py_cv_uuid_node1" -a "$py_cv_uuid_node1" = "$py_cv_uuid_node2" + then + AC_DEFINE([HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC], [1]) + AC_MSG_RESULT([stable]) + else + AC_MSG_RESULT([unstable]) + fi +fi + # 'Real Time' functions on Solaris # posix4 on Solaris 2.6 # pthread (first!) on Linux @@ -4086,7 +4174,7 @@ int main(void) [ac_cv_ffi_complex_double_supported=no]) ])]) if test "$ac_cv_ffi_complex_double_supported" = "yes"; then - AC_DEFINE([Py_FFI_SUPPORT_C_COMPLEX], [1], + AC_DEFINE([_Py_FFI_SUPPORT_C_COMPLEX], [1], [Defined if _Complex C type can be used with libffi.]) fi @@ -4134,13 +4222,13 @@ AS_VAR_IF([with_system_libmpdec], [yes], [have_mpdec=no]) ])], [AC_MSG_WARN([m4_normalize([ - the bundled copy of libmpdecimal is scheduled for removal in Python 3.15; + the bundled copy of libmpdec is scheduled for removal in Python 3.16; consider using a system installed mpdecimal library.])])]) AS_IF([test "$with_system_libmpdec" = "yes" && test "$have_mpdec" = "no"], [AC_MSG_WARN([m4_normalize([ - no system libmpdecimal found; falling back to bundled libmpdecimal - (deprecated and scheduled for removal in Python 3.15)])]) + no system libmpdec found; falling back to bundled libmpdec + (deprecated and scheduled for removal in Python 3.16)])]) USE_BUNDLED_LIBMPDEC()]) # Disable forced inlining in debug builds, see GH-94847 @@ -5033,16 +5121,33 @@ then # linked into the binary. Correspondingly, dtrace(1) is missing the ELF # generation flag '-G'. We check for presence of this flag, rather than # hardcoding support by OS, in the interest of robustness. + # + # NetBSD DTrace requires the -x nolibs flag to avoid system library conflicts + # and uses header generation for testing instead of object generation. AC_CACHE_CHECK([whether DTrace probes require linking], - [ac_cv_dtrace_link], [dnl + [ac_cv_dtrace_link], [ ac_cv_dtrace_link=no echo 'BEGIN{}' > conftest.d - "$DTRACE" $DFLAGS -G -s conftest.d -o conftest.o > /dev/null 2>&1 && \ + case $host in + *netbsd*) + DTRACE_TEST_FLAGS="-x nolibs -h" + ;; + *) + DTRACE_TEST_FLAGS="-G" + ;; + esac + "$DTRACE" $DFLAGS $DTRACE_TEST_FLAGS -s conftest.d -o conftest.o > /dev/null 2>&1 && \ ac_cv_dtrace_link=yes ]) if test "$ac_cv_dtrace_link" = "yes"; then DTRACE_OBJS="Python/pydtrace.o" fi + # Set NetBSD-specific DTrace flags in DFLAGS + case $host in + *netbsd*) + DFLAGS="$DFLAGS -x nolibs" + ;; + esac fi dnl Platform-specific C and header files. @@ -5051,7 +5156,7 @@ PLATFORM_OBJS= AS_CASE([$ac_sys_system], [Emscripten], [ - AS_VAR_APPEND([PLATFORM_OBJS], [' Python/emscripten_signal.o Python/emscripten_trampoline.o']) + AS_VAR_APPEND([PLATFORM_OBJS], [' Python/emscripten_signal.o Python/emscripten_trampoline.o Python/emscripten_trampoline_wasm.o Python/emscripten_syscalls.o']) AS_VAR_APPEND([PLATFORM_HEADERS], [' $(srcdir)/Include/internal/pycore_emscripten_signal.h $(srcdir)/Include/internal/pycore_emscripten_trampoline.h']) ], ) @@ -5138,7 +5243,7 @@ AC_CHECK_FUNCS([ \ faccessat fchmod fchmodat fchown fchownat fdopendir fdwalk fexecve \ fork fork1 fpathconf fstatat ftime ftruncate futimens futimes futimesat \ gai_strerror getegid geteuid getgid getgrent getgrgid getgrgid_r \ - getgrnam_r getgrouplist gethostname getitimer getloadavg getlogin \ + getgrnam_r getgrouplist gethostname getitimer getloadavg getlogin getlogin_r \ getpeername getpgid getpid getppid getpriority _getpty \ getpwent getpwnam_r getpwuid getpwuid_r getresgid getresuid getrusage getsid getspent \ getspnam getuid getwd grantpt if_nameindex initgroups kill killpg lchown linkat \ @@ -5148,7 +5253,7 @@ AC_CHECK_FUNCS([ \ posix_spawn_file_actions_addclosefrom_np \ pread preadv preadv2 process_vm_readv \ pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ - pthread_kill pthread_get_name_np pthread_getname_np pthread_set_name_np + pthread_kill pthread_get_name_np pthread_getname_np pthread_set_name_np \ pthread_setname_np pthread_getattr_np \ ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ @@ -5387,7 +5492,33 @@ PKG_CHECK_MODULES([LIBLZMA], [liblzma], [have_liblzma=yes], [ ]) dnl zstd 1.4.5 stabilised ZDICT_finalizeDictionary -PKG_CHECK_MODULES([LIBZSTD], [libzstd >= 1.4.5], [have_libzstd=yes], [have_libzstd=no]) +PKG_CHECK_MODULES([LIBZSTD], [libzstd >= 1.4.5], [have_libzstd=yes], [ + WITH_SAVE_ENV([ + CPPFLAGS="$CPPFLAGS $LIBZSTD_CFLAGS" + CFLAGS="$CFLAGS $LIBZSTD_CFLAGS" + LIBS="$LIBS $LIBZSTD_LIBS" + AC_SEARCH_LIBS([ZDICT_finalizeDictionary], [zstd], [ + AC_MSG_CHECKING([ZSTD_VERSION_NUMBER >= 1.4.5]) + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([@%:@include "zstd.h"], [ + #if ZSTD_VERSION_NUMBER < 10405 + # error "zstd version is too old" + #endif + ]) + ], [ + AC_MSG_RESULT([yes]) + AC_CHECK_HEADERS([zstd.h zdict.h], [have_libzstd=yes], [have_libzstd=no]) + ], [ + AC_MSG_RESULT([no]) + have_libzstd=no + ]) + ], [have_libzstd=no]) + AS_VAR_IF([have_libzstd], [yes], [ + LIBZSTD_CFLAGS=${LIBZSTD_CFLAGS-""} + LIBZSTD_LIBS=${LIBZSTD_LIBS-"-lzstd"} + ]) + ]) +]) dnl PY_CHECK_NETDB_FUNC(FUNCTION) AC_DEFUN([PY_CHECK_NETDB_FUNC], [PY_CHECK_FUNC([$1], [@%:@include <netdb.h>])]) @@ -5431,6 +5562,18 @@ PY_CHECK_FUNC([setgroups], [ #endif ]) +AC_CHECK_DECL([MAXLOGNAME], + [AC_DEFINE([HAVE_MAXLOGNAME], [1], + [Define if you have the 'MAXLOGNAME' constant.])], + [], + [@%:@include <sys/param.h>]) + +AC_CHECK_DECLS([UT_NAMESIZE], + [AC_DEFINE([HAVE_UT_NAMESIZE], [1], + [Define if you have the 'HAVE_UT_NAMESIZE' constant.])], + [], + [@%:@include <utmp.h>]) + # check for openpty, login_tty, and forkpty AC_CHECK_FUNCS([openpty], [], @@ -5956,8 +6099,8 @@ AS_VAR_IF([ac_cv_gcc_asm_for_x87], [yes], [ AC_CACHE_CHECK([whether we can use gcc inline assembler to get and set mc68881 fpcr], [ac_cv_gcc_asm_for_mc68881], [ AC_LINK_IFELSE( [AC_LANG_PROGRAM([[]], [[ unsigned int fpcr; - __asm__ __volatile__ ("fmove.l %%fpcr,%0" : "=g" (fpcr)); - __asm__ __volatile__ ("fmove.l %0,%%fpcr" : : "g" (fpcr)); + __asm__ __volatile__ ("fmove.l %%fpcr,%0" : "=dm" (fpcr)); + __asm__ __volatile__ ("fmove.l %0,%%fpcr" : : "dm" (fpcr)); ]])],[ac_cv_gcc_asm_for_mc68881=yes],[ac_cv_gcc_asm_for_mc68881=no]) ]) AS_VAR_IF([ac_cv_gcc_asm_for_mc68881], [yes], [ @@ -6674,57 +6817,6 @@ then [Define if you have struct stat.st_mtimensec]) fi -AC_CACHE_CHECK([whether year with century should be normalized for strftime], [ac_cv_normalize_century], [ -AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include <time.h> -#include <string.h> - -int main(void) -{ - char year[5]; - struct tm date = { - .tm_year = -1801, - .tm_mon = 0, - .tm_mday = 1 - }; - if (strftime(year, sizeof(year), "%Y", &date) && !strcmp(year, "0099")) { - return 1; - } - return 0; -} -]])], -[ac_cv_normalize_century=yes], -[ac_cv_normalize_century=no], -[ac_cv_normalize_century=yes])]) -if test "$ac_cv_normalize_century" = yes -then - AC_DEFINE([Py_NORMALIZE_CENTURY], [1], - [Define if year with century should be normalized for strftime.]) -fi - -AC_CACHE_CHECK([whether C99-compatible strftime specifiers are supported], [ac_cv_strftime_c99_support], [ -AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include <time.h> -#include <string.h> - -int main(void) -{ - char full_date[11]; - struct tm date = { - .tm_year = 0, - .tm_mon = 0, - .tm_mday = 1 - }; - if (strftime(full_date, sizeof(full_date), "%F", &date) && !strcmp(full_date, "1900-01-01")) { - return 0; - } - return 1; -} -]])], -[ac_cv_strftime_c99_support=yes], -[AC_MSG_ERROR([Python requires C99-compatible strftime specifiers])], -[ac_cv_strftime_c99_support=])]) - dnl check for ncursesw/ncurses and panelw/panel dnl NOTE: old curses is not detected. dnl have_curses=[no, yes] @@ -7053,8 +7145,6 @@ if test "$with_remote_debug" = yes; then [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([yes]) else - AC_DEFINE([Py_REMOTE_DEBUG], [0], - [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([no]) fi @@ -7662,9 +7752,7 @@ AS_CASE([$ac_sys_system], ) dnl fcntl, readline, and termios are not particularly useful in browsers. PY_STDLIB_MOD_SET_NA( - [fcntl], [readline], - [termios], ) ], [WASI], [ @@ -7894,6 +7982,15 @@ AC_SUBST([LIBHACL_CFLAGS]) LIBHACL_LDFLAGS= # for now, no specific linker flags are needed AC_SUBST([LIBHACL_LDFLAGS]) +dnl Check if universal2 HACL* implementation should be used. +if test "$UNIVERSAL_ARCHS" = "universal2" -o \ + \( "$build_cpu" = "aarch64" -a "$build_vendor" = "apple" \) +then + use_hacl_universal2_impl=yes +else + use_hacl_universal2_impl=no +fi + # The SIMD files use aligned_alloc, which is not available on older versions of # Android. # The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. @@ -7904,13 +8001,14 @@ then AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [ + HACL* library can compile SIMD128 implementations]) # macOS universal2 builds *support* the -msse etc flags because they're # available on x86_64. However, performance of the HACL SIMD128 implementation # isn't great, so it's disabled on ARM64. AC_MSG_CHECKING([for HACL* SIMD128 implementation]) - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then [LIBHACL_BLAKE2_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128_universal2.o"] AC_MSG_RESULT([universal2]) else @@ -7935,14 +8033,15 @@ if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ then AX_CHECK_COMPILE_FLAG([-mavx2],[ [LIBHACL_SIMD256_FLAGS="-mavx2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [ + HACL* library can compile SIMD256 implementations]) # macOS universal2 builds *support* the -mavx2 compiler flag because it's # available on x86_64; but the HACL SIMD256 build then fails because the # implementation requires symbols that aren't available on ARM64. Use a # wrapped implementation if we're building for universal2. AC_MSG_CHECKING([for HACL* SIMD256 implementation]) - if test "$UNIVERSAL_ARCHS" == "universal2"; then + if test "$use_hacl_universal2_impl" = "yes"; then [LIBHACL_BLAKE2_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256_universal2.o"] AC_MSG_RESULT([universal2]) else diff --git a/iOS/README.rst b/iOS/README.rst deleted file mode 100644 index 13b885144932e4..00000000000000 --- a/iOS/README.rst +++ /dev/null @@ -1,375 +0,0 @@ -==================== -Python on iOS README -==================== - -:Authors: - Russell Keith-Magee (2023-11) - -This document provides a quick overview of some iOS specific features in the -Python distribution. - -These instructions are only needed if you're planning to compile Python for iOS -yourself. Most users should *not* need to do this. If you're looking to -experiment with writing an iOS app in Python, tools such as `BeeWare's Briefcase -<https://briefcase.readthedocs.io>`__ and `Kivy's Buildozer -<https://buildozer.readthedocs.io>`__ will provide a much more approachable -user experience. - -Compilers for building on iOS -============================= - -Building for iOS requires the use of Apple's Xcode tooling. It is strongly -recommended that you use the most recent stable release of Xcode. This will -require the use of the most (or second-most) recently released macOS version, -as Apple does not maintain Xcode for older macOS versions. The Xcode Command -Line Tools are not sufficient for iOS development; you need a *full* Xcode -install. - -If you want to run your code on the iOS simulator, you'll also need to install -an iOS Simulator Platform. You should be prompted to select an iOS Simulator -Platform when you first run Xcode. Alternatively, you can add an iOS Simulator -Platform by selecting an open the Platforms tab of the Xcode Settings panel. - -iOS specific arguments to configure -=================================== - -* ``--enable-framework[=DIR]`` - - This argument specifies the location where the Python.framework will be - installed. If ``DIR`` is not specified, the framework will be installed into - a subdirectory of the ``iOS/Frameworks`` folder. - - This argument *must* be provided when configuring iOS builds. iOS does not - support non-framework builds. - -* ``--with-framework-name=NAME`` - - Specify the name for the Python framework; defaults to ``Python``. - - .. admonition:: Use this option with care! - - Unless you know what you're doing, changing the name of the Python - framework on iOS is not advised. If you use this option, you won't be able - to run the ``make testios`` target without making significant manual - alterations, and you won't be able to use any binary packages unless you - compile them yourself using your own framework name. - -Building Python on iOS -====================== - -ABIs and Architectures ----------------------- - -iOS apps can be deployed on physical devices, and on the iOS simulator. Although -the API used on these devices is identical, the ABI is different - you need to -link against different libraries for an iOS device build (``iphoneos``) or an -iOS simulator build (``iphonesimulator``). - -Apple uses the ``XCframework`` format to allow specifying a single dependency -that supports multiple ABIs. An ``XCframework`` is a wrapper around multiple -ABI-specific frameworks that share a common API. - -iOS can also support different CPU architectures within each ABI. At present, -there is only a single supported architecture on physical devices - ARM64. -However, the *simulator* supports 2 architectures - ARM64 (for running on Apple -Silicon machines), and x86_64 (for running on older Intel-based machines). - -To support multiple CPU architectures on a single platform, Apple uses a "fat -binary" format - a single physical file that contains support for multiple -architectures. It is possible to compile and use a "thin" single architecture -version of a binary for testing purposes; however, the "thin" binary will not be -portable to machines using other architectures. - -Building a single-architecture framework ----------------------------------------- - -The Python build system will create a ``Python.framework`` that supports a -*single* ABI with a *single* architecture. Unlike macOS, iOS does not allow a -framework to contain non-library content, so the iOS build will produce a -``bin`` and ``lib`` folder in the same output folder as ``Python.framework``. -The ``lib`` folder will be needed at runtime to support the Python library. - -If you want to use Python in a real iOS project, you need to produce multiple -``Python.framework`` builds, one for each ABI and architecture. iOS builds of -Python *must* be constructed as framework builds. To support this, you must -provide the ``--enable-framework`` flag when configuring the build. The build -also requires the use of cross-compilation. The minimal commands for building -Python for the ARM64 iOS simulator will look something like:: - - $ export PATH="$(pwd)/iOS/Resources/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/Apple/usr/bin" - $ ./configure \ - --enable-framework \ - --host=arm64-apple-ios-simulator \ - --build=arm64-apple-darwin \ - --with-build-python=/path/to/python.exe - $ make - $ make install - -In this invocation: - -* ``iOS/Resources/bin`` has been added to the path, providing some shims for the - compilers and linkers needed by the build. Xcode requires the use of ``xcrun`` - to invoke compiler tooling. However, if ``xcrun`` is pre-evaluated and the - result passed to ``configure``, these results can embed user- and - version-specific paths into the sysconfig data, which limits the portability - of the compiled Python. Alternatively, if ``xcrun`` is used *as* the compiler, - it requires that compiler variables like ``CC`` include spaces, which can - cause significant problems with many C configuration systems which assume that - ``CC`` will be a single executable. - - To work around this problem, the ``iOS/Resources/bin`` folder contains some - wrapper scripts that present as simple compilers and linkers, but wrap - underlying calls to ``xcrun``. This allows configure to use a ``CC`` - definition without spaces, and without user- or version-specific paths, while - retaining the ability to adapt to the local Xcode install. These scripts are - included in the ``bin`` directory of an iOS install. - - These scripts will, by default, use the currently active Xcode installation. - If you want to use a different Xcode installation, you can use - ``xcode-select`` to set a new default Xcode globally, or you can use the - ``DEVELOPER_DIR`` environment variable to specify an Xcode install. The - scripts will use the default ``iphoneos``/``iphonesimulator`` SDK version for - the select Xcode install; if you want to use a different SDK, you can set the - ``IOS_SDK_VERSION`` environment variable. (e.g, setting - ``IOS_SDK_VERSION=17.1`` would cause the scripts to use the ``iphoneos17.1`` - and ``iphonesimulator17.1`` SDKs, regardless of the Xcode default.) - - The path has also been cleared of any user customizations. A common source of - bugs is for tools like Homebrew to accidentally leak macOS binaries into an iOS - build. Resetting the path to a known "bare bones" value is the easiest way to - avoid these problems. - -* ``--host`` is the architecture and ABI that you want to build, in GNU compiler - triple format. This will be one of: - - - ``arm64-apple-ios`` for ARM64 iOS devices. - - ``arm64-apple-ios-simulator`` for the iOS simulator running on Apple - Silicon devices. - - ``x86_64-apple-ios-simulator`` for the iOS simulator running on Intel - devices. - -* ``--build`` is the GNU compiler triple for the machine that will be running - the compiler. This is one of: - - - ``arm64-apple-darwin`` for Apple Silicon devices. - - ``x86_64-apple-darwin`` for Intel devices. - -* ``/path/to/python.exe`` is the path to a Python binary on the machine that - will be running the compiler. This is needed because the Python compilation - process involves running some Python code. On a normal desktop build of - Python, you can compile a python interpreter and then use that interpreter to - run Python code. However, the binaries produced for iOS won't run on macOS, so - you need to provide an external Python interpreter. This interpreter must be - the same version as the Python that is being compiled. To be completely safe, - this should be the *exact* same commit hash. However, the longer a Python - release has been stable, the more likely it is that this constraint can be - relaxed - the same micro version will often be sufficient. - -* The ``install`` target for iOS builds is slightly different to other - platforms. On most platforms, ``make install`` will install the build into - the final runtime location. This won't be the case for iOS, as the final - runtime location will be on a physical device. - - However, you still need to run the ``install`` target for iOS builds, as it - performs some final framework assembly steps. The location specified with - ``--enable-framework`` will be the location where ``make install`` will - assemble the complete iOS framework. This completed framework can then - be copied and relocated as required. - -For a full CPython build, you also need to specify the paths to iOS builds of -the binary libraries that CPython depends on (XZ, BZip2, LibFFI and OpenSSL). -This can be done by defining the ``LIBLZMA_CFLAGS``, ``LIBLZMA_LIBS``, -``BZIP2_CFLAGS``, ``BZIP2_LIBS``, ``LIBFFI_CFLAGS``, and ``LIBFFI_LIBS`` -environment variables, and the ``--with-openssl`` configure option. Versions of -these libraries pre-compiled for iOS can be found in `this repository -<https://github.com/beeware/cpython-apple-source-deps/releases>`__. LibFFI is -especially important, as many parts of the standard library (including the -``platform``, ``sysconfig`` and ``webbrowser`` modules) require the use of the -``ctypes`` module at runtime. - -By default, Python will be compiled with an iOS deployment target (i.e., the -minimum supported iOS version) of 13.0. To specify a different deployment -target, provide the version number as part of the ``--host`` argument - for -example, ``--host=arm64-apple-ios15.4-simulator`` would compile an ARM64 -simulator build with a deployment target of 15.4. - -Merge thin frameworks into fat frameworks ------------------------------------------ - -Once you've built a ``Python.framework`` for each ABI and and architecture, you -must produce a "fat" framework for each ABI that contains all the architectures -for that ABI. - -The ``iphoneos`` build only needs to support a single architecture, so it can be -used without modification. - -If you only want to support a single simulator architecture, (e.g., only support -ARM64 simulators), you can use a single architecture ``Python.framework`` build. -However, if you want to create ``Python.xcframework`` that supports *all* -architectures, you'll need to merge the ``iphonesimulator`` builds for ARM64 and -x86_64 into a single "fat" framework. - -The "fat" framework can be constructed by performing a directory merge of the -content of the two "thin" ``Python.framework`` directories, plus the ``bin`` and -``lib`` folders for each thin framework. When performing this merge: - -* The pure Python standard library content is identical for each architecture, - except for a handful of platform-specific files (such as the ``sysconfig`` - module). Ensure that the "fat" framework has the union of all standard library - files. - -* Any binary files in the standard library, plus the main - ``libPython3.X.dylib``, can be merged using the ``lipo`` tool, provide by - Xcode:: - - $ lipo -create -output module.dylib path/to/x86_64/module.dylib path/to/arm64/module.dylib - -* The header files will be identical on both architectures, except for - ``pyconfig.h``. Copy all the headers from one platform (say, arm64), rename - ``pyconfig.h`` to ``pyconfig-arm64.h``, and copy the ``pyconfig.h`` for the - other architecture into the merged header folder as ``pyconfig-x86_64.h``. - Then copy the ``iOS/Resources/pyconfig.h`` file from the CPython sources into - the merged headers folder. This will allow the two Python architectures to - share a common ``pyconfig.h`` header file. - -At this point, you should have 2 Python.framework folders - one for ``iphoneos``, -and one for ``iphonesimulator`` that is a merge of x86+64 and ARM64 content. - -Merge frameworks into an XCframework ------------------------------------- - -Now that we have 2 (potentially fat) ABI-specific frameworks, we can merge those -frameworks into a single ``XCframework``. - -The initial skeleton of an ``XCframework`` is built using:: - - xcodebuild -create-xcframework -output Python.xcframework -framework path/to/iphoneos/Python.framework -framework path/to/iphonesimulator/Python.framework - -Then, copy the ``bin`` and ``lib`` folders into the architecture-specific slices of -the XCframework:: - - cp path/to/iphoneos/bin Python.xcframework/ios-arm64 - cp path/to/iphoneos/lib Python.xcframework/ios-arm64 - - cp path/to/iphonesimulator/bin Python.xcframework/ios-arm64_x86_64-simulator - cp path/to/iphonesimulator/lib Python.xcframework/ios-arm64_x86_64-simulator - -Note that the name of the architecture-specific slice for the simulator will -depend on the CPU architecture(s) that you build. - -You now have a Python.xcframework that can be used in a project. - -Testing Python on iOS -===================== - -The ``iOS/testbed`` folder that contains an Xcode project that is able to run -the iOS test suite. This project converts the Python test suite into a single -test case in Xcode's XCTest framework. The single XCTest passes if the test -suite passes. - -To run the test suite, configure a Python build for an iOS simulator (i.e., -``--host=arm64-apple-ios-simulator`` or ``--host=x86_64-apple-ios-simulator`` -), specifying a framework build (i.e. ``--enable-framework``). Ensure that your -``PATH`` has been configured to include the ``iOS/Resources/bin`` folder and -exclude any non-iOS tools, then run:: - - $ make all - $ make install - $ make testios - -This will: - -* Build an iOS framework for your chosen architecture; -* Finalize the single-platform framework; -* Make a clean copy of the testbed project; -* Install the Python iOS framework into the copy of the testbed project; and -* Run the test suite on an "iPhone SE (3rd generation)" simulator. - -On success, the test suite will exit and report successful completion of the -test suite. On a 2022 M1 MacBook Pro, the test suite takes approximately 15 -minutes to run; a couple of extra minutes is required to compile the testbed -project, and then boot and prepare the iOS simulator. - -Debugging test failures ------------------------ - -Running ``make test`` generates a standalone version of the ``iOS/testbed`` -project, and runs the full test suite. It does this using ``iOS/testbed`` -itself - the folder is an executable module that can be used to create and run -a clone of the testbed project. - -You can generate your own standalone testbed instance by running:: - - $ python iOS/testbed clone --framework iOS/Frameworks/arm64-iphonesimulator my-testbed - -This invocation assumes that ``iOS/Frameworks/arm64-iphonesimulator`` is the -path to the iOS simulator framework for your platform (ARM64 in this case); -``my-testbed`` is the name of the folder for the new testbed clone. - -You can then use the ``my-testbed`` folder to run the Python test suite, -passing in any command line arguments you may require. For example, if you're -trying to diagnose a failure in the ``os`` module, you might run:: - - $ python my-testbed run -- test -W test_os - -This is the equivalent of running ``python -m test -W test_os`` on a desktop -Python build. Any arguments after the ``--`` will be passed to testbed as if -they were arguments to ``python -m`` on a desktop machine. - -You can also open the testbed project in Xcode by running:: - - $ open my-testbed/iOSTestbed.xcodeproj - -This will allow you to use the full Xcode suite of tools for debugging. - -Testing on an iOS device -^^^^^^^^^^^^^^^^^^^^^^^^ - -To test on an iOS device, the app needs to be signed with known developer -credentials. To obtain these credentials, you must have an iOS Developer -account, and your Xcode install will need to be logged into your account (see -the Accounts tab of the Preferences dialog). - -Once the project is open, and you're signed into your Apple Developer account, -select the root node of the project tree (labeled "iOSTestbed"), then the -"Signing & Capabilities" tab in the details page. Select a development team -(this will likely be your own name), and plug in a physical device to your -macOS machine with a USB cable. You should then be able to select your physical -device from the list of targets in the pulldown in the Xcode titlebar. - -Running specific tests -^^^^^^^^^^^^^^^^^^^^^^ - -As the test suite is being executed on an iOS simulator, it is not possible to -pass in command line arguments to configure test suite operation. To work -around this limitation, the arguments that would normally be passed as command -line arguments are configured as part of the ``iOSTestbed-Info.plist`` file -that is used to configure the iOS testbed app. In this file, the ``TestArgs`` -key is an array containing the arguments that would be passed to ``python -m`` -on the command line (including ``test`` in position 0, the name of the test -module to be executed). - -Disabling automated breakpoints -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -By default, Xcode will inserts an automatic breakpoint whenever a signal is -raised. The Python test suite raises many of these signals as part of normal -operation; unless you are trying to diagnose an issue with signals, the -automatic breakpoints can be inconvenient. However, they can be disabled by -creating a symbolic breakpoint that is triggered at the start of the test run. - -Select "Debug > Breakpoints > Create Symbolic Breakpoint" from the Xcode menu, and -populate the new brewpoint with the following details: - -* **Name**: IgnoreSignals -* **Symbol**: UIApplicationMain -* **Action**: Add debugger commands for: - - ``process handle SIGINT -n true -p true -s false`` - - ``process handle SIGUSR1 -n true -p true -s false`` - - ``process handle SIGUSR2 -n true -p true -s false`` - - ``process handle SIGXFSZ -n true -p true -s false`` -* Check the "Automatically continue after evaluating" box. - -All other details can be left blank. When the process executes the -``UIApplicationMain`` entry point, the breakpoint will trigger, run the debugger -commands to disable the automatic breakpoints, and automatically resume. diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-strip b/iOS/Resources/bin/arm64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..fd59d309b73a20 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-strip b/iOS/Resources/bin/arm64-apple-ios-strip new file mode 100755 index 00000000000000..75e823a3d02d61 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-strip b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..c5cfb28929195a --- /dev/null +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch x86_64 "$@" diff --git a/iOS/testbed/__main__.py b/iOS/testbed/__main__.py deleted file mode 100644 index c05497ede3aa61..00000000000000 --- a/iOS/testbed/__main__.py +++ /dev/null @@ -1,548 +0,0 @@ -import argparse -import asyncio -import fcntl -import json -import os -import plistlib -import re -import shutil -import subprocess -import sys -import tempfile -from contextlib import asynccontextmanager -from datetime import datetime -from pathlib import Path - - -DECODE_ARGS = ("UTF-8", "backslashreplace") - -# The system log prefixes each line: -# 2025-01-17 16:14:29.090 Df iOSTestbed[23987:1fd393b4] (Python) ... -# 2025-01-17 16:14:29.090 E iOSTestbed[23987:1fd393b4] (Python) ... - -LOG_PREFIX_REGEX = re.compile( - r"^\d{4}-\d{2}-\d{2}" # YYYY-MM-DD - r"\s+\d+:\d{2}:\d{2}\.\d+" # HH:MM:SS.sss - r"\s+\w+" # Df/E - r"\s+iOSTestbed\[\d+:\w+\]" # Process/thread ID - r"\s+\(Python\)\s" # Logger name -) - - -# Work around a bug involving sys.exit and TaskGroups -# (https://github.com/python/cpython/issues/101515). -def exit(*args): - raise MySystemExit(*args) - - -class MySystemExit(Exception): - pass - - -class SimulatorLock: - # An fcntl-based filesystem lock that can be used to ensure that - def __init__(self, timeout): - self.filename = Path(tempfile.gettempdir()) / "python-ios-testbed" - self.timeout = timeout - - self.fd = None - - async def acquire(self): - # Ensure the lockfile exists - self.filename.touch(exist_ok=True) - - # Try `timeout` times to acquire the lock file, with a 1 second pause - # between each attempt. Report status every 10 seconds. - for i in range(0, self.timeout): - try: - fd = os.open(self.filename, os.O_RDWR | os.O_TRUNC, 0o644) - fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) - except OSError: - os.close(fd) - if i % 10 == 0: - print("... waiting", flush=True) - await asyncio.sleep(1) - else: - self.fd = fd - return - - # If we reach the end of the loop, we've exceeded the allowed number of - # attempts. - raise ValueError("Unable to obtain lock on iOS simulator creation") - - def release(self): - # If a lock is held, release it. - if self.fd is not None: - # Release the lock. - fcntl.flock(self.fd, fcntl.LOCK_UN) - os.close(self.fd) - self.fd = None - - -# All subprocesses are executed through this context manager so that no matter -# what happens, they can always be cancelled from another task, and they will -# always be cleaned up on exit. -@asynccontextmanager -async def async_process(*args, **kwargs): - process = await asyncio.create_subprocess_exec(*args, **kwargs) - try: - yield process - finally: - if process.returncode is None: - # Allow a reasonably long time for Xcode to clean itself up, - # because we don't want stale emulators left behind. - timeout = 10 - process.terminate() - try: - await asyncio.wait_for(process.wait(), timeout) - except TimeoutError: - print( - f"Command {args} did not terminate after {timeout} seconds " - f" - sending SIGKILL" - ) - process.kill() - - # Even after killing the process we must still wait for it, - # otherwise we'll get the warning "Exception ignored in __del__". - await asyncio.wait_for(process.wait(), timeout=1) - - -async def async_check_output(*args, **kwargs): - async with async_process( - *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs - ) as process: - stdout, stderr = await process.communicate() - if process.returncode == 0: - return stdout.decode(*DECODE_ARGS) - else: - raise subprocess.CalledProcessError( - process.returncode, - args, - stdout.decode(*DECODE_ARGS), - stderr.decode(*DECODE_ARGS), - ) - - -# Select a simulator device to use. -async def select_simulator_device(): - # List the testing simulators, in JSON format - raw_json = await async_check_output( - "xcrun", "simctl", "--set", "testing", "list", "-j" - ) - json_data = json.loads(raw_json) - - # Any device will do; we'll look for "SE" devices - but the name isn't - # consistent over time. Older Xcode versions will use "iPhone SE (Nth - # generation)"; As of 2025, they've started using "iPhone 16e". - # - # When Xcode is updated after a new release, new devices will be available - # and old ones will be dropped from the set available on the latest iOS - # version. Select the one with the highest minimum runtime version - this - # is an indicator of the "newest" released device, which should always be - # supported on the "most recent" iOS version. - se_simulators = sorted( - (devicetype["minRuntimeVersion"], devicetype["name"]) - for devicetype in json_data["devicetypes"] - if devicetype["productFamily"] == "iPhone" - and ( - ("iPhone " in devicetype["name"] and devicetype["name"].endswith("e")) - or "iPhone SE " in devicetype["name"] - ) - ) - - return se_simulators[-1][1] - - -# Return a list of UDIDs associated with booted simulators -async def list_devices(): - try: - # List the testing simulators, in JSON format - raw_json = await async_check_output( - "xcrun", "simctl", "--set", "testing", "list", "-j" - ) - json_data = json.loads(raw_json) - - # Filter out the booted iOS simulators - return [ - simulator["udid"] - for runtime, simulators in json_data["devices"].items() - for simulator in simulators - if runtime.split(".")[-1].startswith("iOS") and simulator["state"] == "Booted" - ] - except subprocess.CalledProcessError as e: - # If there's no ~/Library/Developer/XCTestDevices folder (which is the - # case on fresh installs, and in some CI environments), `simctl list` - # returns error code 1, rather than an empty list. Handle that case, - # but raise all other errors. - if e.returncode == 1: - return [] - else: - raise - - -async def find_device(initial_devices, lock): - while True: - new_devices = set(await list_devices()).difference(initial_devices) - if len(new_devices) == 0: - await asyncio.sleep(1) - elif len(new_devices) == 1: - udid = new_devices.pop() - print(f"{datetime.now():%Y-%m-%d %H:%M:%S}: New test simulator detected") - print(f"UDID: {udid}", flush=True) - lock.release() - return udid - else: - exit(f"Found more than one new device: {new_devices}") - - -async def log_stream_task(initial_devices, lock): - # Wait up to 5 minutes for the build to complete and the simulator to boot. - udid = await asyncio.wait_for(find_device(initial_devices, lock), 5 * 60) - - # Stream the iOS device's logs, filtering out messages that come from the - # XCTest test suite (catching NSLog messages from the test method), or - # Python itself (catching stdout/stderr content routed to the system log - # with config->use_system_logger). - args = [ - "xcrun", - "simctl", - "--set", - "testing", - "spawn", - udid, - "log", - "stream", - "--style", - "compact", - "--predicate", - ( - 'senderImagePath ENDSWITH "/iOSTestbedTests.xctest/iOSTestbedTests"' - ' OR senderImagePath ENDSWITH "/Python.framework/Python"' - ), - ] - - async with async_process( - *args, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) as process: - suppress_dupes = False - while line := (await process.stdout.readline()).decode(*DECODE_ARGS): - # Strip the prefix from each log line - line = LOG_PREFIX_REGEX.sub("", line) - # The iOS log streamer can sometimes lag; when it does, it outputs - # a warning about messages being dropped... often multiple times. - # Only print the first of these duplicated warnings. - if line.startswith("=== Messages dropped "): - if not suppress_dupes: - suppress_dupes = True - sys.stdout.write(line) - else: - suppress_dupes = False - sys.stdout.write(line) - sys.stdout.flush() - - -async def xcode_test(location, simulator, verbose): - # Run the test suite on the named simulator - print("Starting xcodebuild...", flush=True) - args = [ - "xcodebuild", - "test", - "-project", - str(location / "iOSTestbed.xcodeproj"), - "-scheme", - "iOSTestbed", - "-destination", - f"platform=iOS Simulator,name={simulator}", - "-resultBundlePath", - str(location / f"{datetime.now():%Y%m%d-%H%M%S}.xcresult"), - "-derivedDataPath", - str(location / "DerivedData"), - ] - if not verbose: - args += ["-quiet"] - - async with async_process( - *args, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) as process: - while line := (await process.stdout.readline()).decode(*DECODE_ARGS): - sys.stdout.write(line) - sys.stdout.flush() - - status = await asyncio.wait_for(process.wait(), timeout=1) - exit(status) - - -def clone_testbed( - source: Path, - target: Path, - framework: Path, - apps: list[Path], -) -> None: - if target.exists(): - print(f"{target} already exists; aborting without creating project.") - sys.exit(10) - - if framework is None: - if not ( - source / "Python.xcframework/ios-arm64_x86_64-simulator/bin" - ).is_dir(): - print( - f"The testbed being cloned ({source}) does not contain " - f"a simulator framework. Re-run with --framework" - ) - sys.exit(11) - else: - if not framework.is_dir(): - print(f"{framework} does not exist.") - sys.exit(12) - elif not ( - framework.suffix == ".xcframework" - or (framework / "Python.framework").is_dir() - ): - print( - f"{framework} is not an XCframework, " - f"or a simulator slice of a framework build." - ) - sys.exit(13) - - print("Cloning testbed project:") - print(f" Cloning {source}...", end="", flush=True) - shutil.copytree(source, target, symlinks=True) - print(" done") - - xc_framework_path = target / "Python.xcframework" - sim_framework_path = xc_framework_path / "ios-arm64_x86_64-simulator" - if framework is not None: - if framework.suffix == ".xcframework": - print(" Installing XCFramework...", end="", flush=True) - if xc_framework_path.is_dir(): - shutil.rmtree(xc_framework_path) - else: - xc_framework_path.unlink(missing_ok=True) - xc_framework_path.symlink_to( - framework.relative_to(xc_framework_path.parent, walk_up=True) - ) - print(" done") - else: - print(" Installing simulator framework...", end="", flush=True) - if sim_framework_path.is_dir(): - shutil.rmtree(sim_framework_path) - else: - sim_framework_path.unlink(missing_ok=True) - sim_framework_path.symlink_to( - framework.relative_to(sim_framework_path.parent, walk_up=True) - ) - print(" done") - else: - if ( - xc_framework_path.is_symlink() - and not xc_framework_path.readlink().is_absolute() - ): - # XCFramework is a relative symlink. Rewrite the symlink relative - # to the new location. - print(" Rewriting symlink to XCframework...", end="", flush=True) - orig_xc_framework_path = ( - source - / xc_framework_path.readlink() - ).resolve() - xc_framework_path.unlink() - xc_framework_path.symlink_to( - orig_xc_framework_path.relative_to( - xc_framework_path.parent, walk_up=True - ) - ) - print(" done") - elif ( - sim_framework_path.is_symlink() - and not sim_framework_path.readlink().is_absolute() - ): - print(" Rewriting symlink to simulator framework...", end="", flush=True) - # Simulator framework is a relative symlink. Rewrite the symlink - # relative to the new location. - orig_sim_framework_path = ( - source - / "Python.XCframework" - / sim_framework_path.readlink() - ).resolve() - sim_framework_path.unlink() - sim_framework_path.symlink_to( - orig_sim_framework_path.relative_to( - sim_framework_path.parent, walk_up=True - ) - ) - print(" done") - else: - print(" Using pre-existing iOS framework.") - - for app_src in apps: - print(f" Installing app {app_src.name!r}...", end="", flush=True) - app_target = target / f"iOSTestbed/app/{app_src.name}" - if app_target.is_dir(): - shutil.rmtree(app_target) - shutil.copytree(app_src, app_target) - print(" done") - - print(f"Successfully cloned testbed: {target.resolve()}") - - -def update_plist(testbed_path, args): - # Add the test runner arguments to the testbed's Info.plist file. - info_plist = testbed_path / "iOSTestbed" / "iOSTestbed-Info.plist" - with info_plist.open("rb") as f: - info = plistlib.load(f) - - info["TestArgs"] = args - - with info_plist.open("wb") as f: - plistlib.dump(info, f) - - -async def run_testbed(simulator: str | None, args: list[str], verbose: bool=False): - location = Path(__file__).parent - print("Updating plist...", end="", flush=True) - update_plist(location, args) - print(" done.", flush=True) - - if simulator is None: - simulator = await select_simulator_device() - print(f"Running test on {simulator}", flush=True) - - # We need to get an exclusive lock on simulator creation, to avoid issues - # with multiple simulators starting and being unable to tell which - # simulator is due to which testbed instance. See - # https://github.com/python/cpython/issues/130294 for details. Wait up to - # 10 minutes for a simulator to boot. - print("Obtaining lock on simulator creation...", flush=True) - simulator_lock = SimulatorLock(timeout=10*60) - await simulator_lock.acquire() - print("Simulator lock acquired.", flush=True) - - # Get the list of devices that are booted at the start of the test run. - # The simulator started by the test suite will be detected as the new - # entry that appears on the device list. - initial_devices = await list_devices() - - try: - async with asyncio.TaskGroup() as tg: - tg.create_task(log_stream_task(initial_devices, simulator_lock)) - tg.create_task(xcode_test(location, simulator=simulator, verbose=verbose)) - except* MySystemExit as e: - raise SystemExit(*e.exceptions[0].args) from None - except* subprocess.CalledProcessError as e: - # Extract it from the ExceptionGroup so it can be handled by `main`. - raise e.exceptions[0] - finally: - simulator_lock.release() - - -def main(): - parser = argparse.ArgumentParser( - description=( - "Manages the process of testing a Python project in the iOS simulator." - ), - ) - - subcommands = parser.add_subparsers(dest="subcommand") - - clone = subcommands.add_parser( - "clone", - description=( - "Clone the testbed project, copying in an iOS Python framework and" - "any specified application code." - ), - help="Clone a testbed project to a new location.", - ) - clone.add_argument( - "--framework", - help=( - "The location of the XCFramework (or simulator-only slice of an " - "XCFramework) to use when running the testbed" - ), - ) - clone.add_argument( - "--app", - dest="apps", - action="append", - default=[], - help="The location of any code to include in the testbed project", - ) - clone.add_argument( - "location", - help="The path where the testbed will be cloned.", - ) - - run = subcommands.add_parser( - "run", - usage="%(prog)s [-h] [--simulator SIMULATOR] -- <test arg> [<test arg> ...]", - description=( - "Run a testbed project. The arguments provided after `--` will be " - "passed to the running iOS process as if they were arguments to " - "`python -m`." - ), - help="Run a testbed project", - ) - run.add_argument( - "--simulator", - help=( - "The name of the simulator to use (eg: 'iPhone 16e'). Defaults to ", - "the most recently released 'entry level' iPhone device." - ) - ) - run.add_argument( - "-v", "--verbose", - action="store_true", - help="Enable verbose output", - ) - - try: - pos = sys.argv.index("--") - testbed_args = sys.argv[1:pos] - test_args = sys.argv[pos + 1 :] - except ValueError: - testbed_args = sys.argv[1:] - test_args = [] - - context = parser.parse_args(testbed_args) - - if context.subcommand == "clone": - clone_testbed( - source=Path(__file__).parent.resolve(), - target=Path(context.location).resolve(), - framework=Path(context.framework).resolve() if context.framework else None, - apps=[Path(app) for app in context.apps], - ) - elif context.subcommand == "run": - if test_args: - if not ( - Path(__file__).parent / "Python.xcframework/ios-arm64_x86_64-simulator/bin" - ).is_dir(): - print( - f"Testbed does not contain a compiled iOS framework. Use " - f"`python {sys.argv[0]} clone ...` to create a runnable " - f"clone of this testbed." - ) - sys.exit(20) - - asyncio.run( - run_testbed( - simulator=context.simulator, - verbose=context.verbose, - args=test_args, - ) - ) - else: - print(f"Must specify test arguments (e.g., {sys.argv[0]} run -- test)") - print() - parser.print_help(sys.stderr) - sys.exit(21) - else: - parser.print_help(sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/iOS/testbed/iOSTestbed/dylib-Info-template.plist b/iOS/testbed/iOSTestbed/dylib-Info-template.plist deleted file mode 100644 index f652e272f71c88..00000000000000 --- a/iOS/testbed/iOSTestbed/dylib-Info-template.plist +++ /dev/null @@ -1,26 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>en</string> - <key>CFBundleExecutable</key> - <string></string> - <key>CFBundleIdentifier</key> - <string></string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - <string>APPL</string> - <key>CFBundleShortVersionString</key> - <string>1.0</string> - <key>CFBundleSupportedPlatforms</key> - <array> - <string>iPhoneOS</string> - </array> - <key>MinimumOSVersion</key> - <string>12.0</string> - <key>CFBundleVersion</key> - <string>1</string> -</dict> -</plist> diff --git a/pyconfig.h.in b/pyconfig.h.in index 7586ad3f266705..9502fcebf5d780 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -50,12 +50,6 @@ /* Define if getpgrp() must be called as getpgrp(0). */ #undef GETPGRP_HAVE_ARG -/* HACL* library can compile SIMD128 implementations */ -#undef HACL_CAN_COMPILE_SIMD128 - -/* HACL* library can compile SIMD256 implementations */ -#undef HACL_CAN_COMPILE_SIMD256 - /* Define if you have the 'accept' function. */ #undef HAVE_ACCEPT @@ -267,6 +261,10 @@ */ #undef HAVE_DECL_TZNAME +/* Define to 1 if you have the declaration of 'UT_NAMESIZE', and to 0 if you + don't. */ +#undef HAVE_DECL_UT_NAMESIZE + /* Define to 1 if you have the device macros. */ #undef HAVE_DEVICE_MACROS @@ -539,6 +537,9 @@ /* Define to 1 if you have the 'getlogin' function. */ #undef HAVE_GETLOGIN +/* Define to 1 if you have the 'getlogin_r' function. */ +#undef HAVE_GETLOGIN_R + /* Define to 1 if you have the 'getnameinfo' function. */ #undef HAVE_GETNAMEINFO @@ -807,6 +808,9 @@ /* Define this if you have the makedev macro. */ #undef HAVE_MAKEDEV +/* Define if you have the 'MAXLOGNAME' constant. */ +#undef HAVE_MAXLOGNAME + /* Define to 1 if you have the 'mbrtowc' function. */ #undef HAVE_MBRTOWC @@ -1575,6 +1579,9 @@ /* Define to 1 if you have the <utmp.h> header file. */ #undef HAVE_UTMP_H +/* Define if you have the 'HAVE_UT_NAMESIZE' constant. */ +#undef HAVE_UT_NAMESIZE + /* Define to 1 if you have the 'uuid_create' function. */ #undef HAVE_UUID_CREATE @@ -1584,6 +1591,9 @@ /* Define if uuid_generate_time_safe() exists. */ #undef HAVE_UUID_GENERATE_TIME_SAFE +/* Define if uuid_generate_time_safe() is able to deduce a MAC address. */ +#undef HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC + /* Define to 1 if you have the <uuid.h> header file. */ #undef HAVE_UUID_H @@ -1630,12 +1640,18 @@ /* Define to 1 if you have the 'writev' function. */ #undef HAVE_WRITEV +/* Define to 1 if you have the <zdict.h> header file. */ +#undef HAVE_ZDICT_H + /* Define if the zlib library has inflateCopy */ #undef HAVE_ZLIB_COPY /* Define to 1 if you have the <zlib.h> header file. */ #undef HAVE_ZLIB_H +/* Define to 1 if you have the <zstd.h> header file. */ +#undef HAVE_ZSTD_H + /* Define to 1 if you have the '_getpty' function. */ #undef HAVE__GETPTY @@ -1714,9 +1730,6 @@ /* Defined if Python is built as a shared library. */ #undef Py_ENABLE_SHARED -/* Defined if _Complex C type can be used with libffi. */ -#undef Py_FFI_SUPPORT_C_COMPLEX - /* Define if you want to disable the GIL */ #undef Py_GIL_DISABLED @@ -1724,9 +1737,6 @@ SipHash13: 3, externally defined: 0 */ #undef Py_HASH_ALGORITHM -/* Define if year with century should be normalized for strftime. */ -#undef Py_NORMALIZE_CENTURY - /* Define if you want to enable remote debugging support. */ #undef Py_REMOTE_DEBUG @@ -2004,6 +2014,18 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Defined if _Complex C type can be used with libffi. */ +#undef _Py_FFI_SUPPORT_C_COMPLEX + +/* HACL* library can compile SIMD128 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC128 + +/* HACL* library can compile SIMD256 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC256 + +/* Define to 1 if the machine stack grows down (default); 0 if it grows up. */ +#undef _Py_STACK_GROWS_DOWN + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT