diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 00000000..7c346434
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,4 @@
+[run]
+omit =
+    */test/*
+source = pysr
diff --git a/.deepsource.toml b/.deepsource.toml
deleted file mode 100644
index 24f681a3..00000000
--- a/.deepsource.toml
+++ /dev/null
@@ -1,16 +0,0 @@
-version = 1
-
-test_patterns = ["test/*.py"]
-
-exclude_patterns = ["Project.toml"]
-
-[[analyzers]]
-name = "python"
-enabled = true
-
-  [analyzers.meta]
-  runtime_version = "3.x.x"
-
-[[transformers]]
-name = "black"
-enabled = true
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..394ec23d
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,16 @@
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+
+  - package-ecosystem: "pip" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "daily"
+
+  - package-ecosystem: "github-actions"
+    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
+    directory: "/"
+    schedule:
+      # Check for updates to GitHub Actions every weekday
+      interval: "daily"
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 91ca092a..1965d781 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -24,19 +24,31 @@ jobs:
   test:
     runs-on: ${{ matrix.os }}
    timeout-minutes: 60
+    env:
+      COVERAGE_PROCESS_START: "${{ github.workspace }}/.coveragerc"
     defaults:
       run:
         shell: bash
     strategy:
       matrix:
-        julia-version: ['1.9']
-        python-version: ['3.10']
+        julia-version: ['1']
+        python-version: ['3.12']
         os: [ubuntu-latest]
+        test-id: [main]
+        include:
+          - julia-version: '1.6'
+            python-version: '3.8'
+            os: ubuntu-latest
+            test-id: include
+          - julia-version: '1'
+            python-version: '3.12'
+            os: ubuntu-latest
+            test-id: include
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
@@ -45,47 +57,63 @@ jobs:
         uses: julia-actions/cache@v1
         with:
           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           cache-packages: false
       - name: "Set up Python"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
           cache: pip
       - name: "Install PySR"
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          python setup.py install
-          python -m pysr install
-      - name: "Install Coverage tool"
-        run: pip install coverage coveralls
+          pip install .
+          python -c 'import pysr'
+      - name: "Assert Julia version"
+        if: ${{ matrix.julia-version != '1'}}
+        run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
+      - name: "Install test dependencies"
+        run: pip install coverage coveralls pytest nbval
+      - name: "Set up coverage for subprocesses"
+        run: echo 'import coverage; coverage.process_startup()' > "${{ github.workspace }}/sitecustomize.py"
       - name: "Run tests"
-        run: |
-          coverage run --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test main
-          coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test cli
+        run: coverage run -m pysr test main,cli,startup
       - name: "Install JAX"
         run: pip install jax jaxlib # (optional import)
+        if: ${{ matrix.test-id == 'main' }}
       - name: "Run JAX tests"
-        run: coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test jax
+        run: coverage run --append -m pysr test jax
+        if: ${{ matrix.test-id == 'main' }}
       - name: "Install Torch"
         run: pip install torch # (optional import)
+        if: ${{ matrix.test-id == 'main' }}
       - name: "Run Torch tests"
-        run: coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test torch
-      - name: "Run custom env tests"
-        run: coverage run --append --source=pysr --omit='*/test/*,*/feynman_problems.py' -m pysr.test env
+        run: coverage run --append -m pysr test torch
       - name: "Coveralls"
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COVERALLS_FLAG_NAME: test-${{ matrix.test-name }}
+          COVERALLS_FLAG_NAME: test-${{ matrix.julia-version }}-${{ matrix.python-version }}
           COVERALLS_PARALLEL: true
         run: coveralls --service=github
 
-  incremental_install:
-    runs-on: ubuntu-latest
+  dev_install:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: ['ubuntu-latest']
+        python-version: ['3.12']
+        julia-version: ['1']
+        include:
+          - os: ubuntu-latest
+            python-version: '3.8'
+            julia-version: '1.6'
     steps:
-      - uses: actions/checkout@v2
-      - name: "Build incremental install"
-        run: docker build -t pysr -f pysr/test/incremental_install_simulator.dockerfile .
-      - name: "Test incremental install"
-        run: docker run --rm pysr /bin/bash -l -c 'python3 -m pysr.test main && python3 -m pysr.test env'
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+      - name: "Install PySR"
+        run: |
+          python -m pip install --upgrade pip
+          pip install .
+      - name: "Run development test"
+        run: PYSR_TEST_JULIA_VERSION=${{ matrix.julia-version }} PYSR_TEST_PYTHON_VERSION=${{ matrix.python-version }} python -m pysr test dev
 
   conda_test:
     runs-on: ${{ matrix.os }}
@@ -94,20 +122,20 @@ jobs:
     defaults:
       run:
         shell: bash -l {0}
     strategy:
       matrix:
-        python-version: ['3.9']
+        python-version: ['3.12']
         os: ['ubuntu-latest']
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: "Cache conda"
-        uses: actions/cache@v2
+        uses: actions/cache@v4
         env:
           CACHE_NUMBER: 0
         with:
           path: ~/conda_pkgs_dir
           key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }}
       - name: "Set up Conda"
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v3
         with:
           miniforge-variant: Mambaforge
           miniforge-version: latest
@@ -123,9 +151,9 @@ jobs:
       - name: "Install PySR"
         run: |
           python3 -m pip install .
-          python3 -m pysr install
+          python3 -c 'import pysr'
       - name: "Run tests"
-        run: cd /tmp && python -m pysr.test main
+        run: cd /tmp && python -m pysr test main
 
   coveralls:
     name: Indicate completion to coveralls.io
@@ -152,20 +180,29 @@ jobs:
     defaults:
       run:
         shell: bash -l {0}
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version:
+          - '3.12'
+          - '3.8'
+        os: ['ubuntu-latest']
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: "Set up Python"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
          python-version: ${{ matrix.python-version }}
          cache: pip
       - name: "Install PySR and all dependencies"
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install mypy jax jaxlib torch
-          python setup.py install
+          pip install .
+          pip install mypy
+      - name: "Install additional dependencies"
+        run: python -m pip install jax jaxlib torch
+        if: ${{ matrix.python-version != '3.8' }}
       - name: "Run mypy"
-        run: mypy --install-types --non-interactive pysr
+        run: python -m mypy --install-types --non-interactive pysr
+        if: ${{ matrix.python-version != '3.8' }}
+      - name: "Run compatible mypy"
+        run: python -m mypy --ignore-missing-imports pysr
+        if: ${{ matrix.python-version == '3.8' }}
diff --git a/.github/workflows/CI_Windows.yml b/.github/workflows/CI_Windows.yml
index 5ca0cce2..49b471ee 100644
--- a/.github/workflows/CI_Windows.yml
+++ b/.github/workflows/CI_Windows.yml
@@ -29,14 +29,14 @@ jobs:
     shell: bash
     strategy:
       matrix:
-        julia-version: ['1.9']
-        python-version: ['3.10']
+        julia-version: ['1']
+        python-version: ['3.12']
         os: [windows-latest]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
@@ -45,23 +45,20 @@ jobs:
         cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
         cache-packages: false
       - name: "Set up Python"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
           cache: pip
       - name: "Install PySR"
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          python setup.py install
-          python -m pysr install
+          pip install pytest nbval
+          pip install .
+          python -c 'import pysr'
       - name: "Run tests"
         run: |
-          python -m pysr.test main
-          python -m pysr.test cli
+          python -m pysr test main,cli,startup
       - name: "Install Torch"
         run: pip install torch # (optional import)
       - name: "Run Torch tests"
-        run: python -m pysr.test torch
-      - name: "Run custom env tests"
-        run: python -m pysr.test env
+        run: python -m pysr test torch
diff --git a/.github/workflows/CI_conda_forge.yml b/.github/workflows/CI_conda_forge.yml
index 51c70f9a..98d064b9 100644
--- a/.github/workflows/CI_conda_forge.yml
+++ b/.github/workflows/CI_conda_forge.yml
@@ -20,13 +20,26 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11']
-        os: ['ubuntu-latest', 'macos-latest']
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+        os: ['ubuntu-latest']
         use-mamba: [true, false]
+        include:
+          - python-version: 3.9
+            os: 'windows-latest'
+            use-mamba: true
+          - python-version: 3.12
+            os: 'windows-latest'
+            use-mamba: true
+          - python-version: 3.9
+            os: 'macos-latest'
+            use-mamba: true
+          - python-version: 3.12
+            os: 'macos-latest'
+            use-mamba: true
     steps:
       - name: "Set up Conda"
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v3
         with:
           miniforge-variant: Mambaforge
           miniforge-version: latest
@@ -40,4 +53,6 @@ jobs:
         run: conda activate pysr-test && conda install pysr
         if: ${{ !matrix.use-mamba }}
       - name: "Run tests"
-        run: python -m pysr.test main
+        run: |
+          pip install pytest nbval
+          python -m pysr test main,startup
diff --git a/.github/workflows/CI_docker.yml b/.github/workflows/CI_docker.yml
index 91082e35..80c3bc89 100644
--- a/.github/workflows/CI_docker.yml
+++ b/.github/workflows/CI_docker.yml
@@ -33,8 +33,8 @@ jobs:
         arch: ['linux/amd64']
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Build docker
         run: docker build --platform=${{ matrix.arch }} -t pysr .
       - name: Test docker
-        run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'python3 -m pysr.test main && python3 -m pysr.test cli && python3 -m pysr.test env'
+        run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
diff --git a/.github/workflows/CI_docker_large_nightly.yml b/.github/workflows/CI_docker_large_nightly.yml
index 7c8ac508..561ce155 100644
--- a/.github/workflows/CI_docker_large_nightly.yml
+++ b/.github/workflows/CI_docker_large_nightly.yml
@@ -18,19 +18,19 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        julia-version: ['1.6', '1.9']
-        python-version: ['3.10']
+        julia-version: ['1.6', '1']
+        python-version: ['3.8', '3.12']
         os: [ubuntu-latest]
         arch: ['linux/amd64', 'linux/arm64']
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
         with:
           platforms: all
       - name: Build docker
-        run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} .
+        run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} --build-arg PYVERSION=${{ matrix.python-version }} .
       - name: Test docker
-        run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'python3 -m pysr.test main && python3 -m pysr.test cli && python3 -m pysr.test env'
+        run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
diff --git a/.github/workflows/CI_large_nightly.yml b/.github/workflows/CI_large_nightly.yml
index 950072b5..cbd9a7ef 100644
--- a/.github/workflows/CI_large_nightly.yml
+++ b/.github/workflows/CI_large_nightly.yml
@@ -23,30 +23,28 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        julia-version: ['1.6', '1.8', '1.9']
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        julia-version: ['1.6', '1.8', '1.10']
+        python-version: ['3.8', '3.10', '3.12']
         os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Set up Python"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: "Install PySR"
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          python setup.py install
-          python -m pysr install
+          pip install pytest nbval
+          pip install .
+          python -c 'import pysr'
+      - name: "Assert Julia version"
+        if: ${{ matrix.julia-version != '1'}}
+        run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
       - name: "Run tests"
-        run: |
-          python -m pysr.test main
-          python -m pysr.test cli
-      - name: "Run new env test"
-        run: python -m pysr.test env
-        if: ${{ !(matrix.os == 'windows-latest' && matrix.python-version == '3.7') }}
+        run: python -m pysr test main,cli,startup
diff --git a/.github/workflows/CI_mac.yml b/.github/workflows/CI_mac.yml
index 6edb13ca..e9763f0c 100644
--- a/.github/workflows/CI_mac.yml
+++ b/.github/workflows/CI_mac.yml
@@ -29,14 +29,14 @@ jobs:
     shell: bash
     strategy:
       matrix:
-        julia-version: ['1.9']
-        python-version: ['3.10']
+        julia-version: ['1']
+        python-version: ['3.12']
         os: [macos-latest]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: "Set up Julia"
-        uses: julia-actions/setup-julia@v1
+        uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
       - name: "Cache Julia"
@@ -45,27 +45,24 @@ jobs:
         cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
         cache-packages: false
       - name: "Set up Python"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
           cache: pip
       - name: "Install PySR"
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          python setup.py install
-          python -m pysr install
+          pip install pytest nbval
+          pip install .
+          python -c 'import pysr'
       - name: "Run tests"
         run: |
-          python -m pysr.test main
-          python -m pysr.test cli
+          python -m pysr test main,cli,startup
       - name: "Install JAX"
         run: pip install jax jaxlib # (optional import)
       - name: "Run JAX tests"
-        run: python -m pysr.test jax
+        run: python -m pysr test jax
       - name: "Install Torch"
         run: pip install torch # (optional import)
       - name: "Run Torch tests"
-        run: python -m pysr.test torch
-      - name: "Run custom env tests"
-        run: python -m pysr.test env
+        run: python -m pysr test torch
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 5605428d..9b96a00f 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -27,11 +27,11 @@ jobs:
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
     # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v2
+      uses: github/codeql-action/autobuild@v3
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
@@ -58,4 +58,4 @@ jobs:
     #    ./location_of_script_within_repo/buildscript.sh
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/docker_deploy.yml b/.github/workflows/docker_deploy.yml
index c7bc0e6a..09769701 100644
--- a/.github/workflows/docker_deploy.yml
+++ b/.github/workflows/docker_deploy.yml
@@ -18,11 +18,11 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         arch: [linux/amd64]
-        python-version: [3.10.8]
-        julia-version: [1.8.2]
+        python-version: [3.12.3]
+        julia-version: [1.10.3]
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Login to Docker Hub
         uses: docker/login-action@v2
         if: github.event_name != 'pull_request'
@@ -38,9 +38,9 @@ jobs:
           password: ${{ secrets.GITHUB_TOKEN }}
       - name: Docker meta
         id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
         with:
-          # list of Docker images to use as base name for tags
+          # List of Docker images to use as base name for tags
          images: |
            mcranmer/pysr
            ghcr.io/${{ github.repository }}
@@ -57,9 +57,9 @@ jobs:
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3
       - name: Build and push
-        uses: docker/build-push-action@v3
+        uses: docker/build-push-action@v5
         with:
           context: .
           platforms: ${{ matrix.arch }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 2abe1477..af36db78 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -10,6 +10,7 @@ on:
       - 'docs/**'
       - 'setup.py'
       - 'README.md'
+      - 'mkdocs.yml'
   workflow_dispatch:
 
 jobs:
@@ -20,16 +21,16 @@ jobs:
         shell: bash
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: "Set up Python"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.9
           cache: pip
       - name: "Install packages for docs building"
         run: pip install -r docs/requirements.txt
       - name: "Install PySR"
-        run: pip install -e .
+        run: pip install . && python -c 'import pysr'
       - name: "Build API docs"
         run: cd docs && ./gen_docs.sh
       - name: "Deploy documentation"
diff --git a/.github/workflows/pypi_deploy.yml b/.github/workflows/pypi_deploy.yml
index 25a17820..f23c5895 100644
--- a/.github/workflows/pypi_deploy.yml
+++ b/.github/workflows/pypi_deploy.yml
@@ -15,15 +15,15 @@ jobs:
       id-token: write
     steps:
       - name: "Checkout"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: "Set up Python"
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.10.8
       - name: "Install building tools"
-        run: pip install wheel
+        run: pip install build
       - name: "Build package"
-        run: python setup.py sdist bdist_wheel
+        run: python -m build
       - name: "Publish distribution 📦 to Test PyPI"
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
diff --git a/.github/workflows/update_backend.yml b/.github/workflows/update_backend.yml
index 3b47b700..5bc56f6f 100644
--- a/.github/workflows/update_backend.yml
+++ b/.github/workflows/update_backend.yml
@@ -7,19 +7,16 @@ jobs:
   update_compat:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
         with:
-          python-version: 3.9
+          python-version: 3.12
           cache: pip
-      - name: "Install PySR"
+      - name: "Install dependencies"
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          python setup.py install
-
-          # Not needed:
-          # python -c 'import pysr; pysr.install()'
+          pip install tomlkit
 
       - name: "Get SymbolicRegression.jl latest version"
         id: get-latest
@@ -27,35 +24,22 @@ jobs:
           cd $(mktemp -d)
           git clone https://github.com/MilesCranmer/SymbolicRegression.jl
           cd SymbolicRegression.jl
-          echo "version=$(git describe --tags --abbrev=0 | sed 's/^v//')" >> $GITHUB_OUTPUT
+          echo "version=$(git describe --tags --match='v*' --abbrev=0 | sed 's/^v//')" >> $GITHUB_OUTPUT
 
-      - name: "Get SymbolicRegression.jl version used in PySR"
-        id: get-current
+      - name: "Update SymbolicRegression.jl version in PySR"
         run: |
-          echo "version=$(python -c 'import pysr; print(pysr.version.__symbolic_regression_jl_version__)' 2>/dev/null)" >> $GITHUB_OUTPUT
+          python .github/workflows/update_backend_version.py ${{ steps.get-latest.outputs.version }}
 
-      # If versions are different, we want to take our checked-out version,
-      # create a new branch called "update_compat_{...}", where the "..."
-      # is a timestamp. We then want to
-      # go to pysr/version.py, bump the patch version of PySR (__version__),
-      # set the version of __symbolic_regression_jl_version__ to the latest
-      # version of SymbolicRegression.jl, and then commit and push.
-      # Finally, we will open a PR from this branch to master.
-      - name: "Update versions"
-        if: ${{ steps.get-latest.outputs.version != steps.get-current.outputs.version }}
+      - name: "Restore changes if no diff to `pysr/juliapkg.json`"
         run: |
-          # Bump PySR patch number:
-          CURRENT_PYSR_PATCH_VERSION=$(python -c 'import pysr; print(pysr.version.__version__.split(".")[-1], end="")' 2>/dev/null)
-          NEW_PYSR_PATCH_VERSION=$((CURRENT_PYSR_PATCH_VERSION + 1))
-          sed -i "s/^__version__ = .*/__version__ = \"$(python -c 'import pysr; print(".".join(pysr.version.__version__.split(".")[:-1]), end="")' 2>/dev/null).${NEW_PYSR_PATCH_VERSION}\"/" pysr/version.py
+          if git diff --quiet pysr/juliapkg.json; then
+            echo "No changes to pysr/juliapkg.json. Restoring changes."
+            git restore pyproject.toml
+          fi
 
-          # Set SymbolicRegression.jl version:
-          sed -i "s/^__symbolic_regression_jl_version__ = .*/__symbolic_regression_jl_version__ = \"${{ steps.get-latest.outputs.version }}\"/" pysr/version.py
-
-      - name: "Create PR"
-        uses: peter-evans/create-pull-request@v3
+      - name: "Create PR if necessary"
+        uses: peter-evans/create-pull-request@v6
         with:
-          token: ${{ secrets.REPO_SCOPED_TOKEN }}
           title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}"
           body: |
             This PR was automatically generated by the GitHub Action `.github/workflows/update-backend.yml`
@@ -63,4 +47,6 @@ jobs:
             It updates the backend version to v${{ steps.get-latest.outputs.version }}. For a full description of the changes, see the backend changelog: [v${{ steps.get-latest.outputs.version }}](https://github.com/MilesCranmer/SymbolicRegression.jl/releases/tag/v${{ steps.get-latest.outputs.version }}).
           delete-branch: true
           commit-message: "Update backend version to v${{ steps.get-latest.outputs.version }}"
-          add-paths: pysr/version.py
+          add-paths: |
+            pyproject.toml
+            pysr/juliapkg.json
diff --git a/.github/workflows/update_backend_version.py b/.github/workflows/update_backend_version.py
new file mode 100644
index 00000000..696da9f5
--- /dev/null
+++ b/.github/workflows/update_backend_version.py
@@ -0,0 +1,31 @@
+import json
+import sys
+from pathlib import Path
+
+import tomlkit
+
+new_backend_version = sys.argv[1]
+
+assert not new_backend_version.startswith("v"), "Version should not start with 'v'"
+
+pyproject_toml = Path(__file__).parent / ".." / ".." / "pyproject.toml"
+juliapkg_json = Path(__file__).parent / ".." / ".."
/ "pysr" / "juliapkg.json" + +with open(pyproject_toml) as toml_file: + pyproject_data = tomlkit.parse(toml_file.read()) + +with open(juliapkg_json) as f: + juliapkg_data = json.load(f) + +major, minor, patch, *dev = pyproject_data["project"]["version"].split(".") +pyproject_data["project"]["version"] = f"{major}.{minor}.{int(patch)+1}" + +juliapkg_data["packages"]["SymbolicRegression"]["version"] = f"={new_backend_version}" + +with open(pyproject_toml, "w") as toml_file: + toml_file.write(tomlkit.dumps(pyproject_data)) + +with open(juliapkg_json, "w") as f: + json.dump(juliapkg_data, f, indent=4) + # Ensure ends with newline + f.write("\n") diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 52f19888..8b76dbf6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: # General linting - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -9,15 +9,17 @@ repos: - id: check-added-large-files # General formatting - repo: https://github.com/psf/black - rev: 23.11.0 + rev: 24.4.0 hooks: - id: black - id: black-jupyter + exclude: pysr/test/test_nb.ipynb # Stripping notebooks - repo: https://github.com/kynan/nbstripout - rev: 0.6.1 + rev: 0.7.1 hooks: - id: nbstripout + exclude: pysr/test/test_nb.ipynb # Unused imports - repo: https://github.com/hadialqattan/pycln rev: "v2.4.0" @@ -25,7 +27,7 @@ repos: - id: pycln # Sorted imports - repo: https://github.com/PyCQA/isort - rev: "5.13.0" + rev: "5.13.2" hooks: - id: isort additional_dependencies: [toml] diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f1695ea7..409cb7f1 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -42,7 +42,7 @@ Scan through our [existing issues](https://github.com/MilesCranmer/PySR/issues) check out the [guide](https://astroautomata.com/PySR/backend/) on modifying a custom SymbolicRegression.jl library. In this case, you might instead be interested in making suggestions to the [SymbolicRegression.jl](http://github.com/MilesCranmer/SymbolicRegression.jl) library. -4. You can install your local version of PySR with `python setup.py install`, and run tests with `python -m pysr.test main`. +4. You can install your local version of PySR with `python setup.py install`, and run tests with `python -m pysr test main`. ### Commit your update @@ -83,10 +83,11 @@ Thanks for being part of the PySR community! Johan Blรฅbรคck
Johan Blรฅbรคck

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ โš ๏ธ ๐Ÿ““ JuliusMartensen
JuliusMartensen

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ“ฆ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ง ๐Ÿ““ ngam
ngam

๐Ÿ’ป ๐Ÿš‡ ๐Ÿ“ฆ ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ + Christopher Rowley
Christopher Rowley

๐Ÿ’ป ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฆ ๐Ÿ‘€ Kaze Wong
Kaze Wong

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““ - Christopher Rackauckas
Christopher Rackauckas

๐Ÿ› ๐Ÿ’ป ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ”ง โš ๏ธ ๐Ÿ““ + Christopher Rackauckas
Christopher Rackauckas

๐Ÿ› ๐Ÿ’ป ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ”ง โš ๏ธ ๐Ÿ““ Patrick Kidger
Patrick Kidger

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ”ง โš ๏ธ ๐Ÿ““ Okon Samuel
Okon Samuel

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿšง ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ‘€ โš ๏ธ ๐Ÿ““ William Booth-Clibborn
William Booth-Clibborn

๐Ÿ’ป ๐ŸŒ ๐Ÿ“– ๐Ÿ““ ๐Ÿšง ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ @@ -94,24 +95,32 @@ Thanks for being part of the PySR community! Jerry Ling
Jerry Ling

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐ŸŒ ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ““ Charles Fox
Charles Fox

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““ Johann Brehmer
Johann Brehmer

๐Ÿ’ป ๐Ÿ“– ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ โš ๏ธ ๐Ÿ““ - Marius Millea
Marius Millea

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ““ + Marius Millea
Marius Millea

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ““ Coba
Coba

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€ ๐Ÿ““ + foxtran
foxtran

๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ”ง ๐Ÿ““ + Shah Mahdi Hasan
Shah Mahdi Hasan

๐Ÿ› ๐Ÿ’ป ๐Ÿ‘€ ๐Ÿ““ Pietro Monticone
Pietro Monticone

๐Ÿ› ๐Ÿ“– ๐Ÿ’ก Mateusz Kubica
Mateusz Kubica

๐Ÿ“– ๐Ÿ’ก Jay Wadekar
Jay Wadekar

๐Ÿ› ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ”ฌ Anthony Blaom, PhD
Anthony Blaom, PhD

๐Ÿš‡ ๐Ÿ’ก ๐Ÿ‘€ + + Jgmedina95
Jgmedina95

๐Ÿ› ๐Ÿ’ก ๐Ÿ‘€ Michael Abbott
Michael Abbott

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€ ๐Ÿ”ง Oscar Smith
Oscar Smith

๐Ÿ’ป ๐Ÿ’ก - - Eric Hanson
Eric Hanson

๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ““ Henrique Becker
Henrique Becker

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€ qwertyjl
qwertyjl

๐Ÿ› ๐Ÿ“– ๐Ÿ’ก ๐Ÿ““ Rik Huijzer
Rik Huijzer

๐Ÿ’ก ๐Ÿš‡ Hongyu Wang
Hongyu Wang

๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ”ฌ + + + Zehao Jin
Zehao Jin

๐Ÿ”ฌ ๐Ÿ“ฃ + Tanner Mengel
Tanner Mengel

๐Ÿ”ฌ ๐Ÿ“ฃ + Arthur Grundner
Arthur Grundner

๐Ÿ”ฌ ๐Ÿ“ฃ + sjwetzel
sjwetzel

๐Ÿ”ฌ ๐Ÿ“ฃ ๐Ÿ““ Saurav Maheshkar
Saurav Maheshkar

๐Ÿ”ง diff --git a/Dockerfile b/Dockerfile index 5a2cad2a..8b87b925 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ # This builds a dockerfile containing a working copy of PySR # with all pre-requisites installed. -ARG JLVERSION=1.9.1 -ARG PYVERSION=3.10.11 +ARG JLVERSION=1.9.4 +ARG PYVERSION=3.11.6 ARG BASE_IMAGE=bullseye FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl @@ -13,22 +13,23 @@ COPY --from=jl /usr/local/julia /usr/local/julia ENV PATH="/usr/local/julia/bin:${PATH}" # Install IPython and other useful libraries: -RUN pip install ipython matplotlib +RUN pip install --no-cache-dir ipython matplotlib WORKDIR /pysr # Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project) ADD ./requirements.txt /pysr/requirements.txt -RUN pip3 install -r /pysr/requirements.txt +RUN pip3 install --no-cache-dir -r /pysr/requirements.txt # Install PySR: # We do a minimal copy so it doesn't need to rerun at every file change: +ADD ./pyproject.toml /pysr/pyproject.toml ADD ./setup.py /pysr/setup.py -ADD ./pysr/ /pysr/pysr/ -RUN pip3 install . +ADD ./pysr /pysr/pysr +RUN pip3 install --no-cache-dir . # Install Julia pre-requisites: -RUN python3 -m pysr install +RUN python3 -c 'import pysr' # metainformation LABEL org.opencontainers.image.authors = "Miles Cranmer" diff --git a/README.md b/README.md index 417fc605..8ee07b0b 100644 --- a/README.md +++ b/README.md @@ -24,71 +24,11 @@ If you've finished a project with PySR, please submit a PR to showcase your work **Contents**: -- [Contributors](#contributors-) - [Why PySR?](#why-pysr) - [Installation](#installation) - [Quickstart](#quickstart) - [โ†’ Documentation](https://astroautomata.com/PySR) - -
- -### Contributors โœจ - -
- -We are eager to welcome new contributors! Check out our contributors [guide](https://github.com/MilesCranmer/PySR/blob/master/CONTRIBUTORS.md) for tips ๐Ÿš€. -If you have an idea for a new feature, don't hesitate to share it on the [issues](https://github.com/MilesCranmer/PySR/issues) or [discussions](https://github.com/MilesCranmer/PySR/discussions) page. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Mark Kittisopikul
Mark Kittisopikul

๐Ÿ’ป ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฆ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ
T Coxon
T Coxon

๐Ÿ› ๐Ÿ’ป ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ ๐Ÿ““
Dhananjay Ashok
Dhananjay Ashok

๐Ÿ’ป ๐ŸŒ ๐Ÿ’ก ๐Ÿšง โš ๏ธ
Johan Blรฅbรคck
Johan Blรฅbรคck

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ โš ๏ธ ๐Ÿ““
JuliusMartensen
JuliusMartensen

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ“ฆ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ง ๐Ÿ““
ngam
ngam

๐Ÿ’ป ๐Ÿš‡ ๐Ÿ“ฆ ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ
Kaze Wong
Kaze Wong

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““
Christopher Rackauckas
Christopher Rackauckas

๐Ÿ› ๐Ÿ’ป ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ”ง โš ๏ธ ๐Ÿ““
Patrick Kidger
Patrick Kidger

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ”ง โš ๏ธ ๐Ÿ““
Okon Samuel
Okon Samuel

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿšง ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ‘€ โš ๏ธ ๐Ÿ““
William Booth-Clibborn
William Booth-Clibborn

๐Ÿ’ป ๐ŸŒ ๐Ÿ“– ๐Ÿ““ ๐Ÿšง ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ
Pablo Lemos
Pablo Lemos

๐Ÿ› ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““
Jerry Ling
Jerry Ling

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐ŸŒ ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ““
Charles Fox
Charles Fox

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““
Johann Brehmer
Johann Brehmer

๐Ÿ’ป ๐Ÿ“– ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ โš ๏ธ ๐Ÿ““
Marius Millea
Marius Millea

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ““
Coba
Coba

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€ ๐Ÿ““
Pietro Monticone
Pietro Monticone

๐Ÿ› ๐Ÿ“– ๐Ÿ’ก
Mateusz Kubica
Mateusz Kubica

๐Ÿ“– ๐Ÿ’ก
Jay Wadekar
Jay Wadekar

๐Ÿ› ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ”ฌ
Anthony Blaom, PhD
Anthony Blaom, PhD

๐Ÿš‡ ๐Ÿ’ก ๐Ÿ‘€
Jgmedina95
Jgmedina95

๐Ÿ› ๐Ÿ’ก ๐Ÿ‘€
Michael Abbott
Michael Abbott

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€ ๐Ÿ”ง
Oscar Smith
Oscar Smith

๐Ÿ’ป ๐Ÿ’ก
Eric Hanson
Eric Hanson

๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ““
Henrique Becker
Henrique Becker

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€
qwertyjl
qwertyjl

๐Ÿ› ๐Ÿ“– ๐Ÿ’ก ๐Ÿ““
Rik Huijzer
Rik Huijzer

๐Ÿ’ก ๐Ÿš‡
Hongyu Wang
Hongyu Wang

๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ”ฌ
Saurav Maheshkar
Saurav Maheshkar

๐Ÿ”ง
- - - - - +- [Contributors](#contributors-)
@@ -125,48 +65,31 @@ an explicit and powerful way to interpret deep neural networks.
 
 ## Installation
 
-| [pip](#pip) | [conda](#conda) | [docker](#docker-build) |
-|:---:|:---:|:---:|
-| Everywhere (recommended) | Linux and Intel-based macOS | Everywhere (if all else fails) |
-
----
+### Pip
 
-### pip
+You can install PySR with pip:
 
-1. [Install Julia](https://julialang.org/downloads/)
-    - Alternatively, my personal preference is to use [juliaup](https://github.com/JuliaLang/juliaup#installation), which performs this automatically.
-2. Then, run:
-```bash
-pip3 install -U pysr
-```
-3. Finally, to install Julia dependencies:
 ```bash
-python3 -m pysr install
+pip install pysr
 ```
-> (Alternatively, from within Python, you can call `import pysr; pysr.install()`)
 
----
+Julia dependencies will be installed at first import.
 
-### conda
+### Conda
 
-The PySR build in conda includes all required dependencies, so you can install it by simply running:
+Similarly, with conda:
 
 ```bash
 conda install -c conda-forge pysr
 ```
 
-from within your target conda environment.
-However, note that the conda install does not support precompilation of Julia libraries, so the
-start time may be slightly slower as the JIT-compilation will be running.
-(Once the compilation finishes, there will not be a performance difference though.)
-
----
+### Dockerfile
 
-### docker build
+You can also use the `Dockerfile` to install PySR in a docker container:
 
 1. Clone this repo.
-2. In the repo, run the build command with:
+2. Within the repo's directory, build the docker container:
 ```bash
 docker build -t pysr .
 ```
@@ -179,13 +102,21 @@ For more details, see the [docker section](#docker).
 
 ---
 
-### Common issues
+### Troubleshooting
 
-Common issues tend to be related to Python not finding Julia.
-To debug this, try running `python3 -c 'import os; print(os.environ["PATH"])'`.
-If none of these folders contain your Julia binary, then you need to add Julia's `bin` folder to your `PATH` environment variable.
+One issue you might run into can result in a hard crash at import with
+a message like "`GLIBCXX_...` not found". This is due to another one of the Python dependencies
+loading an incorrect `libstdc++` library. To fix this, you should modify your
+`LD_LIBRARY_PATH` variable to reference the Julia libraries. For example, if the Julia
+version of `libstdc++.so` is located in `$HOME/.julia/juliaup/julia-1.10.0+0.x64.linux.gnu/lib/julia/`
+(which likely differs on your system!), you could add:
+
+```
+export LD_LIBRARY_PATH=$HOME/.julia/juliaup/julia-1.10.0+0.x64.linux.gnu/lib/julia/:$LD_LIBRARY_PATH
+```
+
+to your `.bashrc` or `.zshrc` file.
 
-**Running PySR on macOS with an M1 processor:** you should use the pip version, and make sure to get the Julia binary for ARM/M-series processors.
 
 ## Quickstart
 
@@ -223,7 +154,7 @@ model = PySRRegressor(
     ],
     extra_sympy_mappings={"inv": lambda x: 1 / x},
     # ^ Define operator for SymPy as well
-    loss="loss(prediction, target) = (prediction - target)^2",
+    elementwise_loss="loss(prediction, target) = (prediction - target)^2",
     # ^ Custom loss function (julia syntax)
 )
 ```
@@ -286,7 +217,7 @@ You may load the model from the `pkl` file with:
 model = PySRRegressor.from_file("hall_of_fame.2022-08-10_100832.281.pkl")
 ```
 
-There are several other useful features such as denoising (e.g., `denoising=True`),
+There are several other useful features such as denoising (e.g., `denoise=True`),
 feature selection (e.g., `select_k_features=3`).
For examples of these and other features, see the [examples page](https://astroautomata.com/PySR/examples). For a detailed look at more options, see the [options page](https://astroautomata.com/PySR/options). @@ -306,7 +237,7 @@ model = PySRRegressor( # ^ 2 populations per core, so one is always running. population_size=50, # ^ Slightly larger populations, for greater diversity. - ncyclesperiteration=500, + ncycles_per_iteration=500, # ^ Generations between migrations. niterations=10000000, # Run forever early_stop_condition=( @@ -356,13 +287,8 @@ model = PySRRegressor( # ^ Higher precision calculations. warm_start=True, # ^ Start from where left off. - turbo=True, + bumper=True, # ^ Faster evaluation (experimental) - julia_project=None, - # ^ Can set to the path of a folder containing the - # "SymbolicRegression.jl" repo, for custom modifications. - update=False, - # ^ Don't update Julia packages extra_sympy_mappings={"cos2": lambda x: sympy.cos(x)**2}, # extra_torch_mappings={sympy.cos: torch.cos}, # ^ Not needed as cos already defined, but this @@ -383,9 +309,14 @@ docker build -t pysr . ``` This builds an image called `pysr` for your system's architecture, -which also contains IPython. +which also contains IPython. You can select a specific version +of Python and Julia with: -You can then run this with: +```bash +docker build -t pysr --build-arg JLVERSION=1.10.0 --build-arg PYVERSION=3.11.6 . +``` + +You can then run with this dockerfile using: ```bash docker run -it --rm -v "$PWD:/data" pysr ipython @@ -397,3 +328,72 @@ and then launch ipython. If you have issues building for your system's architecture, you can emulate another architecture by including `--platform linux/amd64`, before the `build` and `run` commands. + +
+ +### Contributors โœจ + +
+ +We are eager to welcome new contributors! Check out our contributors [guide](https://github.com/MilesCranmer/PySR/blob/master/CONTRIBUTORS.md) for tips ๐Ÿš€. +If you have an idea for a new feature, don't hesitate to share it on the [issues](https://github.com/MilesCranmer/PySR/issues) or [discussions](https://github.com/MilesCranmer/PySR/discussions) page. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Mark Kittisopikul
Mark Kittisopikul

๐Ÿ’ป ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฆ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ
T Coxon
T Coxon

๐Ÿ› ๐Ÿ’ป ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ ๐Ÿ““
Dhananjay Ashok
Dhananjay Ashok

๐Ÿ’ป ๐ŸŒ ๐Ÿ’ก ๐Ÿšง โš ๏ธ
Johan Blรฅbรคck
Johan Blรฅbรคck

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ โš ๏ธ ๐Ÿ““
JuliusMartensen
JuliusMartensen

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ“ฆ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ง ๐Ÿ““
ngam
ngam

๐Ÿ’ป ๐Ÿš‡ ๐Ÿ“ฆ ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ
Christopher Rowley
Christopher Rowley

๐Ÿ’ป ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฆ ๐Ÿ‘€
Kaze Wong
Kaze Wong

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿš‡ ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““
Christopher Rackauckas
Christopher Rackauckas

๐Ÿ› ๐Ÿ’ป ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ”ง โš ๏ธ ๐Ÿ““
Patrick Kidger
Patrick Kidger

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿ”Œ ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ”ง โš ๏ธ ๐Ÿ““
Okon Samuel
Okon Samuel

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐Ÿšง ๐Ÿ’ก ๐Ÿš‡ ๐Ÿ‘€ โš ๏ธ ๐Ÿ““
William Booth-Clibborn
William Booth-Clibborn

๐Ÿ’ป ๐ŸŒ ๐Ÿ“– ๐Ÿ““ ๐Ÿšง ๐Ÿ‘€ ๐Ÿ”ง โš ๏ธ
Pablo Lemos
Pablo Lemos

๐Ÿ› ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““
Jerry Ling
Jerry Ling

๐Ÿ› ๐Ÿ’ป ๐Ÿ“– ๐ŸŒ ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ““
Charles Fox
Charles Fox

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ ๐Ÿ““
Johann Brehmer
Johann Brehmer

๐Ÿ’ป ๐Ÿ“– ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ”ฌ โš ๏ธ ๐Ÿ““
Marius Millea
Marius Millea

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ‘€ ๐Ÿ““
Coba
Coba

๐Ÿ› ๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€ ๐Ÿ““
foxtran
foxtran

๐Ÿ’ป ๐Ÿ’ก ๐Ÿšง ๐Ÿ”ง ๐Ÿ““
Shah Mahdi Hasan
Shah Mahdi Hasan

๐Ÿ› ๐Ÿ’ป ๐Ÿ‘€ ๐Ÿ““
Pietro Monticone
Pietro Monticone

๐Ÿ› ๐Ÿ“– ๐Ÿ’ก
Mateusz Kubica
Mateusz Kubica

๐Ÿ“– ๐Ÿ’ก
Jay Wadekar
Jay Wadekar

๐Ÿ› ๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ”ฌ
Anthony Blaom, PhD
Anthony Blaom, PhD

๐Ÿš‡ ๐Ÿ’ก ๐Ÿ‘€
Jgmedina95
Jgmedina95

๐Ÿ› ๐Ÿ’ก ๐Ÿ‘€
Michael Abbott
Michael Abbott

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€ ๐Ÿ”ง
Oscar Smith
Oscar Smith

๐Ÿ’ป ๐Ÿ’ก
Eric Hanson
Eric Hanson

๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ““
Henrique Becker
Henrique Becker

๐Ÿ’ป ๐Ÿ’ก ๐Ÿ‘€
qwertyjl
qwertyjl

๐Ÿ› ๐Ÿ“– ๐Ÿ’ก ๐Ÿ““
Rik Huijzer
Rik Huijzer

๐Ÿ’ก ๐Ÿš‡
Hongyu Wang
Hongyu Wang

๐Ÿ’ก ๐Ÿ“ฃ ๐Ÿ”ฌ
Zehao Jin
Zehao Jin

๐Ÿ”ฌ ๐Ÿ“ฃ
Tanner Mengel
Tanner Mengel

๐Ÿ”ฌ ๐Ÿ“ฃ
Arthur Grundner
Arthur Grundner

๐Ÿ”ฌ ๐Ÿ“ฃ
sjwetzel
sjwetzel

๐Ÿ”ฌ ๐Ÿ“ฃ ๐Ÿ““
Saurav Maheshkar
Saurav Maheshkar

๐Ÿ”ง
+ + + + + diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 0ced06bb..00000000 --- a/TODO.md +++ /dev/null @@ -1,142 +0,0 @@ -# TODO - -- [x] Async threading, and have a server of equations. So that threads aren't waiting for others to finish. -- [x] Print out speed of equation evaluation over time. Measure time it takes per cycle -- [x] Add ability to pass an operator as an anonymous function string. E.g., `binary_operators=["g(x, y) = x+y"]`. -- [x] Add error bar capability (thanks Johannes Buchner for suggestion) -- [x] Why don't the constants continually change? It should optimize them every time the equation appears. - - Restart the optimizer to help with this. -- [x] Add several common unary and binary operators; list these. -- [x] Try other initial conditions for optimizer -- [x] Make scaling of changes to constant a hyperparameter -- [x] Make deletion op join deleted subtree to parent -- [x] Update hall of fame every iteration? - - Seems to overfit early if we do this. -- [x] Consider adding mutation to pass an operator in through a new binary operator (e.g., exp(x3)->plus(exp(x3), ...)) - - (Added full insertion operator -- [x] Add a node at the top of a tree -- [x] Insert a node at the top of a subtree -- [x] Record very best individual in each population, and return at end. -- [x] Write our own tree copy operation; deepcopy() is the slowest operation by far. -- [x] Hyperparameter tune -- [x] Create a benchmark for accuracy -- [x] Add interface for either defining an operation to learn, or loading in arbitrary dataset. - - Could just write out the dataset in julia, or load it. -- [x] Create a Python interface -- [x] Explicit constant optimization on hall-of-fame - - Create method to find and return all constants, from left to right - - Create method to find and set all constants, in same order - - Pull up some optimization algorithm and add it. Keep the package small! -- [x] Create a benchmark for speed -- [x] Simplify subtrees with only constants beneath them. Or should I? Maybe randomly simplify sometimes? -- [x] Record hall of fame -- [x] Optionally (with hyperparameter) migrate the hall of fame, rather than current bests -- [x] Test performance of reduced precision integers - - No effect -- [x] Create struct to pass through all hyperparameters, instead of treating as constants - - Make sure doesn't affect performance -- [x] Rename package to avoid trademark issues - - PySR? -- [x] Put on PyPI -- [x] Treat baseline as a solution. -- [x] Print score alongside MSE: \delta \log(MSE)/\delta \log(complexity) -- [x] Calculating the loss function - there is duplicate calculations happening. -- [x] Declaration of the weights array every iteration -- [x] Sympy evaluation -- [x] Threaded recursion -- [x] Test suite -- [x] Performance: - Use an enum for functions instead of storing them? - - Gets ~40% speedup on small test. -- [x] Use @fastmath -- [x] Try @spawn over each sub-population. Do random sort, compute mutation for each, then replace 10% oldest. -- [x] Control max depth, rather than max number of nodes? -- [x] Allow user to pass names for variables - use these when printing -- [x] Check for domain errors in an equation quickly before actually running the entire array over it. (We do this now recursively - every single equation is checked for nans/infs when being computed.) -- [x] read the docs page -- [x] Create backup csv file so always something to copy from for `PySR`. Also use random hall of fame file by default. 
Call function to read from csv after running, so dont need to run again. Dump scores alongside MSE to .csv (and return with Pandas).
-- [x] Better cleanup of zombie processes after
-- [x] Consider printing output sorted by score, not by complexity.
-- [x] Increase max complexity slowly over time up to the actual max.
-- [x] Record density over complexity. Favor equations that have a density we have not explored yet. Want the final density to be evenly distributed.
-- [x] Do printing from Python side. Then we can do simplification and pretty-printing.
-- [x] Sympy printing
-- [x] Store Project.toml inside PySR's python code, rather than copied to site-packages.
-- [ ] Sort these todo lists by priority
-
-- [ ] Automatically convert log, log10, log2, pow to the correct operators.
-- [ ] I think the simplification isn't working correctly (post-merging SymbolicUtils.)
-- [ ] Show demo of PySRRegressor. Fit equations, then show how to view equations.
-- [ ] Add "selected" column string to regular equations dict.
-- [ ] List "Loss" instead of "MSE"
-
-## Feature ideas
-
-- [ ] Other default losses (e.g., abs, other likelihoods, or just allow user to pass this as a string).
-- [ ] Other dtypes available
-- [ ] NDSA-II
-- [ ] Cross-validation
-- [ ] Hierarchical model, so can re-use functional forms. Output of one equation goes into second equation?
-- [ ] Add function to plot equations
-- [ ] Refresh screen rather than dumping to stdout?
-- [ ] Add ability to save state from python
-- [ ] Additional degree operators?
-- [ ] Multi targets (vector ops). Idea 1: Node struct contains argument for which registers it is applied to. Then, can work with multiple components simultaneously. Though this may be tricky to get right. Idea 2: each op is defined by input/output space. Some operators are flexible, and the spaces should be adjusted automatically. Otherwise, only consider ops that make a tree possible. But will need additional ops here to get it to work. Idea 3: define each equation in 2 parts: one part that is shared between all outputs, and one that is different between all outputs. Maybe this could be an array of nodes corresponding to each output. And those nodes would define their functions.
-  - Much easier option: simply flatten the output vector, and set the index as another input feature. The equation learned will be a single equation containing indices as a feature.
-- [ ] Tree crossover? I.e., can take as input a part of the same equation, so long as it is the same level or below?
-- [ ] Create flexible way of providing "simplification recipes." I.e., plus(plus(T, C), C) => plus(T, +(C, C)). The user could pass these.
-- [ ] Consider allowing multi-threading turned off, for faster testing (cache issue on travis). Or could simply fix the caching issue there.
-- [ ] Consider returning only the equation of interest; rather than all equations.
-- [ ] Enable derivative operators. These would differentiate their right argument wrt their left argument, some input variable.
-
-## Algorithmic performance ideas:
-
-
-- [ ] Use package compiler and compile sr.jl into a standalone binary that can be used by pysr.
-- [ ] When doing equation warmup, only migrate those equations with almost the same complexity. Rather than having to consider simple equations later in the game.
-- [ ] Right now we only update the score based on some. Need to update score based on entire data! Note that optimizer only is used sometimes.
-- [ ] Idea: use gradient of equation with respect to each operator (perhaps simply add to each operator) to tell which part is the most "sensitive" to changes. Then, perhaps insert/delete/mutate on that part of the tree?
-- [ ] Start populations staggered; so that there is more frequent printing (and pops that start a bit later get hall of fame already)?
-- [ ] Consider adding mutation for constant<->variable
-- [ ] Implement more parts of the original Eureqa algorithms: https://www.creativemachineslab.com/eureqa.html
-- [ ] Experiment with freezing parts of model; then we only append/delete at end of tree.
-- [ ] Use NN to generate weights over all probability distribution conditional on error and existing equation, and train on some randomly-generated equations
-- [ ] For hierarchical idea: after running some number of iterations, do a search for "most common pattern". Then, turn that subtree into its own operator.
-- [ ] Calculate feature importances based on features we've already seen, then weight those features up in all random generations.
-- [ ] Calculate feature importances of future mutations, by looking at correlation between residual of model, and the features.
-  - Store feature importances of future, and periodically update it.
-- [ ] Punish depth rather than size, as depth really hurts during optimization.
-
-
-## Code performance ideas:
-
-- [ ] How hard is it to turn the recursive array evaluation into a for loop?
-- [ ] Try defining a binary tree as an array, rather than a linked list. See https://stackoverflow.com/a/6384714/2689923
-  - in array branch
-- [ ] Add true multi-node processing, with MPI, or just file sharing. Multiple populations per core.
-  - Ongoing in cluster branch
-- [ ] Performance: try inling things?
-- [ ] Try storing things like number nodes in a tree; then can iterate instead of counting
-
-```julia
-mutable struct Tree
-    degree::Array{Integer, 1}
-    val::Array{Float32, 1}
-    constant::Array{Bool, 1}
-    op::Array{Integer, 1}
-    Tree(s::Integer) = new(zeros(Integer, s), zeros(Float32, s), zeros(Bool, s), zeros(Integer, s))
-end
-```
-
-- Then, we could even work with trees on the GPU, since they are all pre-allocated arrays.
-- A population could be a Tree, but with degree 2 on all the degrees. So a slice of population arrays forms a tree.
-- How many operations can we do via matrix ops? Mutate node=>easy.
-- Can probably batch and do many operations at once across a population.
-  - Or, across all populations! Mutate operator: index 2D array and set it to random vector? But the indexing might hurt.
-- The big advantage: can evaluate all new mutated trees at once; as massive matrix operation.
-  - Can control depth, rather than maxsize. Then just pretend all trees are full and same depth. Then we really don't need to care about depth.
-
-- [ ] Can we cache calculations, or does the compiler do that? E.g., I should only have to run exp(x0) once; after that it should be read from memory.
-  - Done on caching branch. Currently am finding that this is quiet slow (presumably because memory allocation is the main issue).
-- [ ] Add GPU capability?
-  - Not sure if possible, as binary trees are the real bottleneck.
-  - Could generate on CPU, evaluate score on GPU?
diff --git a/benchmarks/hyperparamopt.py b/benchmarks/hyperparamopt.py index e6f4b1cf..385fc644 100644 --- a/benchmarks/hyperparamopt.py +++ b/benchmarks/hyperparamopt.py @@ -1,4 +1,5 @@ """Start a hyperoptimization from a single node""" + import pickle as pkl import sys @@ -149,6 +150,8 @@ def run_trial(args): weight_mutate_constant=10.0, # weight_mutate_operator=1, weight_mutate_operator=1.0, + # weight_swap_operands=1, + weight_swap_operands=1.0, # weight_randomize=1, weight_randomize=1.0, # weight_simplify=0.002, diff --git a/benchmarks/print_best_model.py b/benchmarks/print_best_model.py index 3dd9a6b1..ec847881 100644 --- a/benchmarks/print_best_model.py +++ b/benchmarks/print_best_model.py @@ -1,4 +1,5 @@ """Print the best model parameters and loss""" + import pickle as pkl from pprint import PrettyPrinter diff --git a/benchmarks/space.py b/benchmarks/space.py index 5d6a2e4e..8099e8b2 100644 --- a/benchmarks/space.py +++ b/benchmarks/space.py @@ -53,6 +53,10 @@ weight_mutate_operator=hp.loguniform( "weight_mutate_operator", np.log(0.0001), np.log(100) ), + # weight_swap_operands=1, + weight_swap_operands=hp.loguniform( + "weight_swap_operands", np.log(0.0001), np.log(100) + ), # weight_randomize=1, weight_randomize=hp.loguniform("weight_randomize", np.log(0.0001), np.log(100)), # weight_simplify=0.002, diff --git a/datasets/FeynmanEquations.csv b/datasets/FeynmanEquations.csv deleted file mode 100644 index bd80cfba..00000000 --- a/datasets/FeynmanEquations.csv +++ /dev/null @@ -1,101 +0,0 @@ -Filename,datapoints,Number,Output,Formula,# variables,v1_name,v1_low,v1_high,v2_name,v2_low,v2_high,v3_name,v3_low,v3_high,v4_name,v4_low,v4_high,v5_name,v5_low,v5_high,v6_name,v6_low,v6_high,v7_name,v7_low,v7_high,v8_name,v8_low,v8_high,v9_name,v9_low,v9_high,v10_name,v10_low,v10_high -I.6.2a,10,1,f,exp(-theta**2/2)/sqrt(2*pi),1,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,, -I.6.2,100,2,f,exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma),2,sigma,1,3,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,, -I.6.2b,1000,3,f,exp(-((theta-theta1)/sigma)**2/2)/(sqrt(2*pi)*sigma),3,sigma,1,3,theta,1,3,theta1,1,3,,,,,,,,,,,,,,,,,,,,, -I.8.14,100,4,d,sqrt((x2-x1)**2+(y2-y1)**2),4,x1,1,5,x2,1,5,y1,1,5,y2,1,5,,,,,,,,,,,,,,,,,, -I.9.18,1000000,5,F,G*m1*m2/((x2-x1)**2+(y2-y1)**2+(z2-z1)**2),9,m1,1,2,m2,1,2,G,1,2,x1,3,4,x2,1,2,y1,3,4,y2,1,2,z1,3,4,z2,1,2,,, -I.10.7,10,6,m,m_0/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,, -I.11.19,100,7,A,x1*y1+x2*y2+x3*y3,6,x1,1,5,x2,1,5,x3,1,5,y1,1,5,y2,1,5,y3,1,5,,,,,,,,,,,, -I.12.1,10,8,F,mu*Nn,2,mu,1,5,Nn,1,5,,,,,,,,,,,,,,,,,,,,,,,, -I.12.2,10,10,F,q1*q2*r/(4*pi*epsilon*r**3),4,q1,1,5,q2,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,, -I.12.4,10,11,Ef,q1*r/(4*pi*epsilon*r**3),3,q1,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,, -I.12.5,10,12,F,q2*Ef,2,q2,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,, -I.12.11,10,13,F,q*(Ef+B*v*sin(theta)),5,q,1,5,Ef,1,5,B,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,, -I.13.4,10,9,K,1/2*m*(v**2+u**2+w**2),4,m,1,5,v,1,5,u,1,5,w,1,5,,,,,,,,,,,,,,,,,, -I.13.12,10,14,U,G*m1*m2*(1/r2-1/r1),5,m1,1,5,m2,1,5,r1,1,5,r2,1,5,G,1,5,,,,,,,,,,,,,,, -I.14.3,10,15,U,m*g*z,3,m,1,5,g,1,5,z,1,5,,,,,,,,,,,,,,,,,,,,, -I.14.4,10,16,U,1/2*k_spring*x**2,2,k_spring,1,5,x,1,5,,,,,,,,,,,,,,,,,,,,,,,, -I.15.3x,10,17,x1,(x-u*t)/sqrt(1-u**2/c**2),4,x,5,10,u,1,2,c,3,20,t,1,2,,,,,,,,,,,,,,,,,, -I.15.3t,100,18,t1,(t-u*x/c**2)/sqrt(1-u**2/c**2),4,x,1,5,c,3,10,u,1,2,t,1,5,,,,,,,,,,,,,,,,,, -I.15.1,10,19,p,m_0*v/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,, 
-I.16.6,10,20,v1,(u+v)/(1+u*v/c**2),3,c,1,5,v,1,5,u,1,5,,,,,,,,,,,,,,,,,,,,, -I.18.4,10,21,r,(m1*r1+m2*r2)/(m1+m2),4,m1,1,5,m2,1,5,r1,1,5,r2,1,5,,,,,,,,,,,,,,,,,, -I.18.12,10,22,tau,r*F*sin(theta),3,r,1,5,F,1,5,theta,0,5,,,,,,,,,,,,,,,,,,,,, -I.18.14,10,23,L,m*r*v*sin(theta),4,m,1,5,r,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,,,, -I.24.6,10,24,E_n,1/2*m*(omega**2+omega_0**2)*1/2*x**2,4,m,1,3,omega,1,3,omega_0,1,3,x,1,3,,,,,,,,,,,,,,,,,, -I.25.13,10,25,Volt,q/C,2,q,1,5,C,1,5,,,,,,,,,,,,,,,,,,,,,,,, -I.26.2,100,26,theta1,arcsin(n*sin(theta2)),2,n,0,1,theta2,1,5,,,,,,,,,,,,,,,,,,,,,,,, -I.27.6,10,27,foc,1/(1/d1+n/d2),3,d1,1,5,d2,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,, -I.29.4,10,28,k,omega/c,2,omega,1,10,c,1,10,,,,,,,,,,,,,,,,,,,,,,,, -I.29.16,1000,29,x,sqrt(x1**2+x2**2-2*x1*x2*cos(theta1-theta2)),4,x1,1,5,x2,1,5,theta1,1,5,theta2,1,5,,,,,,,,,,,,,,,,,, -I.30.3,100,30,Int,Int_0*sin(n*theta/2)**2/sin(theta/2)**2,3,Int_0,1,5,theta,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,, -I.30.5,100,31,theta,arcsin(lambd/(n*d)),3,lambd,1,2,d,2,5,n,1,5,,,,,,,,,,,,,,,,,,,,, -I.32.5,10,32,Pwr,q**2*a**2/(6*pi*epsilon*c**3),4,q,1,5,a,1,5,epsilon,1,5,c,1,5,,,,,,,,,,,,,,,,,, -I.32.17,10,33,Pwr,(1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2),6,epsilon,1,2,c,1,2,Ef,1,2,r,1,2,omega,1,2,omega_0,3,5,,,,,,,,,,,, -I.34.8,10,34,omega,q*v*B/p,4,q,1,5,v,1,5,B,1,5,p,1,5,,,,,,,,,,,,,,,,,, -I.34.1,10,35,omega,omega_0/(1-v/c),3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,, -I.34.14,10,36,omega,(1+v/c)/sqrt(1-v**2/c**2)*omega_0,3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,, -I.34.27,10,37,E_n,(h/(2*pi))*omega,2,omega,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,, -I.37.4,100,38,Int,I1+I2+2*sqrt(I1*I2)*cos(delta),3,I1,1,5,I2,1,5,delta,1,5,,,,,,,,,,,,,,,,,,,,, -I.38.12,10,39,r,4*pi*epsilon*(h/(2*pi))**2/(m*q**2),4,m,1,5,q,1,5,h,1,5,epsilon,1,5,,,,,,,,,,,,,,,,,, -I.39.1,10,40,E_n,3/2*pr*V,2,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,,,, -I.39.11,10,41,E_n,1/(gamma-1)*pr*V,3,gamma,2,5,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,, -I.39.22,10,42,pr,n*kb*T/V,4,n,1,5,T,1,5,V,1,5,kb,1,5,,,,,,,,,,,,,,,,,, -I.40.1,10,43,n,n_0*exp(-m*g*x/(kb*T)),6,n_0,1,5,m,1,5,x,1,5,T,1,5,g,1,5,kb,1,5,,,,,,,,,,,, -I.41.16,10,44,L_rad,h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1)),5,omega,1,5,T,1,5,h,1,5,kb,1,5,c,1,5,,,,,,,,,,,,,,, -I.43.16,10,45,v,mu_drift*q*Volt/d,4,mu_drift,1,5,q,1,5,Volt,1,5,d,1,5,,,,,,,,,,,,,,,,,, -I.43.31,10,46,D,mob*kb*T,3,mob,1,5,T,1,5,kb,1,5,,,,,,,,,,,,,,,,,,,,, -I.43.43,10,47,kappa,1/(gamma-1)*kb*v/A,4,gamma,2,5,kb,1,5,A,1,5,v,1,5,,,,,,,,,,,,,,,,,, -I.44.4,10,48,E_n,n*kb*T*ln(V2/V1),5,n,1,5,kb,1,5,T,1,5,V1,1,5,V2,1,5,,,,,,,,,,,,,,, -I.47.23,10,49,c,sqrt(gamma*pr/rho),3,gamma,1,5,pr,1,5,rho,1,5,,,,,,,,,,,,,,,,,,,,, -I.48.2,100,50,E_n,m*c**2/sqrt(1-v**2/c**2),3,m,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,, -I.50.26,10,51,x,x1*(cos(omega*t)+alpha*cos(omega*t)**2),4,x1,1,3,omega,1,3,t,1,3,alpha,1,3,,,,,,,,,,,,,,,,,, -II.2.42,10,52,Pwr,kappa*(T2-T1)*A/d,5,kappa,1,5,T1,1,5,T2,1,5,A,1,5,d,1,5,,,,,,,,,,,,,,, -II.3.24,10,53,flux,Pwr/(4*pi*r**2),2,Pwr,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,,,, -II.4.23,10,54,Volt,q/(4*pi*epsilon*r),3,q,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,, -II.6.11,10,55,Volt,1/(4*pi*epsilon)*p_d*cos(theta)/r**2,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,, -II.6.15a,1000,56,Ef,p_d/(4*pi*epsilon)*3*z/r**5*sqrt(x**2+y**2),6,epsilon,1,3,p_d,1,3,r,1,3,x,1,3,y,1,3,z,1,3,,,,,,,,,,,, -II.6.15b,10,57,Ef,p_d/(4*pi*epsilon)*3*cos(theta)*sin(theta)/r**3,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,, 
-II.8.7,10,58,E_n,3/5*q**2/(4*pi*epsilon*d),3,q,1,5,epsilon,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,, -II.8.31,10,59,E_den,epsilon*Ef**2/2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,, -II.10.9,10,60,Ef,sigma_den/epsilon*1/(1+chi),3,sigma_den,1,5,epsilon,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,, -II.11.3,10,61,x,q*Ef/(m*(omega_0**2-omega**2)),5,q,1,3,Ef,1,3,m,1,3,omega_0,3,5,omega,1,2,,,,,,,,,,,,,,, -II.11.17,10,62,n,n_0*(1+p_d*Ef*cos(theta)/(kb*T)),6,n_0,1,3,kb,1,3,T,1,3,theta,1,3,p_d,1,3,Ef,1,3,,,,,,,,,,,, -II.11.20,10,63,Pol,n_rho*p_d**2*Ef/(3*kb*T),5,n_rho,1,5,p_d,1,5,Ef,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,, -II.11.27,100,64,Pol,n*alpha/(1-(n*alpha/3))*epsilon*Ef,4,n,0,1,alpha,0,1,epsilon,1,2,Ef,1,2,,,,,,,,,,,,,,,,,, -II.11.28,100,65,theta,1+n*alpha/(1-(n*alpha/3)),2,n,0,1,alpha,0,1,,,,,,,,,,,,,,,,,,,,,,,, -II.13.17,10,66,B,1/(4*pi*epsilon*c**2)*2*I/r,4,epsilon,1,5,c,1,5,I,1,5,r,1,5,,,,,,,,,,,,,,,,,, -II.13.23,100,67,rho_c,rho_c_0/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,, -II.13.34,10,68,j,rho_c_0*v/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,, -II.15.4,10,69,E_n,-mom*B*cos(theta),3,mom,1,5,B,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,, -II.15.5,10,70,E_n,-p_d*Ef*cos(theta),3,p_d,1,5,Ef,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,, -II.21.32,10,71,Volt,q/(4*pi*epsilon*r*(1-v/c)),5,q,1,5,epsilon,1,5,r,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,, -II.24.17,10,72,k,sqrt(omega**2/c**2-pi**2/d**2),3,omega,4,6,c,1,2,d,2,4,,,,,,,,,,,,,,,,,,,,, -II.27.16,10,73,flux,epsilon*c*Ef**2,3,epsilon,1,5,c,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,, -II.27.18,10,74,E_den,epsilon*Ef**2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,, -II.34.2a,10,75,I,q*v/(2*pi*r),3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,, -II.34.2,10,76,mom,q*v*r/2,3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,, -II.34.11,10,77,omega,g_*q*B/(2*m),4,g_,1,5,q,1,5,B,1,5,m,1,5,,,,,,,,,,,,,,,,,, -II.34.29a,10,78,mom,q*h/(4*pi*m),3,q,1,5,h,1,5,m,1,5,,,,,,,,,,,,,,,,,,,,, -II.34.29b,10,79,E_n,g_*mom*B*Jz/(h/(2*pi)),5,g_,1,5,h,1,5,Jz,1,5,mom,1,5,B,1,5,,,,,,,,,,,,,,, -II.35.18,10,80,n,n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T))),5,n_0,1,3,kb,1,3,T,1,3,mom,1,3,B,1,3,,,,,,,,,,,,,,, -II.35.21,10,81,M,n_rho*mom*tanh(mom*B/(kb*T)),5,n_rho,1,5,mom,1,5,B,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,, -II.36.38,10,82,f,mom*H/(kb*T)+(mom*alpha)/(epsilon*c**2*kb*T)*M,8,mom,1,3,H,1,3,kb,1,3,T,1,3,alpha,1,3,epsilon,1,3,c,1,3,M,1,3,,,,,, -II.37.1,10,83,E_n,mom*(1+chi)*B,3,mom,1,5,B,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,, -II.38.3,10,84,F,Y*A*x/d,4,Y,1,5,A,1,5,d,1,5,x,1,5,,,,,,,,,,,,,,,,,, -II.38.14,10,85,mu_S,Y/(2*(1+sigma)),2,Y,1,5,sigma,1,5,,,,,,,,,,,,,,,,,,,,,,,, -III.4.32,10,86,n,1/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,, -III.4.33,10,87,E_n,(h/(2*pi))*omega/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,, -III.7.38,10,88,omega,2*mom*B/(h/(2*pi)),3,mom,1,5,B,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,, -III.8.54,10,89,prob,sin(E_n*t/(h/(2*pi)))**2,3,E_n,1,2,t,1,2,h,1,4,,,,,,,,,,,,,,,,,,,,, -III.9.52,1000,90,prob,(p_d*Ef*t/(h/(2*pi)))*sin((omega-omega_0)*t/2)**2/((omega-omega_0)*t/2)**2,6,p_d,1,3,Ef,1,3,t,1,3,h,1,3,omega,1,5,omega_0,1,5,,,,,,,,,,,, -III.10.19,100,91,E_n,mom*sqrt(Bx**2+By**2+Bz**2),4,mom,1,5,Bx,1,5,By,1,5,Bz,1,5,,,,,,,,,,,,,,,,,, -III.12.43,10,92,L,n*(h/(2*pi)),2,n,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,, -III.13.18,10,93,v,2*E_n*d**2*k/(h/(2*pi)),4,E_n,1,5,d,1,5,k,1,5,h,1,5,,,,,,,,,,,,,,,,,, -III.14.14,10,94,I,I_0*(exp(q*Volt/(kb*T))-1),5,I_0,1,5,q,1,2,Volt,1,2,kb,1,2,T,1,2,,,,,,,,,,,,,,, 
-III.15.12,10,95,E_n,2*U*(1-cos(k*d)),3,U,1,5,k,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,, -III.15.14,10,96,m,(h/(2*pi))**2/(2*E_n*d**2),3,h,1,5,E_n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,, -III.15.27,10,97,k,2*pi*alpha/(n*d),3,alpha,1,5,n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,, -III.17.37,10,98,f,beta*(1+alpha*cos(theta)),3,beta,1,5,alpha,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,, -III.19.51,10,99,E_n,-m*q**4/(2*(4*pi*epsilon)**2*(h/(2*pi))**2)*(1/n**2),5,m,1,5,q,1,5,h,1,5,n,1,5,epsilon,1,5,,,,,,,,,,,,,,, -III.21.20,10,100,j,-rho_c_0*q*A_vec/m,4,rho_c_0,1,5,q,1,5,A_vec,1,5,m,1,5,,,,,,,,,,,,,,,,,, diff --git a/docs/all_contributors/.all-contributorsrc b/docs/all_contributors/.all-contributorsrc index 3346ab62..1db9ef06 100644 --- a/docs/all_contributors/.all-contributorsrc +++ b/docs/all_contributors/.all-contributorsrc @@ -108,6 +108,19 @@ "test" ] }, + { + "login": "cjdoris", + "name": "Christopher Rowley", + "avatar_url": "https://avatars.githubusercontent.com/u/1844215?v=4", + "profile": "https://cjdoris.github.io/", + "contributions": [ + "code", + "ideas", + "infra", + "platform", + "review" + ] + }, { "login": "kazewong", "name": "Kaze Wong", @@ -285,6 +298,31 @@ "userTesting" ] }, + { + "login": "foxtran", + "name": "foxtran", + "avatar_url": "https://avatars.githubusercontent.com/u/39676482?v=4", + "profile": "https://github.com/foxtran", + "contributions": [ + "code", + "ideas", + "maintenance", + "tool", + "userTesting" + ] + }, + { + "login": "tanweer-mahdi", + "name": "Shah Mahdi Hasan ", + "avatar_url": "https://avatars.githubusercontent.com/u/36223598?v=4", + "profile": "https://smhasan.com/", + "contributions": [ + "bug", + "code", + "review", + "userTesting" + ] + }, { "login": "pitmonticone", "name": "Pietro Monticone", @@ -417,6 +455,47 @@ "research" ] }, + { + "login": "ZehaoJin", + "name": "Zehao Jin", + "avatar_url": "https://avatars.githubusercontent.com/u/50961376?v=4", + "profile": "https://github.com/ZehaoJin", + "contributions": [ + "research", + "promotion" + ] + }, + { + "login": "tmengel", + "name": "Tanner Mengel", + "avatar_url": "https://avatars.githubusercontent.com/u/38924390?v=4", + "profile": "https://github.com/tmengel", + "contributions": [ + "research", + "promotion" + ] + }, + { + "login": "agrundner24", + "name": "Arthur Grundner", + "avatar_url": "https://avatars.githubusercontent.com/u/38557656?v=4", + "profile": "https://github.com/agrundner24", + "contributions": [ + "research", + "promotion" + ] + }, + { + "login": "sjwetzel", + "name": "sjwetzel", + "avatar_url": "https://avatars.githubusercontent.com/u/24393721?v=4", + "profile": "https://github.com/sjwetzel", + "contributions": [ + "research", + "promotion", + "userTesting" + ] + }, { "login": "SauravMaheshkar", "name": "Saurav Maheshkar", diff --git a/docs/backend.md b/docs/backend.md index 0c7afdbf..b7575d14 100644 --- a/docs/backend.md +++ b/docs/backend.md @@ -2,27 +2,73 @@ If you have explored the [options](options.md) and [PySRRegressor reference](api.md), and still haven't figured out how to specify a constraint or objective required for your problem, you might consider editing the backend. The backend of PySR is written as a pure Julia package under the name [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl). -This package is accessed with [`PyJulia`](https://github.com/JuliaPy/pyjulia), which allows us to transfer objects back and forth between the Python and Julia runtimes. 
+This package is accessed with [`juliacall`](https://github.com/JuliaPy/PythonCall.jl), which allows us to transfer objects back and forth between the Python and Julia runtimes.
 
 PySR gives you access to everything in SymbolicRegression.jl, but there are some specific use-cases which require modifications to the backend itself. Generally you can do this as follows:
 
-1. Clone a copy of the backend:
-```
+## 1. Check out the source code
+
+Clone a copy of the backend as well as PySR:
+
+```bash
 git clone https://github.com/MilesCranmer/SymbolicRegression.jl
+git clone https://github.com/MilesCranmer/PySR
+```
+
+You may wish to check out the specific versions, which you can do with:
+
+```bash
+cd PySR
+git checkout <version>
+
+# You can see the current backend version in `pysr/juliapkg.json`
+cd ../SymbolicRegression.jl
+git checkout <version>
+```
+
+## 2. Edit the source to your requirements
+
+The main search code can be found in `src/SymbolicRegression.jl`.
+
+Here are some tips:
+
+- The documentation for the backend is given [here](https://astroautomata.com/SymbolicRegression.jl/dev/).
+- Throughout the package, you will often see template functions which typically use a symbol `T` (such as in the string `where {T<:Real}`). Here, `T` is simply the datatype of the input data and stored constants, such as `Float32` or `Float64`. Writing functions in this way lets us write functions generic to types, while still having access to the specific type specified at compilation time.
+- Expressions are stored as binary trees, using the `Node{T}` type, described [here](https://astroautomata.com/SymbolicRegression.jl/dev/types/#SymbolicRegression.CoreModule.EquationModule.Node).
+- For reference, the main loop itself is found in the `equation_search` function inside [`src/SymbolicRegression.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl).
+- Parts of the code which are typically edited by users include:
+  - [`src/CheckConstraints.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/CheckConstraints.jl), particularly the function `check_constraints`. This function checks whether a given expression satisfies constraints, such as having a complexity lower than `maxsize`, and whether it contains any forbidden nestings of functions.
+    - Note that all expressions, *even intermediate expressions*, must comply with constraints. Therefore, make sure that evolution can still reach your desired expression (with one mutation at a time), before setting a hard constraint. In other cases you might want to instead put it in the loss function.
+  - [`src/Options.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/Options.jl), as well as the struct definition in [`src/OptionsStruct.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl). This file specifies all the options used in the search: an instance of `Options` is typically available throughout every function in `SymbolicRegression.jl`. If you add new functionality to the backend, and wish to make it parameterizable (including from PySR), you should specify it in the options.
+
+## 3. Let PySR use the modified backend
+
+Once you have made your changes, you should edit the `pysr/juliapkg.json` file
+in the PySR repository to point to this local copy.
+Do this by removing the `"version"` key and adding a `"dev"` and `"path"` key:
+
+```json
+    ...
+ "packages": { + "SymbolicRegression": { + "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb", + "dev": true, + "path": "/path/to/SymbolicRegression.jl" + }, + ... ``` -2. Edit the source code in `src/` to your requirements: - - The documentation for the backend is given [here](https://astroautomata.com/SymbolicRegression.jl/dev/). - - Throughout the package, you will often see template functions which typically use a symbol `T` (such as in the string `where {T<:Real}`). Here, `T` is simply the datatype of the input data and stored constants, such as `Float32` or `Float64`. Writing functions in this way lets us write functions generic to types, while still having access to the specific type specified at compilation time. - - Expressions are stored as binary trees, using the `Node{T}` type, described [here](https://astroautomata.com/SymbolicRegression.jl/dev/types/#SymbolicRegression.CoreModule.EquationModule.Node). - - Parts of the code which are typically edited by users include: - - [`src/LossFunctions.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/LossFunctions.jl), particularly the function `eval_loss`. This function assigns a loss to a given expression, using `eval_tree_array` to evaluate it, and `loss` to compute the loss with respect to the dataset. - - [`src/CheckConstraints.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/CheckConstraints.jl), particularly the function `check_constraints`. This function checks whether a given expression satisfies constraints, such as having a complexity lower than `maxsize`, and whether it contains any forbidden nestings of functions. - - Note that all expressions, *even intermediate expressions*, must comply with constraints. Therefore, make sure that evolution can still reach your desired expression (with one mutation at a time), before setting a hard constraint. In other cases you might want to instead put in the loss function. - - [`src/Options.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/Options.jl), as well as the struct definition in [`src/OptionsStruct.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl). This file specifies all the options used in the search: an instance of `Options` is typically available throughout every function in `SymbolicRegression.jl`. If you add new functionality to the backend, and wish to make it parameterizable (including from PySR), you should specify it in the options. - - For reference, the main loop itself is found in the `equation_search` function inside [`src/SymbolicRegression.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl). -3. Specify the directory of `SymbolicRegression.jl` to PySR by setting `julia_project` in the `PySRRegressor` object, and run `.fit` when you're ready. That's it! No compilation or build steps required. - - Note that it will automatically update your project by default; to turn this off, set `update=False`. + +You can then install PySR with this modified backend by running: + +```bash +cd PySR +pip install . +``` + +For more information on `juliapkg.json`, see [`pyjuliapkg`](https://github.com/JuliaPy/pyjuliapkg). + +## Additional notes If you get comfortable enough with the backend, you might consider using the Julia package directly: the API is given on the [SymbolicRegression.jl documentation](https://astroautomata.com/SymbolicRegression.jl/dev/). 
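Relatedly, as a quick sanity check that the development install described above took effect, you can ask Julia's package manager where SymbolicRegression.jl is loaded from. This is a minimal sketch, assuming `pysr/juliapkg.json` was edited as shown and PySR was reinstalled:

```python
# Verify which copy of SymbolicRegression.jl PySR loaded.
from pysr import jl

jl.seval("import Pkg")
jl.seval('Pkg.status("SymbolicRegression")')  # a dev'd package is listed with its local path
```

If the status output shows your local `/path/to/SymbolicRegression.jl` rather than a registered version number, the modified backend is active.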
diff --git a/docs/examples.md b/docs/examples.md index 1b3f7e52..754875e7 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -144,7 +144,7 @@ but there are still some additional steps you can take to reduce the effect of n One thing you could do, which we won't detail here, is to create a custom log-likelihood given some assumed noise model. By passing weights to the fit function, and -defining a custom loss function such as `loss="myloss(x, y, w) = w * (x - y)^2"`, +defining a custom loss function such as `elementwise_loss="myloss(x, y, w) = w * (x - y)^2"`, you can define any sort of log-likelihood you wish. (However, note that it must be bounded at zero) However, the simplest thing to do is preprocessing, just like for feature selection. To do this, @@ -189,12 +189,10 @@ where $p_i$ is the $i$th prime number, and $x$ is the input feature. Let's see if we can discover this using the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package. -First, let's manually initialize the Julia backend -(here, with 8 threads and `-O3`): +First, let's get the Julia backend: ```python -import pysr -jl = pysr.julia_helpers.init_julia(julia_kwargs={"threads": 8, "optimize": 3}) +from pysr import jl ``` `jl` stores the Julia runtime. @@ -203,7 +201,7 @@ Now, let's run some Julia code to add the Primes.jl package to the PySR environment: ```python -jl.eval(""" +jl.seval(""" import Pkg Pkg.add("Primes") """) @@ -213,13 +211,13 @@ This imports the Julia package manager, and uses it to install `Primes.jl`. Now let's import `Primes.jl`: ```python -jl.eval("import Primes") +jl.seval("import Primes") ``` Now, we define a custom operator: ```python -jl.eval(""" +jl.seval(""" function p(i::T) where T if (0.5 < i < 1000) return T(Primes.prime(round(Int, i))) @@ -237,7 +235,7 @@ If in-bounds, it rounds it to the nearest integer, compures the corresponding pr converts it to the same type as input. Next, let's generate a list of primes for our test dataset. -Since we are using PyJulia, we can just call `p` directly to do this: +Since we are using juliacall, we can just call `p` directly to do this: ```python primes = {i: jl.p(i*1.0) for i in range(1, 999)} @@ -382,7 +380,7 @@ end model = PySRRegressor( niterations=100, binary_operators=["*", "+", "-"], - full_objective=objective, + loss_function=objective, ) ``` @@ -430,7 +428,7 @@ the evaluation, as we simply evaluated each argument and divided the result) int `((2.3554819 + -0.3554746) - (x1 * (x0 * x0)))` and `(-1.0000019 - (x2 * x2))`, meaning that our discovered equation is equal to: -$\frac{x_0^2 x_1 - 2.0000073}{x_2^2 - 1.0000019}$, which +$\frac{x_0^2 x_1 - 2.0000073}{x_2^2 + 1.0000019}$, which is nearly the same as the true equation! ## 10. Dimensional constraints @@ -464,7 +462,7 @@ let's also create a custom loss function that looks at the error in log-space: ```python -loss = """function loss_fnc(prediction, target) +elementwise_loss = """function loss_fnc(prediction, target) scatter_loss = abs(log((abs(prediction)+1e-20) / (abs(target)+1e-20))) sign_loss = 10 * (sign(prediction) - sign(target))^2 return scatter_loss + sign_loss @@ -478,7 +476,7 @@ Now let's define our model: model = PySRRegressor( binary_operators=["+", "-", "*", "/"], unary_operators=["square"], - loss=loss, + elementwise_loss=elementwise_loss, complexity_of_constants=2, maxsize=25, niterations=100, @@ -522,6 +520,8 @@ a constant `"2.6353e-22[m sโปยฒ]"`. Note that this expression has a large dynamic range so may be difficult to find. 
 Consider searching with a larger `niterations` if needed.
+Note that you can also search for exclusively dimensionless constants by setting
+`dimensionless_constants_only` to `True`.
 
 ## 11. Additional features
 
diff --git a/docs/generate_papers.py b/docs/generate_papers.py
index 1ac82fc1..637de01c 100644
--- a/docs/generate_papers.py
+++ b/docs/generate_papers.py
@@ -1,4 +1,5 @@
 """This script generates the papers.md file from the papers.yml file."""
+
 from pathlib import Path
 
 import yaml
@@ -35,11 +36,16 @@
     abstract = paper["abstract"]
     image_file = paper["image"]
 
+    if image_file.startswith("http"):
+        absolute_image_file = image_file
+    else:
+        absolute_image_file = f"images/{image_file}"
+
     # Begin:
     paper_snippet = f"""
-![](images/{image_file}){{ width="500"}} +![]({absolute_image_file}){{ width="500"}}
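To make the change above concrete: here is a hedged, standalone sketch of the same image-path rule (`resolve_image` is a hypothetical helper for illustration, not part of the script):

```python
def resolve_image(image_file: str) -> str:
    """Mirror the branch added above: remote URLs pass through
    unchanged, while bare filenames are assumed to live under images/."""
    if image_file.startswith("http"):
        return image_file
    return f"images/{image_file}"


# Both styles now appear in docs/papers.yml:
print(resolve_image("electronnegativity_introduction.jpg"))
# -> images/electronnegativity_introduction.jpg
print(resolve_image("https://raw.githubusercontent.com/MilesCranmer/PySR_Docs/master/images/cell_state_classification.jpg"))
# -> the URL, unchanged
```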

diff --git a/docs/operators.md b/docs/operators.md index a469d21b..db538d8b 100644 --- a/docs/operators.md +++ b/docs/operators.md @@ -56,7 +56,6 @@ A selection of these and other valid operators are stated below. - `round` - `floor` - `ceil` -- `round` - `sign` ## Custom diff --git a/docs/options.md b/docs/options.md index 5eee94d8..0ccacbca 100644 --- a/docs/options.md +++ b/docs/options.md @@ -78,11 +78,11 @@ with the equations. Each cycle considers every 10-equation subsample (re-sampled for each individual 10, unless `fast_cycle` is set in which case the subsamples are separate groups of equations) a single time, producing one mutated equation for each. -The parameter `ncyclesperiteration` defines how many times this +The parameter `ncycles_per_iteration` defines how many times this occurs before the equations are compared to the hall of fame, and new equations are migrated from the hall of fame, or from other populations. It also controls how slowly annealing occurs. You may find that increasing -`ncyclesperiteration` results in a higher cycles-per-second, as the head +`ncycles_per_iteration` results in a higher cycles-per-second, as the head worker needs to reduce and distribute new equations less often, and also increases diversity. But at the same time, a smaller number it might be that migrating equations from the hall of fame helps @@ -243,7 +243,7 @@ train the parameters within JAX (and is differentiable). The default loss is mean-square error, and weighted mean-square error. One can pass an arbitrary Julia string to define a custom loss, using, -e.g., `loss="myloss(x, y) = abs(x - y)^1.5"`. For more details, +e.g., `elementwise_loss="myloss(x, y) = abs(x - y)^1.5"`. For more details, see the [Losses](https://milescranmer.github.io/SymbolicRegression.jl/dev/losses/) page for SymbolicRegression.jl. @@ -253,26 +253,26 @@ Here are some additional examples: abs(x-y) loss ```python -PySRRegressor(..., loss="f(x, y) = abs(x - y)^1.5") +PySRRegressor(..., elementwise_loss="f(x, y) = abs(x - y)^1.5") ``` Note that the function name doesn't matter: ```python -PySRRegressor(..., loss="loss(x, y) = abs(x * y)") +PySRRegressor(..., elementwise_loss="loss(x, y) = abs(x * y)") ``` With weights: ```python -model = PySRRegressor(..., loss="myloss(x, y, w) = w * abs(x - y)") +model = PySRRegressor(..., elementwise_loss="myloss(x, y, w) = w * abs(x - y)") model.fit(..., weights=weights) ``` Weights can be used in arbitrary ways: ```python -model = PySRRegressor(..., weights=weights, loss="myloss(x, y, w) = abs(x - y)^2/w^2") +model = PySRRegressor(..., weights=weights, elementwise_loss="myloss(x, y, w) = abs(x - y)^2/w^2") model.fit(..., weights=weights) ``` @@ -280,13 +280,13 @@ Built-in loss (faster) (see [losses](https://astroautomata.com/SymbolicRegressio This one computes the L3 norm: ```python -PySRRegressor(..., loss="LPDistLoss{3}()") +PySRRegressor(..., elementwise_loss="LPDistLoss{3}()") ``` Can also uses these losses for weighted (weighted-average): ```python -model = PySRRegressor(..., weights=weights, loss="LPDistLoss{3}()") +model = PySRRegressor(..., weights=weights, elementwise_loss="LPDistLoss{3}()") model.fit(..., weights=weights) ``` diff --git a/docs/papers.yml b/docs/papers.yml index 1558c071..b7911103 100644 --- a/docs/papers.yml +++ b/docs/papers.yml @@ -234,3 +234,14 @@ papers: abstract: "Electron transfer is the most elementary process in nature, but the existing electron transfer rules are seldom applied to high-pressure situations, such as in the deep Earth. 
Here we show a deep learning model to obtain the electronegativity of 96 elements under arbitrary pressure, and a regressed unified formula to quantify its relationship with pressure and electronic configuration. The relative work function of minerals is further predicted by electronegativity, presenting a decreasing trend with pressure because of pressure-induced electron delocalization. Using the work function as the case study of electronegativity, it reveals that the driving force behind directional electron transfer results from the enlarged work function difference between compounds with pressure. This well explains the deep high-conductivity anomalies, and helps discover the redox reactivity between widespread Fe(II)-bearing minerals and water during ongoing subduction. Our results give an insight into the fundamental physicochemical properties of elements and their compounds under pressure"
     image: electronnegativity_introduction.jpg
     date: 2023-03-31
+  - title: Discovering interpretable models of scientific image data with deep learning
+    authors:
+      - Christopher J. Soelistyo (1)
+      - Alan R. Lowe (1, 2)
+    affiliations:
+      1: The Alan Turing Institute
+      2: University College London
+    link: https://arxiv.org/abs/2402.03115
+    abstract: "How can we find interpretable, domain-appropriate models of natural phenomena given some complex, raw data such as images? Can we use such models to derive scientific insight from the data? In this paper, we propose some methods for achieving this. In particular, we implement disentangled representation learning, sparse deep neural network training and symbolic regression, and assess their usefulness in forming interpretable models of complex image data. We demonstrate their relevance to the field of bioimaging using a well-studied test problem of classifying cell states in microscopy data. We find that such methods can produce highly parsimonious models that achieve ~98% of the accuracy of black-box benchmark models, with a tiny fraction of the complexity. We explore the utility of such interpretable models in producing scientific explanations of the underlying biological phenomenon."
+    image: https://raw.githubusercontent.com/MilesCranmer/PySR_Docs/master/images/cell_state_classification.jpg
+    date: 2024-02-05
diff --git a/docs/tuning.md b/docs/tuning.md
index aac4d126..455c21da 100644
--- a/docs/tuning.md
+++ b/docs/tuning.md
@@ -14,13 +14,13 @@ I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node o
 2. Use only the operators I think it needs and no more.
 3. Increase `populations` to `3*num_cores`.
 4. If my dataset is more than 1000 points, I either subsample it (low-dimensional and not much noise) or set `batching=True` (high-dimensional or very noisy, so it needs to evaluate on all the data).
-5. While on a laptop or single node machine, you might leave the default `ncyclesperiteration`, on a cluster with ~100 cores I like to set `ncyclesperiteration` to maybe `5000` or so, until the head node occupation is under `10%`. (A larger value means the workers talk less frequently to eachother, which is useful when you have many workers!)
+5. While on a laptop or single node machine, you might leave the default `ncycles_per_iteration`; on a cluster with ~100 cores I like to set `ncycles_per_iteration` to maybe `5000` or so, until the head node occupation is under `10%`. (A larger value means the workers talk less frequently to each other, which is useful when you have many workers!)
 6. Set `constraints` and `nested_constraints` as strict as possible. These can help quite a bit with exploration. Typically, if I am using `pow`, I would set `constraints={"pow": (9, 1)}`, so that power laws can only have a variable or constant as their exponent. If I am using `sin` and `cos`, I also like to set `nested_constraints={"sin": {"sin": 0, "cos": 0}, "cos": {"sin": 0, "cos": 0}}`, so that sin and cos can't be nested, which seems to happen frequently. (Although in practice I would just use `sin`, since the search could always add a phase offset!)
 7. Set `maxsize` a bit larger than the final size you want. e.g., if you want a final equation of size `30`, you might set this to `35`, so that it has a bit of room to explore.
 8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
 9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
-10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncyclesperiteration` is large, so that optimization happens more frequently.
-11. Set `turbo` to `True`. This may or not work, if there's an error just turn it off (some operators are not SIMD-capable). If it does work, it should give you a nice 20% speedup.
+10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
+11. Set `bumper` to `True`. This turns on bump allocation but is experimental. It should give you a nice 20% speedup.
 12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge; they can look like they settle down, but then find a new family of expressions and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early. Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
diff --git a/environment.yml b/environment.yml
index 9eea9376..24ba80de 100644
--- a/environment.yml
+++ b/environment.yml
@@ -2,12 +2,11 @@ name: test
 channels:
   - conda-forge
 dependencies:
-  - sympy
-  - pandas
-  - numpy
-  - scikit-learn
-  - setuptools
-  - pyjulia
-  - openlibm
-  - openspecfun
-  - click
+  - python>=3.8
+  - sympy>=1.0.0,<2.0.0
+  - pandas>=0.21.0,<3.0.0
+  - numpy>=1.13.0,<2.0.0
+  - scikit-learn>=1.0.0,<2.0.0
+  - pyjuliacall>=0.9.15,<0.10.0
+  - click>=7.0.0,<9.0.0
+  - typing_extensions>=4.0.0,<5.0.0
diff --git a/example.py b/example.py
index e3b7deee..c39cab9c 100644
--- a/example.py
+++ b/example.py
@@ -18,7 +18,7 @@
     ],
     extra_sympy_mappings={"inv": lambda x: 1 / x},
     # ^ Define operator for SymPy as well
-    loss="loss(x, y) = (x - y)^2",
+    elementwise_loss="loss(x, y) = (x - y)^2",
     # ^ Custom loss function (julia syntax)
 )
diff --git a/examples/pysr_demo.ipynb b/examples/pysr_demo.ipynb
index 31d47598..53606cba 100644
--- a/examples/pysr_demo.ipynb
+++ b/examples/pysr_demo.ipynb
@@ -15,129 +15,49 @@
    "id": "tQ1r1bbb0yBv"
   },
   "source": [
-    "\n",
     "## Instructions\n",
     "1. 
Work on a copy of this notebook: _File_ > _Save a copy in Drive_ (you will need a Google account).\n", - "2. (Optional) If you would like to do the deep learning component of this tutorial, turn on the GPU with Edit->Notebook settings->Hardware accelerator->GPU\n", - "3. Execute the following cell (click on it and press Ctrl+Enter) to install Julia, IJulia and other packages (if needed, update `JULIA_VERSION` and the other parameters). This takes a couple of minutes.\n", - "4. Continue to the next section.\n", - "\n", - "_Notes_:\n", - "* If your Colab Runtime gets reset (e.g., due to inactivity), repeat steps 3, 4.\n", - "* After installation, if you want to change the Julia version or activate/deactivate the GPU, you will need to reset the Runtime: _Runtime_ > _Delete and disconnect runtime_ and repeat steps 2-4." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "COndi88gbDgO" - }, - "source": [ - "**Run the following code to install Julia**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GIeFXS0F0zww" - }, - "outputs": [], - "source": [ - "%%shell\n", - "set -e\n", - "\n", - "#---------------------------------------------------#\n", - "JULIA_VERSION=\"1.8.5\"\n", - "export JULIA_PKG_PRECOMPILE_AUTO=0\n", - "#---------------------------------------------------#\n", - "\n", - "if [ -z `which julia` ]; then\n", - " # Install Julia\n", - " JULIA_VER=`cut -d '.' -f -2 <<< \"$JULIA_VERSION\"`\n", - " echo \"Installing Julia $JULIA_VERSION on the current Colab Runtime...\"\n", - " BASE_URL=\"https://julialang-s3.julialang.org/bin/linux/x64\"\n", - " URL=\"$BASE_URL/$JULIA_VER/julia-$JULIA_VERSION-linux-x86_64.tar.gz\"\n", - " wget -nv $URL -O /tmp/julia.tar.gz # -nv means \"not verbose\"\n", - " tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1\n", - " rm /tmp/julia.tar.gz\n", - "\n", - " echo \"Installing PyCall.jl...\"\n", - " julia -e 'using Pkg; Pkg.add(\"PyCall\"); Pkg.build(\"PyCall\")'\n", - " julia -e 'println(\"Success\")'\n", - "\n", - "fi" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ORv1c6xvbDgV" - }, - "source": [ - "Install PySR and PyTorch-Lightning:" + "2. 
(Optional) If you would like to do the deep learning component of this tutorial, turn on the GPU with Edit->Notebook settings->Hardware accelerator->GPU\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "EhMRSZEYFPLz" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EhMRSZEYFPLz", + "outputId": "e3aad3cb-d921-473e-b77b-8fa6a3a9e2e8" }, "outputs": [], "source": [ - "%pip install -Uq pysr pytorch_lightning" + "!pip install -U pysr" ] }, { "cell_type": "markdown", - "metadata": { - "id": "etTMEV0wDqld" - }, + "metadata": {}, "source": [ - "The following step is not normally required, but colab's printing is non-standard and we need to manually set it up PyJulia:\n" + "Julia and Julia dependencies are installed at first import:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "id": "j666aOI8xWF_" - }, + "metadata": {}, "outputs": [], "source": [ - "from julia import Julia\n", - "\n", - "julia = Julia(compiled_modules=False, threads=\"auto\")\n", - "from julia import Main\n", - "from julia.tools import redirect_output_streams\n", - "\n", - "redirect_output_streams()" + "import pysr" ] }, { "cell_type": "markdown", "metadata": { - "id": "6u2WhbVhht-G" - }, - "source": [ - "Let's install the backend of PySR, and all required libraries.\n", - "\n", - "**(This may take some time)**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "J-0QbxyK1_51" + "id": "qeCPKd9wldEK" }, - "outputs": [], "source": [ - "import pysr\n", - "\n", - "# We don't precompile in colab because compiled modules are incompatible static Python libraries:\n", - "pysr.install(precompile=False)" + "Now, let's import everything else as well as the PySRRegressor:\n" ] }, { @@ -227,16 +147,21 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "p4PSrO-NK1Wa" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "p4PSrO-NK1Wa", + "outputId": "55910ab3-895d-400b-e9ce-c75aef639c68" }, "outputs": [], "source": [ "# Learn equations\n", "model = PySRRegressor(\n", " niterations=30,\n", - " binary_operators=[\"plus\", \"mult\"],\n", + " binary_operators=[\"+\", \"*\"],\n", " unary_operators=[\"cos\", \"exp\", \"sin\"],\n", - " **default_pysr_params\n", + " **default_pysr_params,\n", ")\n", "\n", "model.fit(X, y)" @@ -255,7 +180,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "4HR8gknlZz4W" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 252 + }, + "id": "4HR8gknlZz4W", + "outputId": "496283bd-a743-4cc6-a2f9-9619ba91d870" }, "outputs": [], "source": [ @@ -275,7 +205,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "IQKOohdpztS7" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 38 + }, + "id": "IQKOohdpztS7", + "outputId": "0e7d058a-cce1-45ae-db94-6625f7e53a06" }, "outputs": [], "source": [ @@ -295,7 +230,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "GRcxq-TTlpRX" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 39 + }, + "id": "GRcxq-TTlpRX", + "outputId": "50bda367-1ed1-4860-8fcf-c940f2e4d935" }, "outputs": [], "source": [ @@ -324,7 +264,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "HFGaNL6tbDgi" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "HFGaNL6tbDgi", + "outputId": "0f364da5-e18d-4e31-cadf-087d641a3aed" }, "outputs": [], "source": [ @@ -346,7 +291,11 @@ "cell_type": "code", 
"execution_count": null, "metadata": { - "id": "Vbz4IMsk2NYH" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Vbz4IMsk2NYH", + "outputId": "361d4b6e-ac23-479d-b511-5001af05ca43" }, "outputs": [], "source": [ @@ -406,14 +355,19 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "PoEkpvYuGUdy" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 339 + }, + "id": "PoEkpvYuGUdy", + "outputId": "02834373-a054-400b-8247-2bf33a5c5beb" }, "outputs": [], "source": [ "model = PySRRegressor(\n", " niterations=5,\n", " populations=40,\n", - " binary_operators=[\"plus\", \"mult\"],\n", + " binary_operators=[\"+\", \"*\"],\n", " unary_operators=[\"cos\", \"exp\", \"sin\", \"quart(x) = x^4\"],\n", " extra_sympy_mappings={\"quart\": lambda x: x**4},\n", ")\n", @@ -424,7 +378,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "emn2IajKbDgy" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 38 + }, + "id": "emn2IajKbDgy", + "outputId": "11d5d3cf-de43-4f2b-f653-30016e09bdd0" }, "outputs": [], "source": [ @@ -546,7 +505,12 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "sqMqb4nJ5ZR5" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 467 + }, + "id": "sqMqb4nJ5ZR5", + "outputId": "aa24922b-2395-4e00-dce3-268fc8e603dc" }, "outputs": [], "source": [ @@ -579,7 +543,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "v8WBYtcZbDhC" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "v8WBYtcZbDhC", + "outputId": "37d4002f-e9d6-40c0-9a24-c671d9c384e6" }, "outputs": [], "source": [ @@ -599,15 +567,19 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "a07K3KUjOxcp" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a07K3KUjOxcp", + "outputId": "41d11915-78b7-4446-c153-b92a5e2abd4c" }, "outputs": [], "source": [ "model = PySRRegressor(\n", - " loss=\"myloss(x, y, w) = w * abs(x - y)\", # Custom loss function with weights.\n", + " elementwise_loss=\"myloss(x, y, w) = w * abs(x - y)\", # Custom loss function with weights.\n", " niterations=20,\n", " populations=20, # Use more populations\n", - " binary_operators=[\"plus\", \"mult\"],\n", + " binary_operators=[\"+\", \"*\"],\n", " unary_operators=[\"cos\"],\n", ")\n", "model.fit(X, y, weights=weights)" @@ -688,17 +660,19 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "2x-8M8W4G-KM" + }, "source": [ "# Multiple outputs" ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "LIJcWqBQG-KM" + }, "source": [ "For multiple outputs, multiple equations are returned:" ] @@ -706,7 +680,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "_Aar1ZJwG-KM" + }, "outputs": [], "source": [ "X = 2 * np.random.randn(100, 5)\n", @@ -716,7 +692,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "9Znwq40PG-KM" + }, "outputs": [], "source": [ "model = PySRRegressor(\n", @@ -730,24 +708,28 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "0Y_vy0sqG-KM" + }, "outputs": [], "source": [ "model" ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "-UP49CsGG-KN" + }, "source": [ "# Julia packages and types" ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "tOdNHheUG-KN" + }, "source": [ "PySR 
uses [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl)\n", "as its search backend. This is a pure Julia package, and so can interface easily with any other\n", @@ -761,30 +743,14 @@ "where $p_i$ is the $i$th prime number, and $x$ is the input feature.\n", "\n", "Let's see if we can discover this using\n", - "the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package.\n", - "\n", - "First, let's get the Julia backend\n", - "Here, we might choose to manually specify unlimited threads, `-O3`,\n", - "and `compile_modules=False`, although this will only propagate if Julia has not yet started:" + "the [Primes.jl](https://github.com/JuliaMath/Primes.jl) package." ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pysr\n", - "\n", - "jl = pysr.julia_helpers.init_julia(\n", - " julia_kwargs={\"threads\": \"auto\", \"optimize\": 2, \"compiled_modules\": False}\n", - ")" - ] - }, - { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "af07m4uBG-KN" + }, "source": [ "\n", "\n", @@ -797,10 +763,14 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "xBlMY-s4G-KN" + }, "outputs": [], "source": [ - "jl.eval(\n", + "from pysr import jl\n", + "\n", + "jl.seval(\n", " \"\"\"\n", "import Pkg\n", "Pkg.add(\"Primes\")\n", @@ -809,9 +779,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "1rJFukD6G-KN" + }, "source": [ "This imports the Julia package manager, and uses it to install\n", "`Primes.jl`. Now let's import `Primes.jl`:" @@ -820,16 +791,36 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "1PQl1rIaG-KN" + }, "outputs": [], "source": [ - "jl.eval(\"import Primes\")" + "jl.seval(\"using Primes: prime\")" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, + "source": [ + "Note that PySR should automatically load the `juliacall.ipython` extension for you,\n", + "which means that you can also execute Julia code in the notebook using the `%%julia` magic:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%julia using Primes: prime" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "edGdMxKnG-KN" + }, "source": [ "\n", "Now, we define a custom operator:\n" @@ -838,14 +829,16 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "9Ut3HcW3G-KN" + }, "outputs": [], "source": [ - "jl.eval(\n", + "jl.seval(\n", " \"\"\"\n", "function p(i::T) where T\n", " if 0.5 < i < 1000\n", - " return T(Primes.prime(round(Int, i)))\n", + " return T(prime(round(Int, i)))\n", " else\n", " return T(NaN)\n", " end\n", @@ -855,9 +848,33 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, + "source": [ + "Or, equivalently:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%julia\n", + "function p(i::T) where T\n", + " if 0.5 < i < 1000\n", + " return T(prime(round(Int, i)))\n", + " else\n", + " return T(NaN)\n", + " end\n", + "end" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_wcV8889G-KN" + }, "source": [ "\n", "We have created a function `p`, which takes a number `i` of type `T` (e.g., `T=Float64`).\n", @@ -881,22 +898,25 @@ "(However, note that this version assumes 64-bit float input, rather than any input type `T`)\n", "\n", 
"Next, let's generate a list of primes for our test dataset.\n", - "Since we are using PyJulia, we can just call `p` directly to do this:\n" + "Since we are using juliacall, we can just call `p` directly to do this:\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "giqwisEPG-KN" + }, "outputs": [], "source": [ "primes = {i: jl.p(i * 1.0) for i in range(1, 999)}" ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "MPAqARj6G-KO" + }, "source": [ "Next, let's use this list of primes to create a dataset of $x, y$ pairs:" ] @@ -904,7 +924,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "jab4tRRRG-KO" + }, "outputs": [], "source": [ "import numpy as np\n", @@ -914,9 +936,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "3eFgWrjcG-KO" + }, "source": [ "Note that we have also added a tiny bit of noise to the dataset.\n", "\n", @@ -926,7 +949,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "pEYskM2_G-KO" + }, "outputs": [], "source": [ "from pysr import PySRRegressor\n", @@ -947,8 +972,9 @@ }, { "cell_type": "markdown", - "id": "ee30bd41", - "metadata": {}, + "metadata": { + "id": "ee30bd41" + }, "source": [ "We are all set to go! Let's see if we can find the true relation:" ] @@ -956,16 +982,19 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "li-TB19iG-KO" + }, "outputs": [], "source": [ "model.fit(X, y)" ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "jwhTWZryG-KO" + }, "source": [ "if all works out, you should be able to see the true relation (note that the constant offset might not be exactly 1, since it is allowed to round to the nearest integer).\n", "\n", @@ -975,7 +1004,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "bSlpX9xAG-KO" + }, "outputs": [], "source": [ "model.sympy()" @@ -991,7 +1022,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": { "id": "3hS2kTAbbDhL" @@ -1068,6 +1098,17 @@ "> We import torch *after* already starting PyJulia. This is required due to interference between their C bindings. If you use torch, and then run PyJulia, you will likely hit a segfault. So keep this in mind for mixed deep learning + PyJulia/PySR workflows." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k-Od8b9DlkHK" + }, + "outputs": [], + "source": [ + "!pip install pytorch_lightning" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1083,7 +1124,7 @@ "import pytorch_lightning as pl\n", "\n", "hidden = 128\n", - "total_steps = 30_000\n", + "total_steps = 50_000\n", "\n", "\n", "def mlp(size_in, size_out, act=nn.ReLU):\n", @@ -1292,13 +1333,15 @@ "\n", "> **Warning**\n", ">\n", - "> First, let's save the data, because sometimes PyTorch and PyJulia's C bindings interfere and cause the colab kernel to crash. If we need to restart, we can just load the data without having to retrain the network:" + "> First, let's save the data, because sometimes PyTorch and juliacall's C bindings interfere and cause the colab kernel to crash. 
If we need to restart, we can just load the data without having to retrain the network:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "UX7Am6mZG-KT" + }, "outputs": [], "source": [ "nnet_recordings = {\n", @@ -1316,17 +1359,20 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "krhaNlwFG-KT" + }, "source": [ - "We can now load the data, including after a crash (be sure to re-run the import cells at the top of this notebook, including the one that starts PyJulia)." + "We can now load the data, including after a crash (be sure to re-run the import cells at the top of this notebook, including the one that starts juliacall)." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "NF9aSFXHG-KT" + }, "outputs": [], "source": [ "import pickle as pkl\n", @@ -1339,9 +1385,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "_hTYHhDGG-KT" + }, "source": [ "And now fit using a subsample of the data (symbolic regression only needs a small sample to find the best equation):" ] @@ -1358,9 +1405,9 @@ "f_sample_idx = rstate.choice(f_input.shape[0], size=500, replace=False)\n", "\n", "model = PySRRegressor(\n", - " niterations=20,\n", - " binary_operators=[\"plus\", \"sub\", \"mult\"],\n", - " unary_operators=[\"cos\", \"square\", \"neg\"],\n", + " niterations=50,\n", + " binary_operators=[\"+\", \"-\", \"*\"],\n", + " unary_operators=[\"cos\", \"square\"],\n", ")\n", "model.fit(g_input[f_sample_idx], g_output[f_sample_idx])" ] @@ -1371,7 +1418,7 @@ "id": "1a738a33" }, "source": [ - "If this segfaults, restart the notebook, and run the initial imports and PyJulia part, but skip the PyTorch training. This is because PyTorch's C binding tends to interefere with PyJulia. You can then re-run the `pkl.load` cell to import the data." + "If this segfaults, restart the notebook, and run the initial imports and juliacall part, but skip the PyTorch training. This is because PyTorch's C binding tends to interefere with juliacall. You can then re-run the `pkl.load` cell to import the data." 
] }, { @@ -1384,14 +1431,13 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": { "id": "6WuaeqyqbDhe" }, "source": [ "Recall we are searching for $f$ and $g$ such that:\n", - "$$z=f(\\sum g(x_i))$$ \n", + "$$z=f(\\sum g(x_i))$$\n", "which approximates the true relation:\n", "$$ z = y^2,\\quad y = \\frac{1}{10} \\sum(y_i),\\quad y_i = x_{i0}^2 + 6 \\cos(2 x_{i2})$$\n", "\n", @@ -1459,7 +1505,6 @@ "metadata": { "accelerator": "GPU", "colab": { - "name": "pysr_demo.ipynb", "provenance": [] }, "gpuClass": "standard", diff --git a/mkdocs.yml b/mkdocs.yml index 8da8cc2d..cff11241 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -26,13 +26,12 @@ theme: nav: - index.md - examples.md + - api.md - operators.md - tuning.md - options.md - papers.md - - Reference: - - api.md - - api-advanced.md + - api-advanced.md - backend.md - interactive-docs.md diff --git a/pyproject.toml b/pyproject.toml index 5d7bf33d..94851ea6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,38 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "pysr" +version = "0.18.4" +authors = [ + {name = "Miles Cranmer", email = "miles.cranmer@gmail.com"}, +] +description = "Simple and efficient symbolic regression" +readme = {file = "README.md", content-type = "text/markdown"} +license = {file = "LICENSE"} +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "License :: OSI Approved :: Apache Software License" +] +dynamic = ["dependencies"] + +[tool.setuptools] +packages = ["pysr", "pysr._cli", "pysr.test"] +include-package-data = false +package-data = {pysr = ["juliapkg.json"]} + +[tool.setuptools.dynamic] +dependencies = {file = "requirements.txt"} + [tool.isort] profile = "black" + +[tool.rye] +dev-dependencies = [ + "pre-commit>=3.7.0", + "ipython>=8.23.0", + "ipykernel>=6.29.4", +] diff --git a/pysr/.gitignore b/pysr/.gitignore new file mode 100644 index 00000000..98527864 --- /dev/null +++ b/pysr/.gitignore @@ -0,0 +1 @@ +version.py diff --git a/pysr/__init__.py b/pysr/__init__.py index 99c6a974..e71e1990 100644 --- a/pysr/__init__.py +++ b/pysr/__init__.py @@ -1,17 +1,23 @@ +# This must be imported as early as possible to prevent +# library linking issues caused by numpy/pytorch/etc. importing +# old libraries: +from .julia_import import jl, SymbolicRegression # isort:skip + from . 
import sklearn_monkeypatch -from .deprecated import best, best_callable, best_row, best_tex, pysr +from .deprecated import best, best_callable, best_row, best_tex, install, pysr from .export_jax import sympy2jax from .export_torch import sympy2torch -from .feynman_problems import FeynmanProblem, Problem -from .julia_helpers import install from .sr import PySRRegressor + +# This file is created by setuptools_scm during the build process: from .version import __version__ __all__ = [ + "jl", + "SymbolicRegression", "sklearn_monkeypatch", "sympy2jax", "sympy2torch", - "FeynmanProblem", "Problem", "install", "PySRRegressor", diff --git a/pysr/__main__.py b/pysr/__main__.py index e8dbdf90..e196f3c4 100644 --- a/pysr/__main__.py +++ b/pysr/__main__.py @@ -1,4 +1,4 @@ -from pysr._cli.main import pysr as _cli +from ._cli.main import pysr as _cli if __name__ == "__main__": _cli(prog_name="pysr") diff --git a/pysr/_cli/main.py b/pysr/_cli/main.py index d82ab79e..b27b7ced 100644 --- a/pysr/_cli/main.py +++ b/pysr/_cli/main.py @@ -1,6 +1,18 @@ +import fnmatch +import sys +import unittest +import warnings + import click -from ..julia_helpers import install +from ..test import ( + get_runtests_cli, + runtests, + runtests_dev, + runtests_jax, + runtests_startup, + runtests_torch, +) @click.group("pysr") @@ -9,15 +21,13 @@ def pysr(context): ctx = context -@pysr.command("install", help="Install Julia dependencies for PySR.") +@pysr.command("install", help="DEPRECATED (dependencies are now installed at import).") @click.option( "-p", "julia_project", "--project", default=None, type=str, - help="Install in a specific Julia project (e.g., a local copy of SymbolicRegression.jl).", - metavar="PROJECT_DIRECTORY", ) @click.option("-q", "--quiet", is_flag=True, default=False, help="Disable logging.") @click.option( @@ -25,14 +35,67 @@ def pysr(context): "precompile", flag_value=True, default=None, - help="Force precompilation of Julia libraries.", ) @click.option( "--no-precompile", "precompile", flag_value=False, default=None, - help="Disable precompilation.", ) def _install(julia_project, quiet, precompile): - install(julia_project, quiet, precompile) + warnings.warn( + "This command is deprecated. Julia dependencies are now installed at first import." + ) + + +TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"} + + +@pysr.command("test") +@click.argument("tests", nargs=1) +@click.option( + "-k", + "expressions", + multiple=True, + type=str, + help="Filter expressions to select specific tests.", +) +def _tests(tests, expressions): + """Run parts of the PySR test suite. + + Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas. + """ + test_cases = [] + for test in tests.split(","): + if test == "main": + test_cases.extend(runtests(just_tests=True)) + elif test == "jax": + test_cases.extend(runtests_jax(just_tests=True)) + elif test == "torch": + test_cases.extend(runtests_torch(just_tests=True)) + elif test == "cli": + runtests_cli = get_runtests_cli() + test_cases.extend(runtests_cli(just_tests=True)) + elif test == "dev": + test_cases.extend(runtests_dev(just_tests=True)) + elif test == "startup": + test_cases.extend(runtests_startup(just_tests=True)) + else: + warnings.warn(f"Invalid test {test}. 
Skipping.") + + loader = unittest.TestLoader() + suite = unittest.TestSuite() + for test_case in test_cases: + loaded_tests = loader.loadTestsFromTestCase(test_case) + for test in loaded_tests: + if len(expressions) == 0 or any( + fnmatch.fnmatch(test.id(), "*" + expression + "*") + for expression in expressions + ): + suite.addTest(test) + + runner = unittest.TextTestRunner() + results = runner.run(suite) + + if not results.wasSuccessful(): + sys.exit(1) diff --git a/pysr/denoising.py b/pysr/denoising.py index b6548452..a18f0017 100644 --- a/pysr/denoising.py +++ b/pysr/denoising.py @@ -1,4 +1,5 @@ """Functions for denoising data during preprocessing.""" + import numpy as np diff --git a/pysr/deprecated.py b/pysr/deprecated.py index ecfeb45e..8905f282 100644 --- a/pysr/deprecated.py +++ b/pysr/deprecated.py @@ -1,6 +1,28 @@ """Various functions to deprecate features.""" + import warnings +from .julia_import import jl + + +def install(*args, **kwargs): + del args, kwargs + warnings.warn( + "The `install` function has been removed. " + "PySR now uses the `juliacall` package to install its dependencies automatically at import time. ", + FutureWarning, + ) + + +def init_julia(*args, **kwargs): + del args, kwargs + warnings.warn( + "The `init_julia` function has been removed. " + "Julia is now initialized automatically at import time.", + FutureWarning, + ) + return jl + def pysr(X, y, weights=None, **kwargs): # pragma: no cover from .sr import PySRRegressor @@ -55,37 +77,28 @@ def best_callable(*args, **kwargs): # pragma: no cover ) -def make_deprecated_kwargs_for_pysr_regressor(): - """Create dict of deprecated kwargs.""" - - deprecation_string = """ - fractionReplaced => fraction_replaced - fractionReplacedHof => fraction_replaced_hof - npop => population_size - hofMigration => hof_migration - shouldOptimizeConstants => should_optimize_constants - weightAddNode => weight_add_node - weightDeleteNode => weight_delete_node - weightDoNothing => weight_do_nothing - weightInsertNode => weight_insert_node - weightMutateConstant => weight_mutate_constant - weightMutateOperator => weight_mutate_operator - weightRandomize => weight_randomize - weightSimplify => weight_simplify - crossoverProbability => crossover_probability - perturbationFactor => perturbation_factor - batchSize => batch_size - warmupMaxsizeBy => warmup_maxsize_by - useFrequency => use_frequency - useFrequencyInTournament => use_frequency_in_tournament - """ - # Turn this into a dict: - deprecated_kwargs = {} - for line in deprecation_string.splitlines(): - line = line.replace(" ", "") - if line == "": - continue - old, new = line.split("=>") - deprecated_kwargs[old] = new - - return deprecated_kwargs +DEPRECATED_KWARGS = { + "fractionReplaced": "fraction_replaced", + "fractionReplacedHof": "fraction_replaced_hof", + "npop": "population_size", + "hofMigration": "hof_migration", + "shouldOptimizeConstants": "should_optimize_constants", + "weightAddNode": "weight_add_node", + "weightDeleteNode": "weight_delete_node", + "weightDoNothing": "weight_do_nothing", + "weightInsertNode": "weight_insert_node", + "weightMutateConstant": "weight_mutate_constant", + "weightMutateOperator": "weight_mutate_operator", + "weightSwapOperands": "weight_swap_operands", + "weightRandomize": "weight_randomize", + "weightSimplify": "weight_simplify", + "crossoverProbability": "crossover_probability", + "perturbationFactor": "perturbation_factor", + "batchSize": "batch_size", + "warmupMaxsizeBy": "warmup_maxsize_by", + "useFrequency": "use_frequency", + 
"useFrequencyInTournament": "use_frequency_in_tournament", + "ncyclesperiteration": "ncycles_per_iteration", + "loss": "elementwise_loss", + "full_objective": "loss_function", +} diff --git a/pysr/export_latex.py b/pysr/export_latex.py index 0316f872..1fa9505b 100644 --- a/pysr/export_latex.py +++ b/pysr/export_latex.py @@ -1,4 +1,5 @@ """Functions to help export PySR equations to LaTeX.""" + from typing import List, Optional, Tuple import pandas as pd @@ -23,7 +24,7 @@ def sympy2latex(expr, prec=3, full_prec=True, **settings) -> str: """Convert sympy expression to LaTeX with custom precision.""" settings["full_prec"] = full_prec printer = PreciseLatexPrinter(settings=settings, prec=prec) - return printer.doprint(expr) + return str(printer.doprint(expr)) def generate_table_environment( diff --git a/pysr/export_numpy.py b/pysr/export_numpy.py index e1c4d56f..037bd240 100644 --- a/pysr/export_numpy.py +++ b/pysr/export_numpy.py @@ -1,4 +1,5 @@ """Code for exporting discovered expressions to numpy""" + import warnings import numpy as np diff --git a/pysr/export_sympy.py b/pysr/export_sympy.py index f99a54a0..eeb50471 100644 --- a/pysr/export_sympy.py +++ b/pysr/export_sympy.py @@ -1,4 +1,5 @@ """Define utilities to export to sympy""" + from typing import Callable, Dict, List, Optional import sympy @@ -50,6 +51,7 @@ "round": lambda x: sympy.ceiling(x - 0.5), "max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)), "min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)), + "greater": lambda x, y: sympy.Piecewise((1.0, x > y), (0.0, True)), "cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)), "logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)), "logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)), @@ -57,6 +59,12 @@ } +def create_sympy_symbols_map( + feature_names_in: List[str], +) -> Dict[str, sympy.Symbol]: + return {variable: sympy.Symbol(variable) for variable in feature_names_in} + + def create_sympy_symbols( feature_names_in: List[str], ) -> List[sympy.Symbol]: @@ -64,10 +72,16 @@ def create_sympy_symbols( def pysr2sympy( - equation: str, *, extra_sympy_mappings: Optional[Dict[str, Callable]] = None + equation: str, + *, + feature_names_in: Optional[List[str]] = None, + extra_sympy_mappings: Optional[Dict[str, Callable]] = None, ): + if feature_names_in is None: + feature_names_in = [] local_sympy_mappings = { - **(extra_sympy_mappings if extra_sympy_mappings else {}), + **create_sympy_symbols_map(feature_names_in), + **(extra_sympy_mappings if extra_sympy_mappings is not None else {}), **sympy_mappings, } diff --git a/pysr/feature_selection.py b/pysr/feature_selection.py index a6ebf039..69306e35 100644 --- a/pysr/feature_selection.py +++ b/pysr/feature_selection.py @@ -1,4 +1,5 @@ """Functions for doing feature selection during preprocessing.""" + import numpy as np diff --git a/pysr/feynman_problems.py b/pysr/feynman_problems.py deleted file mode 100644 index b64b4139..00000000 --- a/pysr/feynman_problems.py +++ /dev/null @@ -1,176 +0,0 @@ -import csv -from functools import partial -from pathlib import Path - -import numpy as np - -from .deprecated import best, pysr - -PKG_DIR = Path(__file__).parents[1] -FEYNMAN_DATASET = PKG_DIR / "datasets" / "FeynmanEquations.csv" - - -class Problem: - """ - Problem API to work with PySR. 
- - Has attributes: X, y as pysr accepts, form which is a string representing the correct equation and variable_names - - Should be able to call pysr(problem.X, problem.y, var_names=problem.var_names) and have it work - """ - - def __init__(self, X, y, form=None, variable_names=None): - self.X = X - self.y = y - self.form = form - self.variable_names = variable_names - - -class FeynmanProblem(Problem): - """ - Stores the data for the problems from the 100 Feynman Equations on Physics. - This is the benchmark used in the AI Feynman Paper - """ - - def __init__(self, row, gen=False, dp=500): - """ - row: a row read as a dict from the FeynmanEquations dataset provided in the datasets folder of the repo - gen: If true the problem will have dp X and y values randomly generated else they will be None - """ - self.eq_id = row["Filename"] - self.n_vars = int(row["# variables"]) - super(FeynmanProblem, self).__init__( - None, - None, - form=row["Formula"], - variable_names=[row[f"v{i + 1}_name"] for i in range(self.n_vars)], - ) - self.low = [float(row[f"v{i+1}_low"]) for i in range(self.n_vars)] - self.high = [float(row[f"v{i+1}_high"]) for i in range(self.n_vars)] - self.dp = dp - if gen: - self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars)) - d = {} - for var in range(len(self.variable_names)): - d[self.variable_names[var]] = self.X[:, var] - d["exp"] = np.exp - d["sqrt"] = np.sqrt - d["pi"] = np.pi - d["cos"] = np.cos - d["sin"] = np.sin - d["tan"] = np.tan - d["tanh"] = np.tanh - d["ln"] = np.log - d["log"] = np.log # Quite sure the Feynman dataset has no base 10 logs - d["arcsin"] = np.arcsin - self.y = eval(self.form, d) - - def __str__(self): - return f"Feynman Equation: {self.eq_id}|Form: {self.form}" - - def __repr__(self): - return str(self) - - -def mk_problems(first=100, gen=False, dp=500, data_dir=FEYNMAN_DATASET): - """ - - first: the first "first" equations from the dataset will be made into problems - data_dir: the path pointing to the Feynman Equations csv - returns: list of FeynmanProblems - """ - ret = [] - with open(data_dir) as csvfile: - reader = csv.DictReader(csvfile) - for i, row in enumerate(reader): - if i > first: - break - if row["Filename"] == "": - continue - p = FeynmanProblem(row, gen=gen, dp=dp) - ret.append(p) - return ret - - -def run_on_problem(problem, verbosity=0, multiprocessing=True): - """ - Takes in a problem and returns a tuple: (equations, best predicted equation, actual equation) - """ - from time import time - - starting = time() - equations = pysr( - problem.X, - problem.y, - variable_names=problem.variable_names, - verbosity=verbosity, - ) - timing = time() - starting - others = {"time": timing, "problem": problem} - if not multiprocessing: - others["equations"] = equations - return str(best(equations)), problem.form, others - - -def do_feynman_experiments_parallel( - first=100, - verbosity=0, - dp=500, - output_file_path="FeynmanExperiment.csv", - data_dir=FEYNMAN_DATASET, -): - import multiprocessing as mp - - from tqdm import tqdm - - problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir) - ids = [] - predictions = [] - true_equations = [] - time_takens = [] - pool = mp.Pool() - results = [] - with tqdm(total=len(problems)) as pbar: - f = partial(run_on_problem, verbosity=verbosity) - for i, res in enumerate(pool.imap(f, problems)): - results.append(res) - pbar.update() - for res in results: - prediction, true_equation, others = res - problem = others["problem"] - ids.append(problem.eq_id) - 
predictions.append(prediction) - true_equations.append(true_equation) - time_takens.append(others["time"]) - with open(output_file_path, "a") as f: - writer = csv.writer(f, delimiter=",") - writer.writerow(["ID", "Predicted", "True", "Time"]) - for i in range(len(ids)): - writer.writerow([ids[i], predictions[i], true_equations[i], time_takens[i]]) - - -def do_feynman_experiments( - first=100, - verbosity=0, - dp=500, - output_file_path="FeynmanExperiment.csv", - data_dir=FEYNMAN_DATASET, -): - from tqdm import tqdm - - problems = mk_problems(first=first, gen=True, dp=dp, data_dir=data_dir) - ids = [] - predictions = [] - true_equations = [] - time_takens = [] - for problem in tqdm(problems): - prediction, true_equation, others = run_on_problem(problem, verbosity) - ids.append(problem.eq_id) - predictions.append(prediction) - true_equations.append(true_equation) - time_takens.append(others["time"]) - with open(output_file_path, "a") as f: - writer = csv.writer(f, delimiter=",") - writer.writerow(["ID", "Predicted", "True", "Time"]) - for i in range(len(ids)): - writer.writerow([ids[i], predictions[i], true_equations[i], time_takens[i]]) diff --git a/pysr/julia_extensions.py b/pysr/julia_extensions.py new file mode 100644 index 00000000..5c537105 --- /dev/null +++ b/pysr/julia_extensions.py @@ -0,0 +1,36 @@ +"""This file installs and loads extensions for SymbolicRegression.""" + +from typing import Optional + +from .julia_import import Pkg, jl + + +def load_required_packages( + *, + turbo: bool = False, + bumper: bool = False, + enable_autodiff: bool = False, + cluster_manager: Optional[str] = None, +): + if turbo: + load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890") + if bumper: + load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e") + if enable_autodiff: + load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f") + if cluster_manager is not None: + load_package("ClusterManagers", "34f1f09b-3a8b-5176-ab39-66d58a4d544e") + + +def isinstalled(uuid_s: str): + return jl.haskey(Pkg.dependencies(), jl.Base.UUID(uuid_s)) + + +def load_package(package_name: str, uuid_s: str) -> None: + if not isinstalled(uuid_s): + Pkg.add(name=package_name, uuid=uuid_s) + + # TODO: Protect against loading the same symbol from two packages, + # maybe with a @gensym here. 
+ jl.seval(f"using {package_name}: {package_name}") + return None diff --git a/pysr/julia_helpers.py b/pysr/julia_helpers.py index e2f76090..ad8341d4 100644 --- a/pysr/julia_helpers.py +++ b/pysr/julia_helpers.py @@ -1,284 +1,18 @@ """Functions for initializing the Julia environment and installing deps.""" -import os -import subprocess -import sys -import warnings -from pathlib import Path -from julia.api import JuliaError +import numpy as np +from juliacall import convert as jl_convert # type: ignore -from .version import __symbolic_regression_jl_version__, __version__ +from .deprecated import init_julia, install +from .julia_import import jl -juliainfo = None -julia_initialized = False -julia_kwargs_at_initialization = None -julia_activated_env = None +jl.seval("using Serialization: Serialization") +jl.seval("using PythonCall: PythonCall") +Serialization = jl.Serialization +PythonCall = jl.PythonCall -def _load_juliainfo(): - """Execute julia.core.JuliaInfo.load(), and store as juliainfo.""" - global juliainfo - - if juliainfo is None: - from julia.core import JuliaInfo - - try: - juliainfo = JuliaInfo.load(julia="julia") - except FileNotFoundError: - env_path = os.environ["PATH"] - raise FileNotFoundError( - f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}", - ) - - return juliainfo - - -def _get_julia_env_dir(): - # Have to manually get env dir: - try: - julia_env_dir_str = subprocess.run( - ["julia", "-e using Pkg; print(Pkg.envdir())"], - capture_output=True, - env=os.environ, - ).stdout.decode() - except FileNotFoundError: - env_path = os.environ["PATH"] - raise FileNotFoundError( - f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}", - ) - return Path(julia_env_dir_str) - - -def _set_julia_project_env(julia_project, is_shared): - if is_shared: - if is_julia_version_greater_eq(version=(1, 7, 0)): - os.environ["JULIA_PROJECT"] = "@" + str(julia_project) - else: - julia_env_dir = _get_julia_env_dir() - os.environ["JULIA_PROJECT"] = str(julia_env_dir / julia_project) - else: - os.environ["JULIA_PROJECT"] = str(julia_project) - - -def _get_io_arg(quiet): - io = "devnull" if quiet else "stderr" - io_arg = f"io={io}" if is_julia_version_greater_eq(version=(1, 6, 0)) else "" - return io_arg - - -def install(julia_project=None, quiet=False, precompile=None): # pragma: no cover - """ - Install PyCall.jl and all required dependencies for SymbolicRegression.jl. - - Also updates the local Julia registry. - """ - import julia - - _julia_version_assertion() - # Set JULIA_PROJECT so that we install in the pysr environment - processed_julia_project, is_shared = _process_julia_project(julia_project) - _set_julia_project_env(processed_julia_project, is_shared) - - if precompile == False: - os.environ["JULIA_PKG_PRECOMPILE_AUTO"] = "0" - - try: - julia.install(quiet=quiet) - except julia.tools.PyCallInstallError: - # Attempt to reset PyCall.jl's build: - subprocess.run( - [ - "julia", - "-e", - f'ENV["PYTHON"] = "{sys.executable}"; import Pkg; Pkg.build("PyCall")', - ], - ) - # Try installing again: - try: - julia.install(quiet=quiet) - except julia.tools.PyCallInstallError: - warnings.warn( - "PyCall.jl failed to install on second attempt. " - + "Please consult the GitHub issue " - + "https://github.com/MilesCranmer/PySR/issues/257 " - + "for advice on fixing this." 
- ) - - Main, init_log = init_julia(julia_project, quiet=quiet, return_aux=True) - io_arg = _get_io_arg(quiet) - - if precompile is None: - precompile = init_log["compiled_modules"] - - if not precompile: - Main.eval('ENV["JULIA_PKG_PRECOMPILE_AUTO"] = 0') - - if is_shared: - # Install SymbolicRegression.jl: - _add_sr_to_julia_project(Main, io_arg) - - Main.eval("using Pkg") - Main.eval(f"Pkg.instantiate({io_arg})") - - if precompile: - Main.eval(f"Pkg.precompile({io_arg})") - - if not quiet: - warnings.warn( - "It is recommended to restart Python after installing PySR's dependencies," - " so that the Julia environment is properly initialized." - ) - - -def _import_error(): - return """ - Required dependencies are not installed or built. Run the following command in your terminal: - python3 -m pysr install - """ - - -def _process_julia_project(julia_project): - if julia_project is None: - is_shared = True - processed_julia_project = f"pysr-{__version__}" - elif julia_project[0] == "@": - is_shared = True - processed_julia_project = julia_project[1:] - else: - is_shared = False - processed_julia_project = Path(julia_project) - return processed_julia_project, is_shared - - -def is_julia_version_greater_eq(juliainfo=None, version=(1, 6, 0)): - """Check if Julia version is greater than specified version.""" - if juliainfo is None: - juliainfo = _load_juliainfo() - current_version = ( - juliainfo.version_major, - juliainfo.version_minor, - juliainfo.version_patch, - ) - return current_version >= version - - -def _check_for_conflicting_libraries(): # pragma: no cover - """Check whether there are conflicting modules, and display warnings.""" - # See https://github.com/pytorch/pytorch/issues/78829: importing - # pytorch before running `pysr.fit` causes a segfault. - torch_is_loaded = "torch" in sys.modules - if torch_is_loaded: - warnings.warn( - "`torch` was loaded before the Julia instance started. " - "This may cause a segfault when running `PySRRegressor.fit`. " - "To avoid this, please run `pysr.julia_helpers.init_julia()` *before* " - "importing `torch`. " - "For updates, see https://github.com/pytorch/pytorch/issues/78829" - ) - - -def init_julia(julia_project=None, quiet=False, julia_kwargs=None, return_aux=False): - """Initialize julia binary, turning off compiled modules if needed.""" - global julia_initialized - global julia_kwargs_at_initialization - global julia_activated_env - - if not julia_initialized: - _check_for_conflicting_libraries() - - if julia_kwargs is None: - julia_kwargs = {"optimize": 3} - - from julia.core import JuliaInfo, UnsupportedPythonError - - _julia_version_assertion() - processed_julia_project, is_shared = _process_julia_project(julia_project) - _set_julia_project_env(processed_julia_project, is_shared) - - try: - info = JuliaInfo.load(julia="julia") - except FileNotFoundError: - env_path = os.environ["PATH"] - raise FileNotFoundError( - f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}", - ) - - if not info.is_pycall_built(): - raise ImportError(_import_error()) - - from julia.core import Julia - - try: - Julia(**julia_kwargs) - except UnsupportedPythonError: - # Static python binary, so we turn off pre-compiled modules. - julia_kwargs = {**julia_kwargs, "compiled_modules": False} - Julia(**julia_kwargs) - warnings.warn( - "Your system's Python library is static (e.g., conda), so precompilation will be turned off. 
For a dynamic library, try using `pyenv` and installing with `--enable-shared`: https://github.com/pyenv/pyenv/blob/master/plugins/python-build/README.md#building-with---enable-shared." - ) - - using_compiled_modules = (not "compiled_modules" in julia_kwargs) or julia_kwargs[ - "compiled_modules" - ] - - from julia import Main as _Main - - Main = _Main - - if julia_activated_env is None: - julia_activated_env = processed_julia_project - - if julia_initialized and julia_kwargs_at_initialization is not None: - # Check if the kwargs are the same as the previous initialization - init_set = set(julia_kwargs_at_initialization.items()) - new_set = set(julia_kwargs.items()) - set_diff = new_set - init_set - # Remove the `compiled_modules` key, since it is not a user-specified kwarg: - set_diff = {k: v for k, v in set_diff if k != "compiled_modules"} - if len(set_diff) > 0: - warnings.warn( - "Julia has already started. The new Julia options " - + str(set_diff) - + " will be ignored." - ) - - if julia_initialized and julia_activated_env != processed_julia_project: - Main.eval("using Pkg") - - io_arg = _get_io_arg(quiet) - # Can't pass IO to Julia call as it evaluates to PyObject, so just directly - # use Main.eval: - Main.eval( - f'Pkg.activate("{_escape_filename(processed_julia_project)}",' - f"shared = Bool({int(is_shared)}), " - f"{io_arg})" - ) - - julia_activated_env = processed_julia_project - - if not julia_initialized: - julia_kwargs_at_initialization = julia_kwargs - - julia_initialized = True - if return_aux: - return Main, {"compiled_modules": using_compiled_modules} - return Main - - -def _add_sr_to_julia_project(Main, io_arg): - Main.eval("using Pkg") - Main.eval("Pkg.Registry.update()") - Main.sr_spec = Main.PackageSpec( - name="SymbolicRegression", - url="https://github.com/MilesCranmer/SymbolicRegression.jl", - rev="v" + __symbolic_regression_jl_version__, - ) - Main.clustermanagers_spec = Main.PackageSpec( - name="ClusterManagers", - version="0.4", - ) - Main.eval(f"Pkg.add([sr_spec, clustermanagers_spec], {io_arg})") +jl.seval("using SymbolicRegression: plus, sub, mult, div, pow") def _escape_filename(filename): @@ -288,60 +22,27 @@ def _escape_filename(filename): return str_repr -def _julia_version_assertion(): - if not is_julia_version_greater_eq(version=(1, 6, 0)): - raise NotImplementedError( - "PySR requires Julia 1.6.0 or greater. " - "Please update your Julia installation." - ) - - -def _backend_version_assertion(Main): - try: - backend_version = Main.eval("string(SymbolicRegression.PACKAGE_VERSION)") - expected_backend_version = __symbolic_regression_jl_version__ - if backend_version != expected_backend_version: # pragma: no cover - warnings.warn( - f"PySR backend (SymbolicRegression.jl) version {backend_version} " - f"does not match expected version {expected_backend_version}. " - "Things may break. " - "Please update your PySR installation with " - "`python3 -m pysr install`." - ) - except JuliaError: # pragma: no cover - warnings.warn( - "You seem to have an outdated version of SymbolicRegression.jl. " - "Things may break. " - "Please update your PySR installation with " - "`python3 -m pysr install`." 
- ) - - -def _load_cluster_manager(Main, cluster_manager): - Main.eval(f"import ClusterManagers: addprocs_{cluster_manager}") - return Main.eval(f"addprocs_{cluster_manager}") - +def _load_cluster_manager(cluster_manager): + jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}") + return jl.seval(f"addprocs_{cluster_manager}") -def _update_julia_project(Main, is_shared, io_arg): - try: - if is_shared: - _add_sr_to_julia_project(Main, io_arg) - Main.eval("using Pkg") - Main.eval(f"Pkg.resolve({io_arg})") - except (JuliaError, RuntimeError) as e: - raise ImportError(_import_error()) from e +def jl_array(x): + if x is None: + return None + return jl_convert(jl.Array, x) -def _load_backend(Main): - try: - # Load namespace, so that various internal operators work: - Main.eval("using SymbolicRegression") - except (JuliaError, RuntimeError) as e: - raise ImportError(_import_error()) from e - _backend_version_assertion(Main) +def jl_serialize(obj): + buf = jl.IOBuffer() + Serialization.serialize(buf, obj) + return np.array(jl.take_b(buf)) - # Load Julia package SymbolicRegression.jl - from julia import SymbolicRegression - return SymbolicRegression +def jl_deserialize(s): + if s is None: + return s + buf = jl.IOBuffer() + jl.write(buf, jl_array(s)) + jl.seekstart(buf) + return Serialization.deserialize(buf) diff --git a/pysr/julia_import.py b/pysr/julia_import.py new file mode 100644 index 00000000..f1c15513 --- /dev/null +++ b/pysr/julia_import.py @@ -0,0 +1,51 @@ +import os +import sys +import warnings + +# Check if JuliaCall is already loaded, and if so, warn the user +# about the relevant environment variables. If not loaded, +# set up sensible defaults. +if "juliacall" in sys.modules: + warnings.warn( + "juliacall module already imported. " + "Make sure that you have set the environment variable `PYTHON_JULIACALL_HANDLE_SIGNALS=yes` to avoid segfaults. " + "Also note that PySR will not be able to configure `PYTHON_JULIACALL_THREADS` or `PYTHON_JULIACALL_OPTLEVEL` for you." + ) +else: + # Required to avoid segfaults (https://juliapy.github.io/PythonCall.jl/dev/faq/) + if os.environ.get("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes") != "yes": + warnings.warn( + "PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set to something other than 'yes' or ''. " + + "You will experience segfaults if running with multithreading." + ) + + if os.environ.get("PYTHON_JULIACALL_THREADS", "auto") != "auto": + warnings.warn( + "PYTHON_JULIACALL_THREADS environment variable is set to something other than 'auto', " + "so PySR was not able to set it. You may wish to set it to `'auto'` for full use " + "of your CPU." 
+ ) + + # TODO: Remove these when juliapkg lets you specify this + for k, default in ( + ("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes"), + ("PYTHON_JULIACALL_THREADS", "auto"), + ("PYTHON_JULIACALL_OPTLEVEL", "3"), + ): + os.environ[k] = os.environ.get(k, default) + + +autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS") +if autoload_extensions is not None: + # Deprecated; so just pass to juliacall + os.environ["PYTHON_JULIACALL_AUTOLOAD_IPYTHON_EXTENSION"] = autoload_extensions + +from juliacall import Main as jl # type: ignore + +jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch) + +jl.seval("using SymbolicRegression") +SymbolicRegression = jl.SymbolicRegression + +jl.seval("using Pkg: Pkg") +Pkg = jl.Pkg diff --git a/pysr/juliapkg.json b/pysr/juliapkg.json new file mode 100644 index 00000000..db8de4ec --- /dev/null +++ b/pysr/juliapkg.json @@ -0,0 +1,13 @@ +{ + "julia": "~1.6.7, ~1.7, ~1.8, ~1.9, =1.10.0, ^1.10.3", + "packages": { + "SymbolicRegression": { + "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb", + "version": "=0.24.4" + }, + "Serialization": { + "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b", + "version": "1" + } + } +} diff --git a/pysr/param_groupings.yml b/pysr/param_groupings.yml index 08d6fa5d..0ff9d63d 100644 --- a/pysr/param_groupings.yml +++ b/pysr/param_groupings.yml @@ -8,12 +8,13 @@ - niterations - populations - population_size - - ncyclesperiteration + - ncycles_per_iteration - The Objective: - - loss - - full_objective + - elementwise_loss + - loss_function - model_selection - dimensional_constraint_penalty + - dimensionless_constants_only - Working with Complexities: - parsimony - constraints @@ -33,6 +34,7 @@ - weight_do_nothing - weight_mutate_constant - weight_mutate_operator + - weight_swap_operands - weight_randomize - weight_simplify - weight_optimize @@ -67,11 +69,13 @@ - procs - multithreading - cluster_manager + - heap_size_hint_in_bytes - batching - batch_size - precision - fast_cycle - turbo + - bumper - enable_autodiff - Determinism: - random_state @@ -86,9 +90,7 @@ - temp_equation_file - tempdir - delete_tempfiles - - julia_project - update - - julia_kwargs - Exporting the Results: - equation_file - output_jax_format diff --git a/pysr/sklearn_monkeypatch.py b/pysr/sklearn_monkeypatch.py index 2d7d1dbe..dfdcd165 100644 --- a/pysr/sklearn_monkeypatch.py +++ b/pysr/sklearn_monkeypatch.py @@ -3,11 +3,10 @@ from sklearn.utils import validation -def _ensure_no_complex_data(*args, **kwargs): - ... +def _ensure_no_complex_data(*args, **kwargs): ... try: validation._ensure_no_complex_data = _ensure_no_complex_data -except AttributeError: +except AttributeError: # pragma: no cover ... 
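
The serialization helpers added to `pysr/julia_helpers.py` above (`jl_serialize`/`jl_deserialize`) are what make the new `julia_options_stream_`/`julia_state_stream_` attributes in `sr.py` below picklable. A minimal sketch of the intended round trip, assuming a working JuliaCall-based install of this branch (not part of the diff):

```python
import numpy as np

from pysr.julia_helpers import jl_deserialize, jl_serialize
from pysr.julia_import import jl

# Serialize an arbitrary Julia object into a numpy uint8 buffer,
# which pickles like any other ndarray:
state = jl.seval("(1, [1.0, 2.0])")
stream = jl_serialize(state)
assert stream.dtype == np.uint8

# ...and reconstruct the original Julia object from that buffer:
restored = jl_deserialize(stream)
```

This is why `__getstate__` in `sr.py` no longer needs to strip backend state before pickling: only the byte streams are stored on the estimator, and the `julia_options_`/`julia_state_` properties deserialize them lazily on access.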
diff --git a/pysr/sr.py b/pysr/sr.py index ef99693d..2f976ec1 100644 --- a/pysr/sr.py +++ b/pysr/sr.py @@ -1,4 +1,5 @@ """Define the PySRRegressor scikit-learn interface.""" + import copy import os import pickle as pkl @@ -11,7 +12,12 @@ from io import StringIO from multiprocessing import cpu_count from pathlib import Path -from typing import List, Optional +from typing import Callable, Dict, List, Optional, Tuple, Union + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal import numpy as np import pandas as pd @@ -20,22 +26,23 @@ from sklearn.utils.validation import _check_feature_names_in, check_is_fitted from .denoising import denoise, multi_denoise -from .deprecated import make_deprecated_kwargs_for_pysr_regressor +from .deprecated import DEPRECATED_KWARGS from .export_jax import sympy2jax from .export_latex import sympy2latex, sympy2latextable, sympy2multilatextable from .export_numpy import sympy2numpy from .export_sympy import assert_valid_sympy_symbol, create_sympy_symbols, pysr2sympy from .export_torch import sympy2torch from .feature_selection import run_feature_selection +from .julia_extensions import load_required_packages from .julia_helpers import ( + PythonCall, _escape_filename, - _load_backend, _load_cluster_manager, - _process_julia_project, - _update_julia_project, - init_julia, - is_julia_version_greater_eq, + jl_array, + jl_deserialize, + jl_serialize, ) +from .julia_import import SymbolicRegression, jl from .utils import ( _csv_filename_to_pkl_filename, _preprocess_julia_floats, @@ -43,8 +50,6 @@ _subscriptify, ) -Main = None # TODO: Rename to more descriptive name like "julia_runtime" - already_ran = False @@ -87,7 +92,6 @@ def _process_constraints(binary_operators, unary_operators, constraints): def _maybe_create_inline_operators( binary_operators, unary_operators, extra_sympy_mappings ): - global Main binary_operators = binary_operators.copy() unary_operators = unary_operators.copy() for op_list in [binary_operators, unary_operators]: @@ -95,7 +99,7 @@ def _maybe_create_inline_operators( is_user_defined_operator = "(" in op if is_user_defined_operator: - Main.eval(op) + jl.seval(op) # Cut off from the first non-alphanumeric char: first_non_char = [j for j, char in enumerate(op) if char == "("][0] function_name = op[:first_non_char] @@ -171,7 +175,7 @@ def _check_assertions( # Class validation constants -VALID_OPTIMIZER_ALGORITHMS = ["NelderMead", "BFGS"] +VALID_OPTIMIZER_ALGORITHMS = ["BFGS", "NelderMead"] class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): @@ -266,7 +270,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): arguments are treated the same way, and the max of each argument is constrained. Default is `None`. - loss : str + elementwise_loss : str String of Julia code specifying an elementwise loss function. Can either be a loss from LossFunctions.jl, or your own loss written as a function. Examples of custom written losses include: @@ -282,11 +286,11 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): `ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`, `SigmoidLoss()`, `DWDMarginLoss(q)`. Default is `"L2DistLoss()"`. - full_objective : str + loss_function : str Alternatively, you can specify the full objective function as a snippet of Julia code, including any sort of custom evaluation (including symbolic manipulations beforehand), and any sort - of loss function or regularizations. 
The default `full_objective`
+        of loss function or regularizations. The default `loss_function`
         used in SymbolicRegression.jl is roughly equal to:
         ```julia
         function eval_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}
@@ -324,6 +328,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     dimensional_constraint_penalty : float
         Additive penalty for if dimensional analysis of an expression fails.
         By default, this is `1000.0`.
+    dimensionless_constants_only : bool
+        Whether to only search for dimensionless constants, if using units.
+        Default is `False`.
     use_frequency : bool
         Whether to measure the frequency of complexities, and use that
         instead of parsimony to explore equation space. Will naturally
@@ -352,7 +359,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         takes a loss and complexity as input, for example:
         `"f(loss, complexity) = (loss < 0.1) && (complexity < 10)"`.
         Default is `None`.
-    ncyclesperiteration : int
+    ncycles_per_iteration : int
         Number of total mutations to run, per 10 samples of the
         population, per iteration.
         Default is `550`.
@@ -382,6 +389,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     weight_mutate_operator : float
         Relative likelihood for mutation to swap an operator.
         Default is `0.47`.
+    weight_swap_operands : float
+        Relative likelihood for swapping operands in binary operators.
+        Default is `0.1`.
     weight_randomize : float
         Relative likelihood for mutation to completely delete and then
         randomly generate the equation
@@ -393,7 +403,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Constant optimization can also be performed as a mutation, in addition to
         the normal strategy controlled by `optimize_probability` which happens
         every iteration. Using it as a mutation is useful if you want to use
-        a large `ncyclesperiteration`, and may not optimize very often.
+        a large `ncycles_per_iteration`, and may not optimize very often.
         Default is `0.0`.
     crossover_probability : float
         Absolute probability of crossover-type genetic operation, instead of a mutation.
@@ -455,6 +465,12 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         "htc". If set to one of these, PySR will run in distributed
         mode, and use `procs` to figure out how many processes to launch.
         Default is `None`.
+    heap_size_hint_in_bytes : int
+        For multiprocessing, this sets the `--heap-size-hint` parameter
+        for new Julia processes. This can be configured when using
+        multi-node distributed compute, to give a hint to each process
+        about how much memory they can use before aggressive garbage
+        collection. Default is `None`.
     batching : bool
         Whether to compare population members on small batches during
         evolution. Still uses full dataset for comparing against hall
@@ -471,6 +487,10 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         search evaluation. Certain operators may not be supported.
         Does not support 16-bit precision floats.
         Default is `False`.
+    bumper : bool
+        (Experimental) Whether to use Bumper.jl to speed up the search
+        evaluation. Does not support 16-bit precision floats.
+        Default is `False`.
     precision : int
         What precision to use for the data. By default this is `32`
        (float32), but you can select `64` or `16` as well, giving
@@ -522,11 +542,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     delete_tempfiles : bool
         Whether to delete the temporary files after finishing.
         Default is `True`.
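
To make the renames documented above concrete, a constructor call after this change would look as follows (an illustrative sketch only, using parameters and defaults taken from this docstring; `elementwise_loss` is shown with its documented Julia-side default):

```python
from pysr import PySRRegressor

model = PySRRegressor(
    ncycles_per_iteration=550,           # renamed from `ncyclesperiteration`
    elementwise_loss="L2DistLoss()",     # renamed from `loss`
    dimensionless_constants_only=False,  # new in this diff
    weight_swap_operands=0.1,            # new in this diff
    heap_size_hint_in_bytes=None,        # new in this diff
    bumper=False,                        # new in this diff
)
```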
-    julia_project : str
-        A Julia environment location containing a Project.toml
-        (and potentially the source code for SymbolicRegression.jl).
-        Default gives the Python package directory, where a
-        Project.toml file should be present from the install.
     update: bool
         Whether to automatically update Julia packages when `fit` is called.
         You should make sure that PySR is up-to-date itself first, as
@@ -571,11 +586,6 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         before passing to the symbolic regression code. None means no
         feature selection; an int means select that many features.
         Default is `None`.
-    julia_kwargs : dict
-        Keyword arguments to pass to `julia.core.Julia(...)` to initialize
-        the Julia runtime. The default, when `None`, is to set `threads` equal
-        to `procs`, and `optimize` to 3.
-        Default is `None`.
     **kwargs : dict
         Supports deprecated keyword arguments. Other arguments will
         result in an error.
@@ -603,12 +613,15 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Path to the temporary equations directory.
     equation_file_ : str
         Output equation file name produced by the julia backend.
-    sr_state_ : tuple[list[PyCall.jlwrap], PyCall.jlwrap]
-        The state for the julia SymbolicRegression.jl backend post fitting.
-    sr_options_ : PyCall.jlwrap
-        The options used by `SymbolicRegression.jl`, created during
-        a call to `.fit`. You may use this to manually call functions
-        in `SymbolicRegression` which take an `::Options` argument.
+    julia_state_stream_ : ndarray
+        The serialized state for the julia SymbolicRegression.jl backend (after fitting),
+        stored as an array of uint8, produced by Julia's Serialization.serialize function.
+    julia_state_
+        The deserialized state.
+    julia_options_stream_ : ndarray
+        The serialized julia options, stored as an array of uint8.
+    julia_options_
+        The deserialized julia options.
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
     show_pickle_warnings_ : bool
@@ -633,7 +646,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     ...             "inv(x) = 1/x",  # Custom operator (julia syntax)
     ...         ],
     ...         model_selection="best",
-    ...         loss="loss(x, y) = (x - y)^2",  # Custom loss function (julia syntax)
+    ...         elementwise_loss="loss(x, y) = (x - y)^2",  # Custom loss function (julia syntax)
     ...     
) >>> model.fit(X, y) >>> model @@ -657,89 +670,93 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): def __init__( self, - model_selection="best", + model_selection: Literal["best", "accuracy", "score"] = "best", *, - binary_operators=None, - unary_operators=None, - niterations=40, - populations=15, - population_size=33, - max_evals=None, - maxsize=20, - maxdepth=None, - warmup_maxsize_by=0.0, - timeout_in_seconds=None, - constraints=None, - nested_constraints=None, - loss=None, - full_objective=None, - complexity_of_operators=None, - complexity_of_constants=1, - complexity_of_variables=1, - parsimony=0.0032, - dimensional_constraint_penalty=None, - use_frequency=True, - use_frequency_in_tournament=True, - adaptive_parsimony_scaling=20.0, - alpha=0.1, - annealing=False, - early_stop_condition=None, - ncyclesperiteration=550, - fraction_replaced=0.000364, - fraction_replaced_hof=0.035, - weight_add_node=0.79, - weight_insert_node=5.1, - weight_delete_node=1.7, - weight_do_nothing=0.21, - weight_mutate_constant=0.048, - weight_mutate_operator=0.47, - weight_randomize=0.00023, - weight_simplify=0.0020, - weight_optimize=0.0, - crossover_probability=0.066, - skip_mutation_failures=True, - migration=True, - hof_migration=True, - topn=12, - should_simplify=None, - should_optimize_constants=True, - optimizer_algorithm="BFGS", - optimizer_nrestarts=2, - optimize_probability=0.14, - optimizer_iterations=8, - perturbation_factor=0.076, - tournament_selection_n=10, - tournament_selection_p=0.86, - procs=cpu_count(), - multithreading=None, - cluster_manager=None, - batching=False, - batch_size=50, - fast_cycle=False, - turbo=False, - precision=32, - enable_autodiff=False, + binary_operators: Optional[List[str]] = None, + unary_operators: Optional[List[str]] = None, + niterations: int = 40, + populations: int = 15, + population_size: int = 33, + max_evals: Optional[int] = None, + maxsize: int = 20, + maxdepth: Optional[int] = None, + warmup_maxsize_by: Optional[float] = None, + timeout_in_seconds: Optional[float] = None, + constraints: Optional[Dict[str, Union[int, Tuple[int, int]]]] = None, + nested_constraints: Optional[Dict[str, Dict[str, int]]] = None, + elementwise_loss: Optional[str] = None, + loss_function: Optional[str] = None, + complexity_of_operators: Optional[Dict[str, Union[int, float]]] = None, + complexity_of_constants: Union[int, float] = 1, + complexity_of_variables: Union[int, float] = 1, + parsimony: float = 0.0032, + dimensional_constraint_penalty: Optional[float] = None, + dimensionless_constants_only: bool = False, + use_frequency: bool = True, + use_frequency_in_tournament: bool = True, + adaptive_parsimony_scaling: float = 20.0, + alpha: float = 0.1, + annealing: bool = False, + early_stop_condition: Optional[Union[float, str]] = None, + ncycles_per_iteration: int = 550, + fraction_replaced: float = 0.000364, + fraction_replaced_hof: float = 0.035, + weight_add_node: float = 0.79, + weight_insert_node: float = 5.1, + weight_delete_node: float = 1.7, + weight_do_nothing: float = 0.21, + weight_mutate_constant: float = 0.048, + weight_mutate_operator: float = 0.47, + weight_swap_operands: float = 0.1, + weight_randomize: float = 0.00023, + weight_simplify: float = 0.0020, + weight_optimize: float = 0.0, + crossover_probability: float = 0.066, + skip_mutation_failures: bool = True, + migration: bool = True, + hof_migration: bool = True, + topn: int = 12, + should_simplify: Optional[bool] = None, + should_optimize_constants: bool = True, + optimizer_algorithm: 
Literal["BFGS", "NelderMead"] = "BFGS", + optimizer_nrestarts: int = 2, + optimize_probability: float = 0.14, + optimizer_iterations: int = 8, + perturbation_factor: float = 0.076, + tournament_selection_n: int = 10, + tournament_selection_p: float = 0.86, + procs: int = cpu_count(), + multithreading: Optional[bool] = None, + cluster_manager: Optional[ + Literal["slurm", "pbs", "lsf", "sge", "qrsh", "scyld", "htc"] + ] = None, + heap_size_hint_in_bytes: Optional[int] = None, + batching: bool = False, + batch_size: int = 50, + fast_cycle: bool = False, + turbo: bool = False, + bumper: bool = False, + precision: int = 32, + enable_autodiff: bool = False, random_state=None, - deterministic=False, - warm_start=False, - verbosity=1, - update_verbosity=None, - print_precision=5, - progress=True, - equation_file=None, - temp_equation_file=False, - tempdir=None, - delete_tempfiles=True, - julia_project=None, - update=False, - output_jax_format=False, - output_torch_format=False, - extra_sympy_mappings=None, - extra_torch_mappings=None, - extra_jax_mappings=None, - denoise=False, - select_k_features=None, - julia_kwargs=None, + deterministic: bool = False, + warm_start: bool = False, + verbosity: int = 1, + update_verbosity: Optional[int] = None, + print_precision: int = 5, + progress: bool = True, + equation_file: Optional[str] = None, + temp_equation_file: bool = False, + tempdir: Optional[str] = None, + delete_tempfiles: bool = True, + update: bool = False, + output_jax_format: bool = False, + output_torch_format: bool = False, + extra_sympy_mappings: Optional[Dict[str, Callable]] = None, + extra_torch_mappings: Optional[Dict[Callable, Callable]] = None, + extra_jax_mappings: Optional[Dict[Callable, str]] = None, + denoise: bool = False, + select_k_features: Optional[int] = None, **kwargs, ): # Hyperparameters @@ -750,7 +767,7 @@ def __init__( self.niterations = niterations self.populations = populations self.population_size = population_size - self.ncyclesperiteration = ncyclesperiteration + self.ncycles_per_iteration = ncycles_per_iteration # - Equation Constraints self.maxsize = maxsize self.maxdepth = maxdepth @@ -763,13 +780,14 @@ def __init__( self.timeout_in_seconds = timeout_in_seconds self.early_stop_condition = early_stop_condition # - Loss parameters - self.loss = loss - self.full_objective = full_objective + self.elementwise_loss = elementwise_loss + self.loss_function = loss_function self.complexity_of_operators = complexity_of_operators self.complexity_of_constants = complexity_of_constants self.complexity_of_variables = complexity_of_variables self.parsimony = parsimony self.dimensional_constraint_penalty = dimensional_constraint_penalty + self.dimensionless_constants_only = dimensionless_constants_only self.use_frequency = use_frequency self.use_frequency_in_tournament = use_frequency_in_tournament self.adaptive_parsimony_scaling = adaptive_parsimony_scaling @@ -783,6 +801,7 @@ def __init__( self.weight_do_nothing = weight_do_nothing self.weight_mutate_constant = weight_mutate_constant self.weight_mutate_operator = weight_mutate_operator + self.weight_swap_operands = weight_swap_operands self.weight_randomize = weight_randomize self.weight_simplify = weight_simplify self.weight_optimize = weight_optimize @@ -804,14 +823,16 @@ def __init__( # -- Selection parameters self.tournament_selection_n = tournament_selection_n self.tournament_selection_p = tournament_selection_p - # Solver parameters + # -- Performance parameters self.procs = procs self.multithreading = multithreading 
self.cluster_manager = cluster_manager + self.heap_size_hint_in_bytes = heap_size_hint_in_bytes self.batching = batching self.batch_size = batch_size self.fast_cycle = fast_cycle self.turbo = turbo + self.bumper = bumper self.precision = precision self.enable_autodiff = enable_autodiff self.random_state = random_state @@ -828,7 +849,6 @@ def __init__( self.temp_equation_file = temp_equation_file self.tempdir = tempdir self.delete_tempfiles = delete_tempfiles - self.julia_project = julia_project self.update = update self.output_jax_format = output_jax_format self.output_torch_format = output_torch_format @@ -838,16 +858,14 @@ def __init__( # Pre-modelling transformation self.denoise = denoise self.select_k_features = select_k_features - self.julia_kwargs = julia_kwargs # Once all valid parameters have been assigned handle the # deprecated kwargs if len(kwargs) > 0: # pragma: no cover - deprecated_kwargs = make_deprecated_kwargs_for_pysr_regressor() for k, v in kwargs.items(): # Handle renamed kwargs - if k in deprecated_kwargs: - updated_kwarg_name = deprecated_kwargs[k] + if k in DEPRECATED_KWARGS: + updated_kwarg_name = DEPRECATED_KWARGS[k] setattr(self, updated_kwarg_name, v) warnings.warn( f"{k} has been renamed to {updated_kwarg_name} in PySRRegressor. " @@ -861,6 +879,19 @@ def __init__( f"Ignoring parameter; please pass {k} during the call to fit instead.", FutureWarning, ) + elif k == "julia_project": + warnings.warn( + "The `julia_project` parameter has been deprecated. To use a custom " + "julia project, please see `https://astroautomata.com/PySR/backend`.", + FutureWarning, + ) + elif k == "julia_kwargs": + warnings.warn( + "The `julia_kwargs` parameter has been deprecated. To pass custom " + "keyword arguments to the julia backend, you should use environment variables. " + "See the Julia documentation for more information.", + FutureWarning, + ) else: raise TypeError( f"{k} is not a valid keyword argument for PySRRegressor." @@ -1031,26 +1062,13 @@ def __getstate__(self): Handle pickle serialization for PySRRegressor. The Scikit-learn standard requires estimators to be serializable via - `pickle.dumps()`. However, `PyCall.jlwrap` does not support pickle - serialization. - - Thus, for `PySRRegressor` to support pickle serialization, the - `sr_state_` attribute must be hidden from pickle. This will - prevent the `warm_start` of any model that is loaded via `pickle.loads()`, - but does allow all other attributes of a fitted `PySRRegressor` estimator - to be serialized. Note: Jax and Torch format equations are also removed - from the pickled instance. + `pickle.dumps()`. However, some attributes do not support pickling + and need to be hidden, such as the JAX and Torch representations. """ state = self.__dict__ show_pickle_warning = not ( "show_pickle_warnings_" in state and not state["show_pickle_warnings_"] ) - if ("sr_state_" in state or "sr_options_" in state) and show_pickle_warning: - warnings.warn( - "sr_state_ and sr_options_ cannot be pickled and will be removed from the " - "serialized instance. This will prevent a `warm_start` fit of any " - "model that is deserialized via `pickle.load()`." - ) state_keys_containing_lambdas = ["extra_sympy_mappings", "extra_torch_mappings"] for state_key in state_keys_containing_lambdas: if state[state_key] is not None and show_pickle_warning: @@ -1059,10 +1077,7 @@ def __getstate__(self): "serialized instance. When loading the model, please redefine " f"`{state_key}` at runtime." 
) - state_keys_to_clear = [ - "sr_state_", - "sr_options_", - ] + state_keys_containing_lambdas + state_keys_to_clear = state_keys_containing_lambdas pickled_state = { key: (None if key in state_keys_to_clear else value) for key, value in state.items() @@ -1113,12 +1128,22 @@ def equations(self): # pragma: no cover return self.equations_ @property - def raw_julia_state_(self): # pragma: no cover + def julia_options_(self): + return jl_deserialize(self.julia_options_stream_) + + @property + def julia_state_(self): + return jl_deserialize(self.julia_state_stream_) + + @property + def raw_julia_state_(self): warnings.warn( "PySRRegressor.raw_julia_state_ is now deprecated. " - "Please use PySRRegressor.sr_state_ instead.", + "Please use PySRRegressor.julia_state_ instead, or julia_state_stream_ " + "for the raw stream of bytes.", + FutureWarning, ) - return self.sr_state_ + return self.julia_state_ def get_best(self, index=None): """ @@ -1233,8 +1258,10 @@ def _validate_and_set_init_params(self): "to True and `procs` to 0 will result in non-deterministic searches. " ) - if self.loss is not None and self.full_objective is not None: - raise ValueError("You cannot set both `loss` and `full_objective`.") + if self.elementwise_loss is not None and self.loss_function is not None: + raise ValueError( + "You cannot set both `elementwise_loss` and `loss_function`." + ) # NotImplementedError - Values that could be supported at a later time if self.optimizer_algorithm not in VALID_OPTIMIZER_ALGORITHMS: @@ -1242,9 +1269,9 @@ def _validate_and_set_init_params(self): f"PySR currently only supports the following optimizer algorithms: {VALID_OPTIMIZER_ALGORITHMS}" ) + progress = self.progress # 'Mutable' parameter validation - buffer_available = "buffer" in sys.stdout.__dir__() - # Params and their default values, if None is given: + # (Params and their default values, if None is given:) default_param_mapping = { "binary_operators": "+ * - /".split(" "), "unary_operators": [], @@ -1253,7 +1280,7 @@ def _validate_and_set_init_params(self): "multithreading": self.procs != 0 and self.cluster_manager is None, "batch_size": 1, "update_verbosity": int(self.verbosity), - "progress": buffer_available, + "progress": progress, } packed_modified_params = {} for parameter, default_value in default_param_mapping.items(): @@ -1272,7 +1299,11 @@ def _validate_and_set_init_params(self): "`batch_size` has been increased to equal one." ) parameter_value = 1 - elif parameter == "progress" and not buffer_available: + elif ( + parameter == "progress" + and parameter_value + and "buffer" not in sys.stdout.__dir__() + ): warnings.warn( "Note: it looks like you are running in Jupyter. " "The progress bar will be turned off." 
@@ -1286,16 +1317,6 @@ def _validate_and_set_init_params(self): > 0 ) - julia_kwargs = {} - if self.julia_kwargs is not None: - for key, value in self.julia_kwargs.items(): - julia_kwargs[key] = value - if "optimize" not in julia_kwargs: - julia_kwargs["optimize"] = 3 - if "threads" not in julia_kwargs and packed_modified_params["multithreading"]: - julia_kwargs["threads"] = self.procs - packed_modified_params["julia_kwargs"] = julia_kwargs - return packed_modified_params def _validate_and_set_fit_params( @@ -1523,7 +1544,6 @@ def _run(self, X, y, mutated_params, weights, seed): # Need to be global as we don't want to recreate/reinstate julia for # every new instance of PySRRegressor global already_ran - global Main # These are the parameters which may be modified from the ones # specified in init, so we define them here locally: @@ -1538,32 +1558,13 @@ def _run(self, X, y, mutated_params, weights, seed): batch_size = mutated_params["batch_size"] update_verbosity = mutated_params["update_verbosity"] progress = mutated_params["progress"] - julia_kwargs = mutated_params["julia_kwargs"] # Start julia backend processes if not already_ran and update_verbosity != 0: print("Compiling Julia backend...") - Main = init_julia(self.julia_project, julia_kwargs=julia_kwargs) - if cluster_manager is not None: - cluster_manager = _load_cluster_manager(Main, cluster_manager) - - if self.update: - _, is_shared = _process_julia_project(self.julia_project) - io = "devnull" if update_verbosity == 0 else "stderr" - io_arg = ( - f"io={io}" if is_julia_version_greater_eq(version=(1, 6, 0)) else "" - ) - _update_julia_project(Main, is_shared, io_arg) - - SymbolicRegression = _load_backend(Main) - - Main.plus = Main.eval("(+)") - Main.sub = Main.eval("(-)") - Main.mult = Main.eval("(*)") - Main.pow = Main.eval("(^)") - Main.div = Main.eval("(/)") + cluster_manager = _load_cluster_manager(cluster_manager) # TODO(mcranmer): These functions should be part of this class. 
binary_operators, unary_operators = _maybe_create_inline_operators( @@ -1589,7 +1590,7 @@ def _run(self, X, y, mutated_params, weights, seed): nested_constraints_str += f"({inner_k}) => {inner_v}, " nested_constraints_str += "), " nested_constraints_str += ")" - nested_constraints = Main.eval(nested_constraints_str) + nested_constraints = jl.seval(nested_constraints_str) # Parse dict into Julia Dict for complexities: if complexity_of_operators is not None: @@ -1597,18 +1598,34 @@ def _run(self, X, y, mutated_params, weights, seed): for k, v in complexity_of_operators.items(): complexity_of_operators_str += f"({k}) => {v}, " complexity_of_operators_str += ")" - complexity_of_operators = Main.eval(complexity_of_operators_str) + complexity_of_operators = jl.seval(complexity_of_operators_str) - custom_loss = Main.eval(self.loss) - custom_full_objective = Main.eval(self.full_objective) + custom_loss = jl.seval( + str(self.elementwise_loss) + if self.elementwise_loss is not None + else "nothing" + ) + custom_full_objective = jl.seval( + str(self.loss_function) if self.loss_function is not None else "nothing" + ) + + early_stop_condition = jl.seval( + str(self.early_stop_condition) + if self.early_stop_condition is not None + else "nothing" + ) - early_stop_condition = Main.eval( - str(self.early_stop_condition) if self.early_stop_condition else None + load_required_packages( + turbo=self.turbo, + bumper=self.bumper, + enable_autodiff=self.enable_autodiff, + cluster_manager=cluster_manager, ) mutation_weights = SymbolicRegression.MutationWeights( mutate_constant=self.weight_mutate_constant, mutate_operator=self.weight_mutate_operator, + swap_operands=self.weight_swap_operands, add_node=self.weight_add_node, insert_node=self.weight_insert_node, delete_node=self.weight_delete_node, @@ -1620,11 +1637,11 @@ def _run(self, X, y, mutated_params, weights, seed): # Call to Julia backend. 
# See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl - self.sr_options_ = SymbolicRegression.Options( - binary_operators=Main.eval(str(binary_operators).replace("'", "")), - unary_operators=Main.eval(str(unary_operators).replace("'", "")), - bin_constraints=bin_constraints, - una_constraints=una_constraints, + options = SymbolicRegression.Options( + binary_operators=jl.seval(str(binary_operators).replace("'", "")), + unary_operators=jl.seval(str(unary_operators).replace("'", "")), + bin_constraints=jl_array(bin_constraints), + una_constraints=jl_array(una_constraints), complexity_of_operators=complexity_of_operators, complexity_of_constants=self.complexity_of_constants, complexity_of_variables=self.complexity_of_variables, @@ -1642,22 +1659,26 @@ def _run(self, X, y, mutated_params, weights, seed): # These have the same name: parsimony=self.parsimony, dimensional_constraint_penalty=self.dimensional_constraint_penalty, + dimensionless_constants_only=self.dimensionless_constants_only, alpha=self.alpha, maxdepth=maxdepth, fast_cycle=self.fast_cycle, turbo=self.turbo, + bumper=self.bumper, enable_autodiff=self.enable_autodiff, migration=self.migration, hof_migration=self.hof_migration, fraction_replaced_hof=self.fraction_replaced_hof, should_simplify=self.should_simplify, should_optimize_constants=self.should_optimize_constants, - warmup_maxsize_by=self.warmup_maxsize_by, + warmup_maxsize_by=( + 0.0 if self.warmup_maxsize_by is None else self.warmup_maxsize_by + ), use_frequency=self.use_frequency, use_frequency_in_tournament=self.use_frequency_in_tournament, adaptive_parsimony_scaling=self.adaptive_parsimony_scaling, npop=self.population_size, - ncycles_per_iteration=self.ncyclesperiteration, + ncycles_per_iteration=self.ncycles_per_iteration, fraction_replaced=self.fraction_replaced, topn=self.topn, print_precision=self.print_precision, @@ -1677,6 +1698,8 @@ def _run(self, X, y, mutated_params, weights, seed): define_helper_functions=False, ) + self.julia_options_stream_ = jl_serialize(options) + # Convert data to desired precision test_X = np.array(X) is_complex = np.issubdtype(test_X.dtype, np.complexfloating) @@ -1687,18 +1710,18 @@ def _run(self, X, y, mutated_params, weights, seed): np_dtype = {32: np.complex64, 64: np.complex128}[self.precision] # This converts the data into a Julia array: - Main.X = np.array(X, dtype=np_dtype).T + jl_X = jl_array(np.array(X, dtype=np_dtype).T) if len(y.shape) == 1: - Main.y = np.array(y, dtype=np_dtype) + jl_y = jl_array(np.array(y, dtype=np_dtype)) else: - Main.y = np.array(y, dtype=np_dtype).T + jl_y = jl_array(np.array(y, dtype=np_dtype).T) if weights is not None: if len(weights.shape) == 1: - Main.weights = np.array(weights, dtype=np_dtype) + jl_weights = jl_array(np.array(weights, dtype=np_dtype)) else: - Main.weights = np.array(weights, dtype=np_dtype).T + jl_weights = jl_array(np.array(weights, dtype=np_dtype).T) else: - Main.weights = None + jl_weights = None if self.procs == 0 and not multithreading: parallelism = "serial" @@ -1711,33 +1734,45 @@ def _run(self, X, y, mutated_params, weights, seed): None if parallelism in ["serial", "multithreading"] else int(self.procs) ) - y_variable_names = None if len(y.shape) > 1: # We set these manually so that they respect Python's 0 indexing # (by default Julia will use y1, y2...) 
- y_variable_names = [f"y{_subscriptify(i)}" for i in range(y.shape[1])] + jl_y_variable_names = jl_array( + [f"y{_subscriptify(i)}" for i in range(y.shape[1])] + ) + else: + jl_y_variable_names = None - # Call to Julia backend. - # See https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl - self.sr_state_ = SymbolicRegression.equation_search( - Main.X, - Main.y, - weights=Main.weights, + PythonCall.GC.disable() + out = SymbolicRegression.equation_search( + jl_X, + jl_y, + weights=jl_weights, niterations=int(self.niterations), - variable_names=self.feature_names_in_.tolist(), - display_variable_names=self.display_feature_names_in_.tolist(), - y_variable_names=y_variable_names, - X_units=self.X_units_, - y_units=self.y_units_, - options=self.sr_options_, + variable_names=jl_array([str(v) for v in self.feature_names_in_]), + display_variable_names=jl_array( + [str(v) for v in self.display_feature_names_in_] + ), + y_variable_names=jl_y_variable_names, + X_units=jl_array(self.X_units_), + y_units=( + jl_array(self.y_units_) + if isinstance(self.y_units_, list) + else self.y_units_ + ), + options=options, numprocs=cprocs, parallelism=parallelism, - saved_state=self.sr_state_, + saved_state=self.julia_state_, return_state=True, addprocs_function=cluster_manager, + heap_size_hint_in_bytes=self.heap_size_hint_in_bytes, progress=progress and self.verbosity > 0 and len(y.shape) == 1, verbosity=int(self.verbosity), ) + PythonCall.GC.enable() + + self.julia_state_stream_ = jl_serialize(out) # Set attributes self.equations_ = self.get_hof() @@ -1801,10 +1836,10 @@ def fit( Fitted estimator. """ # Init attributes that are not specified in BaseEstimator - if self.warm_start and hasattr(self, "sr_state_"): + if self.warm_start and hasattr(self, "julia_state_stream_"): pass else: - if hasattr(self, "sr_state_"): + if hasattr(self, "julia_state_stream_"): warnings.warn( "The discovered expressions are being reset. " "Please set `warm_start=True` if you wish to continue " @@ -1814,8 +1849,8 @@ def fit( self.equations_ = None self.nout_ = 1 self.selection_mask_ = None - self.sr_state_ = None - self.sr_options_ = None + self.julia_state_stream_ = None + self.julia_options_stream_ = None self.X_units_ = None self.y_units_ = None @@ -2212,6 +2247,7 @@ def get_hof(self): for _, eqn_row in output.iterrows(): eqn = pysr2sympy( eqn_row["equation"], + feature_names_in=self.feature_names_in_, extra_sympy_mappings=self.extra_sympy_mappings, ) sympy_format.append(eqn) diff --git a/pysr/test/__init__.py b/pysr/test/__init__.py index 7b176eab..cb6b9e4a 100644 --- a/pysr/test/__init__.py +++ b/pysr/test/__init__.py @@ -1,7 +1,15 @@ from .test import runtests -from .test_cli import runtests as runtests_cli -from .test_env import runtests as runtests_env +from .test_cli import get_runtests as get_runtests_cli +from .test_dev import runtests as runtests_dev from .test_jax import runtests as runtests_jax +from .test_startup import runtests as runtests_startup from .test_torch import runtests as runtests_torch -__all__ = ["runtests", "runtests_env", "runtests_jax", "runtests_torch", "runtests_cli"] +__all__ = [ + "runtests", + "runtests_jax", + "runtests_torch", + "get_runtests_cli", + "runtests_startup", + "runtests_dev", +] diff --git a/pysr/test/__main__.py b/pysr/test/__main__.py index b0ec3b36..875041cf 100644 --- a/pysr/test/__main__.py +++ b/pysr/test/__main__.py @@ -1,43 +1,14 @@ """CLI for running PySR's test suite.""" + import argparse -import os from . 
import * if __name__ == "__main__": # Get args: parser = argparse.ArgumentParser() - parser.usage = "python -m pysr.test [tests...]" parser.add_argument( "test", nargs="*", - help="Test to run. One or more of 'main', 'env', 'jax', 'torch', 'cli'.", + help="DEPRECATED. Use `python -m pysr test [tests...]` instead.", ) - - # Parse args: - args = parser.parse_args() - tests = args.test - - if len(tests) == 0: - # Raise help message: - parser.print_help() - raise SystemExit(1) - - # Run tests: - for test in tests: - if test in {"main", "env", "jax", "torch", "cli"}: - cur_dir = os.path.dirname(os.path.abspath(__file__)) - print(f"Running test from {cur_dir}") - if test == "main": - runtests() - elif test == "env": - runtests_env() - elif test == "jax": - runtests_jax() - elif test == "torch": - runtests_torch() - elif test == "cli": - runtests_cli() - else: - parser.print_help() - raise SystemExit(1) diff --git a/pysr/test/generate_dev_juliapkg.py b/pysr/test/generate_dev_juliapkg.py new file mode 100644 index 00000000..ac4f6b29 --- /dev/null +++ b/pysr/test/generate_dev_juliapkg.py @@ -0,0 +1,19 @@ +# Example call: +## python3 generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl +import json +import sys + +juliapkg_json = sys.argv[1] +path_to_srjl = sys.argv[2] + +with open(juliapkg_json, "r") as f: + juliapkg = json.load(f) + +juliapkg["packages"]["SymbolicRegression"] = { + "uuid": juliapkg["packages"]["SymbolicRegression"]["uuid"], + "path": path_to_srjl, + "dev": True, +} + +with open(juliapkg_json, "w") as f: + json.dump(juliapkg, f, indent=4) diff --git a/pysr/test/incremental_install_simulator.dockerfile b/pysr/test/incremental_install_simulator.dockerfile deleted file mode 100644 index 62811e8c..00000000 --- a/pysr/test/incremental_install_simulator.dockerfile +++ /dev/null @@ -1,52 +0,0 @@ -# This dockerfile simulates a user installation that first -# builds PySR for Python 3.9, and then upgrades to Python 3.10. -# Normally this would cause an error when installing PyCall, so we want to -# ensure that PySR can automatically patch things. -FROM debian:bullseye-slim - -ENV DEBIAN_FRONTEND=noninteractive - -# Install juliaup and pyenv: -RUN apt-get update && apt-get install -y curl git build-essential \ - libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev \ - libncurses5-dev libncursesw5-dev xz-utils libffi-dev liblzma-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Install juliaup: -RUN curl -fsSL https://install.julialang.org | sh -s -- -y - -# Install pyenv: -RUN curl -fsSL curl https://pyenv.run | sh && \ - echo 'export PATH="/root/.pyenv/bin:$PATH"' >> ~/.bashrc && \ - echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc && \ - echo 'eval "$(pyenv init -)"' >> ~/.bashrc && \ - echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc - -# Default to using bash -l: -SHELL ["/bin/bash", "-l", "-c"] - -RUN juliaup add 1.8 && juliaup default 1.8 -RUN pyenv install 3.9.2 && pyenv global 3.9.2 -RUN python3 -m pip install --upgrade pip - -# Get PySR source: -WORKDIR /pysr -ADD ./requirements.txt /pysr/requirements.txt -RUN python3 -m pip install -r /pysr/requirements.txt - -ADD ./setup.py /pysr/setup.py -ADD ./pysr/ /pysr/pysr/ - -# First install of PySR: -RUN python3 -m pip install . -RUN python3 -m pysr install - -# Change Python version: -RUN pyenv install 3.10 && pyenv global 3.10 && pyenv uninstall -f 3.9.2 -RUN python3 -m pip install --upgrade pip - -# Second install of PySR: -RUN python3 -m pip install . 
-RUN rm -r ~/.julia/environments/pysr-*
-RUN python3 -m pysr install
diff --git a/pysr/test/nb_sanitize.cfg b/pysr/test/nb_sanitize.cfg
new file mode 100644
index 00000000..caabeb6a
--- /dev/null
+++ b/pysr/test/nb_sanitize.cfg
@@ -0,0 +1,3 @@
+[pathnames]
+regex: /[a-zA-Z0-9_\- .\/]+/pysr/sr\.py
+replace: PATH
diff --git a/pysr/test/params.py b/pysr/test/params.py
new file mode 100644
index 00000000..9850c9cd
--- /dev/null
+++ b/pysr/test/params.py
@@ -0,0 +1,8 @@
+import inspect
+
+from .. import PySRRegressor
+
+DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
+DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
+DEFAULT_POPULATIONS = DEFAULT_PARAMS["populations"].default
+DEFAULT_NCYCLES = DEFAULT_PARAMS["ncycles_per_iteration"].default
diff --git a/pysr/test/test.py b/pysr/test/test.py
index df361e6f..9a71a82a 100644
--- a/pysr/test/test.py
+++ b/pysr/test/test.py
@@ -1,4 +1,3 @@
-import inspect
 import os
 import pickle as pkl
 import tempfile
@@ -12,16 +11,18 @@
 import sympy
 from sklearn.utils.estimator_checks import check_estimator

-from .. import PySRRegressor, julia_helpers
+from .. import PySRRegressor, install, jl
 from ..export_latex import sympy2latex
 from ..feature_selection import _handle_feature_selection, run_feature_selection
+from ..julia_helpers import init_julia
 from ..sr import _check_assertions, _process_constraints, idx_model_selection
 from ..utils import _csv_filename_to_pkl_filename
-
-DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
-DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
-DEFAULT_POPULATIONS = DEFAULT_PARAMS["populations"].default
-DEFAULT_NCYCLES = DEFAULT_PARAMS["ncyclesperiteration"].default
+from .params import (
+    DEFAULT_NCYCLES,
+    DEFAULT_NITERATIONS,
+    DEFAULT_PARAMS,
+    DEFAULT_POPULATIONS,
+)


 class TestPipeline(unittest.TestCase):
@@ -57,16 +58,20 @@ def test_linear_relation_named(self):
         model.fit(self.X, y, variable_names=["c1", "c2", "c3", "c4", "c5"])
         self.assertIn("c1", model.equations_.iloc[-1]["equation"])

-    def test_linear_relation_weighted(self):
+    def test_linear_relation_weighted_bumper(self):
         y = self.X[:, 0]
         weights = np.ones_like(y)
         model = PySRRegressor(
             **self.default_test_kwargs,
             early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
+            bumper=True,
         )
         model.fit(self.X, y, weights=weights)
         print(model.equations_)
         self.assertLessEqual(model.get_best()["loss"], 1e-4)
+        self.assertEqual(
+            jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.bumper), True
+        )

     def test_multiprocessing_turbo_custom_objective(self):
         rstate = np.random.RandomState(0)
@@ -80,7 +85,7 @@ def test_multiprocessing_turbo_custom_objective(self):
             multithreading=False,
             turbo=True,
             early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
-            full_objective="""
+            loss_function="""
             function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
                 prediction, flag = eval_tree_array(tree, dataset.X, options)
                 !flag && return T(Inf)
@@ -95,22 +100,43 @@ def test_multiprocessing_turbo_custom_objective(self):
         self.assertLessEqual(best_loss, 1e-10)
         self.assertGreaterEqual(best_loss, 0.0)

+        # Test options stored:
+        self.assertEqual(
+            jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.turbo), True
+        )
+
+    def test_multiline_seval(self):
+        # The user should be able to run multiple things in a single seval call:
+        num = jl.seval(
+            """
+            function my_new_objective(x)
+                x^2
+            end
+            1.5
+            """
+        )
+        self.assertEqual(num, 1.5)
+
     def test_high_precision_search_custom_loss(self):
         y = 1.23456789 * self.X[:, 0]
         model = PySRRegressor(
             **self.default_test_kwargs,
             early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
-            loss="my_loss(prediction, target) = (prediction - target)^2",
+            elementwise_loss="my_loss(prediction, target) = (prediction - target)^2",
             precision=64,
             parsimony=0.01,
             warm_start=True,
         )
         model.fit(self.X, y)
-        from pysr.sr import Main

         # We should have that the model state is now a Float64 hof:
-        Main.test_state = model.sr_state_
-        self.assertTrue(Main.eval("typeof(test_state[2]).parameters[1] == Float64"))
+        test_state = model.raw_julia_state_
+        self.assertTrue(jl.typeof(test_state[1]).parameters[1] == jl.Float64)
+
+        # Test options stored:
+        self.assertEqual(
+            jl.seval("((::Val{x}) where x) -> x")(model.julia_options_.turbo), False
+        )

     def test_multioutput_custom_operator_quiet_custom_complexity(self):
         y = self.X[:, [0, 1]] ** 2
@@ -145,10 +171,6 @@ def test_multioutput_custom_operator_quiet_custom_complexity(self):
         self.assertLessEqual(mse1, 1e-4)
         self.assertLessEqual(mse2, 1e-4)

-        bad_y = model.predict(self.X, index=[0, 0])
-        bad_mse = np.average((bad_y - y) ** 2)
-        self.assertGreater(bad_mse, 1e-4)
-
     def test_multioutput_weighted_with_callable_temp_equation(self):
         X = self.X.copy()
         y = X[:, [0, 1]] ** 2
@@ -199,6 +221,7 @@ def test_complex_equations_anonymous_stop(self):
             **self.default_test_kwargs,
             early_stop_condition="(loss, complexity) -> loss <= 1e-4 && complexity <= 6",
         )
+        model.niterations = DEFAULT_NITERATIONS * 10
         model.fit(X, y)
         test_y = model.predict(X)
         self.assertTrue(np.issubdtype(test_y.dtype, np.complexfloating))
@@ -224,16 +247,17 @@ def test_empty_operators_single_input_warm_start(self):
         # Test if repeated fit works:
         regressor.set_params(
             niterations=1,
-            ncyclesperiteration=2,
+            ncycles_per_iteration=2,
             warm_start=True,
             early_stop_condition=None,
         )
-        # Check that the the julia state is saved:
-        from pysr.sr import Main

         # We should have that the model state is now a Float32 hof:
-        Main.test_state = regressor.sr_state_
-        self.assertTrue(Main.eval("typeof(test_state[2]).parameters[1] == Float32"))
+        test_state = regressor.julia_state_
+        self.assertTrue(
+            jl.first(jl.typeof(jl.last(test_state)).parameters) == jl.Float32
+        )
+
         # This should exit almost immediately, and use the old equations
         regressor.fit(X, y)

@@ -252,7 +276,7 @@ def test_warm_start_set_at_init(self):
         regressor = PySRRegressor(warm_start=True, max_evals=10)
         regressor.fit(self.X, y)

-    def test_noisy(self):
+    def test_noisy_builtin_variable_names(self):
         y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
         model = PySRRegressor(
             # Test that passing a single operator works:
@@ -269,9 +293,12 @@ def test_noisy(self):
         model.set_params(model_selection="best")
         # Also try without a temp equation file:
         model.set_params(temp_equation_file=False)
-        model.fit(self.X, y)
+        # We also test builtin variable names
+        model.fit(self.X, y, variable_names=["exec", "hash", "x3", "x4", "x5"])
         self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
         self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
+        self.assertIn("exec", model.latex()[0])
+        self.assertIn("hash", model.latex()[1])

     def test_pandas_resample_with_nested_constraints(self):
         X = pd.DataFrame(
@@ -548,6 +575,17 @@ def test_deprecation(self):
         # The correct value should be set:
         self.assertEqual(model.fraction_replaced, 0.2)

+    def test_deprecated_functions(self):
+        with self.assertWarns(FutureWarning):
+            install()
+
+        _jl = None
+
+        with self.assertWarns(FutureWarning):
+            _jl = init_julia()
+
+        self.assertEqual(_jl, jl)
+
     def test_power_law_warning(self):
         """Ensure that a warning is given for a power law operator."""
         with self.assertWarns(UserWarning):
@@ -594,23 +632,6 @@ def test_deterministic_errors(self):
         with self.assertRaises(ValueError):
             model.fit(X, y)

-    def test_changed_options_warning(self):
-        """Check that a warning is given if Julia options are changed."""
-        if julia_helpers.julia_kwargs_at_initialization is None:
-            julia_helpers.init_julia(julia_kwargs={"threads": 2, "optimize": 3})
-
-        cur_init = julia_helpers.julia_kwargs_at_initialization
-
-        threads_to_change = cur_init["threads"] + 1
-        with warnings.catch_warnings():
-            warnings.simplefilter("error")
-            with self.assertRaises(Exception) as context:
-                julia_helpers.init_julia(
-                    julia_kwargs={"threads": threads_to_change, "optimize": 3}
-                )
-            self.assertIn("Julia has already started", str(context.exception))
-            self.assertIn("threads", str(context.exception))
-
     def test_extra_sympy_mappings_undefined(self):
         """extra_sympy_mappings=None errors for custom operators"""
         model = PySRRegressor(unary_operators=["square2(x) = x^2"])
@@ -640,6 +661,50 @@ def test_bad_variable_names_fail(self):
             model.fit(X, y, variable_names=["f{c}"])
         self.assertIn("Invalid variable name", str(cm.exception))

+    def test_bad_kwargs(self):
+        bad_kwargs = [
+            dict(
+                kwargs=dict(
+                    elementwise_loss="g(x, y) = 0.0", loss_function="f(*args) = 0.0"
+                ),
+                error=ValueError,
+            ),
+            dict(
+                kwargs=dict(maxsize=3),
+                error=ValueError,
+            ),
+            dict(
+                kwargs=dict(tournament_selection_n=10, population_size=3),
+                error=ValueError,
+            ),
+            dict(
+                kwargs=dict(optimizer_algorithm="COBYLA"),
+                error=NotImplementedError,
+            ),
+            dict(
+                kwargs=dict(
+                    constraints={
+                        "+": (3, 5),
+                    }
+                ),
+                error=NotImplementedError,
+            ),
+            dict(
+                kwargs=dict(binary_operators=["α(x, y) = x - y"]),
+                error=ValueError,
+            ),
+            dict(
+                kwargs=dict(model_selection="unknown"),
+                error=NotImplementedError,
+            ),
+        ]
+        for opt in bad_kwargs:
+            model = PySRRegressor(**opt["kwargs"], niterations=1)
+            with self.assertRaises(opt["error"]):
+                model.fit([[1]], [1])
+                model.get_best()
+                print("Failed", opt["kwargs"])
+
     def test_pickle_with_temp_equation_file(self):
         """If we have a temporary equation file, unpickle the estimator."""
         model = PySRRegressor(
@@ -678,7 +743,7 @@ def test_scikit_learn_compatibility(self):
         model = PySRRegressor(
             niterations=int(1 + DEFAULT_NITERATIONS / 10),
             populations=int(1 + DEFAULT_POPULATIONS / 3),
-            ncyclesperiteration=int(2 + DEFAULT_NCYCLES / 10),
+            ncycles_per_iteration=int(2 + DEFAULT_NCYCLES / 10),
             verbosity=0,
             progress=False,
             random_state=0,
@@ -715,6 +780,9 @@ def test_scikit_learn_compatibility(self):
     def test_param_groupings(self):
         """Test that param_groupings are complete"""
         param_groupings_file = Path(__file__).parent.parent / "param_groupings.yml"
+        if not param_groupings_file.exists():
+            return
+
         # Read the file, discarding lines ending in ":",
         # and removing leading "\s*-\s*":
         params = []
@@ -964,9 +1032,8 @@ def test_dimensional_constraints(self):
         for i in range(2):
             self.assertGreater(model.get_best()[i]["complexity"], 2)
             self.assertLess(model.get_best()[i]["loss"], 1e-6)
-            self.assertGreater(
-                model.equations_[i].query("complexity <= 2").loss.min(), 1e-6
-            )
+            simple_eqs = model.equations_[i].query("complexity <= 2")
+            self.assertTrue(len(simple_eqs) == 0 or simple_eqs.loss.min() > 1e-6)

     def test_unit_checks(self):
         """This just checks the number of units passed"""
@@ -977,7 +1044,7 @@ def test_unit_checks(self):
         valid_units = [
             (np.ones((10, 2)), np.ones(10), ["m/s", "s"], "m"),
             (np.ones((10, 1)), np.ones(10), ["m/s"], None),
-            (np.ones((10, 1)), np.ones(10), None, "m/s"),
+            (np.ones((10, 1)), np.ones(10), None, "km/s"),
             (np.ones((10, 1)), np.ones(10), None, ["m/s"]),
             (np.ones((10, 1)), np.ones((10, 1)), None, ["m/s"]),
             (np.ones((10, 1)), np.ones((10, 2)), None, ["m/s", ""]),
@@ -992,7 +1059,7 @@ def test_unit_checks(self):
         )
         invalid_units = [
             (np.ones((10, 2)), np.ones(10), ["m/s", "s", "s^2"], None),
-            (np.ones((10, 2)), np.ones(10), ["m/s", "s", "s^2"], "m"),
+            (np.ones((10, 2)), np.ones(10), ["m/s", "s", "s^2"], "km"),
             (np.ones((10, 2)), np.ones((10, 2)), ["m/s", "s"], ["m"]),
             (np.ones((10, 1)), np.ones((10, 1)), "m/s", ["m"]),
         ]
@@ -1043,8 +1110,10 @@ def test_unit_propagation(self):
         self.assertNotIn("x1", best["equation"])
         self.assertIn("x2", best["equation"])
         self.assertEqual(best["complexity"], 3)
-        self.assertEqual(model.equations_.iloc[0].complexity, 1)
-        self.assertGreater(model.equations_.iloc[0].loss, 1e-6)
+        self.assertTrue(
+            model.equations_.iloc[0].complexity > 1
+            or model.equations_.iloc[0].loss > 1e-6
+        )

         # With pkl file:
         pkl_file = str(temp_dir / "equation_file.pkl")
@@ -1063,8 +1132,8 @@ def test_unit_propagation(self):

         # Try warm start, but with no units provided (should
         # be a different dataset, and thus different result):
-        model.fit(X, y)
         model.early_stop_condition = "(l, c) -> l < 1e-6 && c == 1"
+        model.fit(X, y)
         self.assertEqual(model.equations_.iloc[0].complexity, 1)
         self.assertLess(model.equations_.iloc[0].loss, 1e-6)

@@ -1072,10 +1141,8 @@ def test_unit_propagation(self):
     # TODO: Determine desired behavior if second .fit() call does not have units


-def runtests():
+def runtests(just_tests=False):
     """Run all tests in test.py."""
-    suite = unittest.TestSuite()
-    loader = unittest.TestLoader()
     test_cases = [
         TestPipeline,
         TestBest,
@@ -1084,8 +1151,11 @@
         TestLaTeXTable,
         TestDimensionalConstraints,
     ]
+    if just_tests:
+        return test_cases
+    suite = unittest.TestSuite()
+    loader = unittest.TestLoader()
     for test_case in test_cases:
-        tests = loader.loadTestsFromTestCase(test_case)
-        suite.addTests(tests)
+        suite.addTests(loader.loadTestsFromTestCase(test_case))
     runner = unittest.TextTestRunner()
     return runner.run(suite)
diff --git a/pysr/test/test_cli.py b/pysr/test/test_cli.py
index 0a97a1eb..6d2a3a3a 100644
--- a/pysr/test/test_cli.py
+++ b/pysr/test/test_cli.py
@@ -1,59 +1,84 @@
 import unittest
+from textwrap import dedent

 from click import testing as click_testing

-from .._cli.main import pysr
-
-
-class TestCli(unittest.TestCase):
-    # TODO: Include test for custom project here.
-    def setUp(self):
-        self.cli_runner = click_testing.CliRunner()
-
-    def test_help_on_all_commands(self):
-        expected = "\n".join(
-            [
-                "Usage: pysr [OPTIONS] COMMAND [ARGS]...",
-                "",
-                "Options:",
-                "  --help  Show this message and exit.",
-                "",
-                "Commands:",
-                "  install  Install Julia dependencies for PySR.",
-                "",
-            ]
-        )
-        result = self.cli_runner.invoke(pysr, ["--help"])
-        self.assertEqual(expected, result.output)
-        self.assertEqual(0, result.exit_code)
-
-    def test_help_on_install(self):
-        expected = "\n".join(
-            [
-                "Usage: pysr install [OPTIONS]",
-                "",
-                "  Install Julia dependencies for PySR.",
-                "",
-                "Options:",
-                "  -p, --project PROJECT_DIRECTORY",
-                "                   Install in a specific Julia project (e.g., a",
-                "                   local copy of SymbolicRegression.jl).",
-                "  -q, --quiet      Disable logging.",
-                "  --precompile     Force precompilation of Julia libraries.",
-                "  --no-precompile  Disable precompilation.",
-                "  --help           Show this message and exit.",
-                "",
-            ]
-        )
-        result = self.cli_runner.invoke(pysr, ["install", "--help"])
-        self.assertEqual(expected, result.output)
-        self.assertEqual(0, result.exit_code)
-
-
-def runtests():
-    """Run all tests in cliTest.py."""
-    loader = unittest.TestLoader()
-    suite = unittest.TestSuite()
-    suite.addTests(loader.loadTestsFromTestCase(TestCli))
-    runner = unittest.TextTestRunner()
-    return runner.run(suite)
+
+def get_runtests():
+    # Lazy load to avoid circular imports.
+
+    from .._cli.main import pysr
+
+    class TestCli(unittest.TestCase):
+        # TODO: Include test for custom project here.
+        def setUp(self):
+            self.cli_runner = click_testing.CliRunner()
+
+        def test_help_on_all_commands(self):
+            expected = dedent(
+                """
+                Usage: pysr [OPTIONS] COMMAND [ARGS]...
+
+                Options:
+                  --help  Show this message and exit.
+
+                Commands:
+                  install  DEPRECATED (dependencies are now installed at import).
+                  test     Run parts of the PySR test suite.
+                """
+            )
+            result = self.cli_runner.invoke(pysr, ["--help"])
+            self.assertEqual(result.output.strip(), expected.strip())
+            self.assertEqual(result.exit_code, 0)
+
+        def test_help_on_install(self):
+            expected = dedent(
+                """
+                Usage: pysr install [OPTIONS]
+
+                  DEPRECATED (dependencies are now installed at import).
+
+                Options:
+                  -p, --project TEXT
+                  -q, --quiet         Disable logging.
+                  --precompile
+                  --no-precompile
+                  --help              Show this message and exit.
+                """
+            )
+            result = self.cli_runner.invoke(pysr, ["install", "--help"])
+            self.assertEqual(result.output.strip(), expected.strip())
+            self.assertEqual(result.exit_code, 0)
+
+        def test_help_on_test(self):
+            expected = dedent(
+                """
+                Usage: pysr test [OPTIONS] TESTS
+
+                  Run parts of the PySR test suite.
+
+                  Choose from main, jax, torch, cli, dev, and startup. You can give multiple
+                  tests, separated by commas.
+
+                Options:
+                  -k TEXT  Filter expressions to select specific tests.
+                  --help   Show this message and exit.
+ """ + ) + result = self.cli_runner.invoke(pysr, ["test", "--help"]) + self.assertEqual(result.output.strip(), expected.strip()) + self.assertEqual(result.exit_code, 0) + + def runtests(just_tests=False): + """Run all tests in cliTest.py.""" + tests = [TestCli] + if just_tests: + return tests + loader = unittest.TestLoader() + suite = unittest.TestSuite() + for test in tests: + suite.addTests(loader.loadTestsFromTestCase(test)) + runner = unittest.TextTestRunner() + return runner.run(suite) + + return runtests diff --git a/pysr/test/test_dev.py b/pysr/test/test_dev.py new file mode 100644 index 00000000..b8a2b464 --- /dev/null +++ b/pysr/test/test_dev.py @@ -0,0 +1,59 @@ +import os +import subprocess +import unittest +from pathlib import Path + + +class TestDev(unittest.TestCase): + def test_simple_change_to_backend(self): + """Test that we can use a development version of SymbolicRegression.jl""" + PYSR_TEST_JULIA_VERSION = os.environ.get("PYSR_TEST_JULIA_VERSION", "1.6") + PYSR_TEST_PYTHON_VERSION = os.environ.get("PYSR_TEST_PYTHON_VERSION", "3.9") + build_result = subprocess.run( + [ + "docker", + "build", + "-t", + "pysr-dev", + "--build-arg", + f"JLVERSION={PYSR_TEST_JULIA_VERSION}", + "--build-arg", + f"PYVERSION={PYSR_TEST_PYTHON_VERSION}", + "-f", + "pysr/test/test_dev_pysr.dockerfile", + ".", + ], + env=os.environ, + cwd=Path(__file__).parent.parent.parent, + universal_newlines=True, + ) + self.assertEqual(build_result.returncode, 0) + test_result = subprocess.run( + [ + "docker", + "run", + "--rm", + "pysr-dev", + "python3", + "-c", + "from pysr import SymbolicRegression as SR; print(SR.__test_function())", + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=os.environ, + cwd=Path(__file__).parent.parent.parent, + ) + self.assertEqual(test_result.returncode, 0) + self.assertEqual(test_result.stdout.decode("utf-8").strip(), "2.3") + + +def runtests(just_tests=False): + tests = [TestDev] + if just_tests: + return tests + suite = unittest.TestSuite() + loader = unittest.TestLoader() + for test in tests: + suite.addTests(loader.loadTestsFromTestCase(test)) + runner = unittest.TextTestRunner() + return runner.run(suite) diff --git a/pysr/test/test_dev_pysr.dockerfile b/pysr/test/test_dev_pysr.dockerfile new file mode 100644 index 00000000..2978e82b --- /dev/null +++ b/pysr/test/test_dev_pysr.dockerfile @@ -0,0 +1,55 @@ +# This dockerfile simulates a user installation that +# tries to manually edit SymbolicRegression.jl and +# use it from PySR. 
+
+ARG JLVERSION=1.9.4
+ARG PYVERSION=3.11.6
+ARG BASE_IMAGE=bullseye
+
+FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
+FROM python:${PYVERSION}-${BASE_IMAGE}
+
+# Merge Julia image:
+COPY --from=jl /usr/local/julia /usr/local/julia
+ENV PATH="/usr/local/julia/bin:${PATH}"
+
+WORKDIR /pysr
+
+# Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project)
+ADD ./requirements.txt /pysr/requirements.txt
+RUN pip3 install --no-cache-dir -r /pysr/requirements.txt
+
+# Install PySR:
+# We do a minimal copy so it doesn't need to rerun at every file change:
+ADD ./pyproject.toml /pysr/pyproject.toml
+ADD ./setup.py /pysr/setup.py
+
+RUN mkdir /pysr/pysr
+ADD ./pysr/*.py /pysr/pysr/
+ADD ./pysr/juliapkg.json /pysr/pysr/juliapkg.json
+
+RUN mkdir /pysr/pysr/_cli
+ADD ./pysr/_cli/*.py /pysr/pysr/_cli/
+
+RUN mkdir /pysr/pysr/test
+
+# Now, we create a custom version of SymbolicRegression.jl
+# First, we get the version from juliapkg.json:
+RUN python3 -c 'import json; print(json.load(open("/pysr/pysr/juliapkg.json", "r"))["packages"]["SymbolicRegression"]["version"])' > /pysr/sr_version
+
+# Remove any = or ^ or ~ from the version:
+RUN cat /pysr/sr_version | sed 's/[\^=~]//g' > /pysr/sr_version_processed
+
+# Now, we check out the version of SymbolicRegression.jl that PySR is using:
+RUN git clone -b "v$(cat /pysr/sr_version_processed)" --single-branch https://github.com/MilesCranmer/SymbolicRegression.jl /srjl
+
+# Edit SymbolicRegression.jl to create a new function.
+# We want to put this function immediately after `module SymbolicRegression`:
+RUN sed -i 's/module SymbolicRegression/module SymbolicRegression\n__test_function() = 2.3/' /srjl/src/SymbolicRegression.jl
+
+# Edit PySR to use the custom version of SymbolicRegression.jl:
+ADD ./pysr/test/generate_dev_juliapkg.py /generate_dev_juliapkg.py
+RUN python3 /generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl
+
+# Install and pre-compile
+RUN pip3 install --no-cache-dir . && python3 -c 'import pysr'
diff --git a/pysr/test/test_env.py b/pysr/test/test_env.py
deleted file mode 100644
index 423a3064..00000000
--- a/pysr/test/test_env.py
+++ /dev/null
@@ -1,58 +0,0 @@
-"""Contains tests for creating and initializing custom Julia projects."""
-
-import os
-import unittest
-from tempfile import TemporaryDirectory
-
-from .. import julia_helpers
-
-
-class TestJuliaProject(unittest.TestCase):
-    """Various tests for working with Julia projects."""
-
-    def test_custom_shared_env(self):
-        """Test that we can use PySR in a custom shared env."""
-        with TemporaryDirectory() as tmpdir:
-            # Create a temp depot to store julia packages (and our custom env)
-            Main = julia_helpers.init_julia()
-
-            # Set up env:
-            if "JULIA_DEPOT_PATH" not in os.environ:
-                old_env = None
-                os.environ["JULIA_DEPOT_PATH"] = tmpdir
-            else:
-                old_env = os.environ["JULIA_DEPOT_PATH"]
-                os.environ[
-                    "JULIA_DEPOT_PATH"
-                ] = f"{tmpdir}:{os.environ['JULIA_DEPOT_PATH']}"
-            Main.eval(
-                f'pushfirst!(DEPOT_PATH, "{julia_helpers._escape_filename(tmpdir)}")'
-            )
-            test_env_name = "@pysr_test_env"
-            julia_helpers.install(julia_project=test_env_name)
-            Main = julia_helpers.init_julia(julia_project=test_env_name)
-
-            # Try to use env:
-            Main.eval("using SymbolicRegression")
-            Main.eval("using Pkg")
-
-            # Assert we actually loaded it:
-            cur_project_dir = Main.eval("splitdir(dirname(Base.active_project()))[1]")
-            potential_shared_project_dirs = Main.eval("Pkg.envdir(DEPOT_PATH[1])")
-            self.assertEqual(cur_project_dir, potential_shared_project_dirs)
-
-            # Clean up:
-            Main.eval("pop!(DEPOT_PATH)")
-            if old_env is None:
-                del os.environ["JULIA_DEPOT_PATH"]
-            else:
-                os.environ["JULIA_DEPOT_PATH"] = old_env
-
-
-def runtests():
-    """Run all tests in test_env.py."""
-    loader = unittest.TestLoader()
-    suite = unittest.TestSuite()
-    suite.addTests(loader.loadTestsFromTestCase(TestJuliaProject))
-    runner = unittest.TextTestRunner()
-    return runner.run(suite)
diff --git a/pysr/test/test_jax.py b/pysr/test/test_jax.py
index aaafb97c..5e4e5ef1 100644
--- a/pysr/test/test_jax.py
+++ b/pysr/test/test_jax.py
@@ -121,10 +121,14 @@ def test_feature_selection_custom_operators(self):
         np.testing.assert_almost_equal(y.values, jax_output, decimal=3)


-def runtests():
+def runtests(just_tests=False):
     """Run all tests in test_jax.py."""
+    tests = [TestJAX]
+    if just_tests:
+        return tests
     loader = unittest.TestLoader()
     suite = unittest.TestSuite()
-    suite.addTests(loader.loadTestsFromTestCase(TestJAX))
+    for test in tests:
+        suite.addTests(loader.loadTestsFromTestCase(test))
     runner = unittest.TextTestRunner()
     return runner.run(suite)
diff --git a/pysr/test/test_nb.ipynb b/pysr/test/test_nb.ipynb
new file mode 100644
index 00000000..1cd394ff
--- /dev/null
+++ b/pysr/test/test_nb.ipynb
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# NBVAL_IGNORE_OUTPUT\n",
+    "import numpy as np\n",
+    "from pysr import PySRRegressor, jl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%julia\n",
+    "\n",
+    "# Automatically activates Julia magic\n",
+    "\n",
+    "x = 1\n",
+    "println(x + 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "4\n"
+     ]
+    }
+   ],
+   "source": [
+    "%julia println(x + 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "my_loss (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%julia\n",
+    "function my_loss(x)\n",
+    "    x ^ 2\n",
+    "end"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%julia my_loss(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'PySRRegressor.equations_ = None'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "rstate = np.random.RandomState(0)\n",
+    "X = np.random.randn(10, 2)\n",
+    "y = np.random.randn(10)\n",
+    "\n",
+    "model = PySRRegressor(deterministic=True, multithreading=False, procs=0, random_state=0, verbosity=0, progress=False, niterations=1, ncycles_per_iteration=1)\n",
+    "str(model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "pandas.core.frame.DataFrame"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.fit(X, y)\n",
+    "type(model.equations_)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pysr/test/test_startup.py b/pysr/test/test_startup.py
new file mode 100644
index 00000000..8a93ad3a
--- /dev/null
+++ b/pysr/test/test_startup.py
@@ -0,0 +1,167 @@
+import os
+import platform
+import subprocess
+import sys
+import tempfile
+import textwrap
+import unittest
+from pathlib import Path
+
+import numpy as np
+
+from .. import PySRRegressor
+from ..julia_import import jl_version
+from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
+
+
+class TestStartup(unittest.TestCase):
+    """Various tests related to starting up PySR."""
+
+    def setUp(self):
+        # Using inspect,
+        # get default niterations from PySRRegressor, and double them:
+        self.default_test_kwargs = dict(
+            progress=False,
+            model_selection="accuracy",
+            niterations=DEFAULT_NITERATIONS * 2,
+            populations=DEFAULT_POPULATIONS * 2,
+            temp_equation_file=True,
+        )
+        self.rstate = np.random.RandomState(0)
+        self.X = self.rstate.randn(100, 5)
+
+    def test_warm_start_from_file(self):
+        """Test that we can warm start in another process."""
+        if platform.system() == "Windows":
+            self.skipTest("Warm start test incompatible with Windows")
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            model = PySRRegressor(
+                **self.default_test_kwargs,
+                unary_operators=["cos"],
+            )
+            model.warm_start = True
+            model.temp_equation_file = False
+            model.equation_file = Path(tmpdirname) / "equations.csv"
+            model.deterministic = True
+            model.multithreading = False
+            model.random_state = 0
+            model.procs = 0
+            model.early_stop_condition = 1e-10
+
+            rstate = np.random.RandomState(0)
+            X = rstate.randn(100, 2)
+            y = np.cos(X[:, 0]) ** 2
+            model.fit(X, y)
+
+            best_loss = model.equations_.iloc[-1]["loss"]
+
+            # Save X and y to a file:
+            X_file = Path(tmpdirname) / "X.npy"
+            y_file = Path(tmpdirname) / "y.npy"
+            np.save(X_file, X)
+            np.save(y_file, y)
+            # Now, create a new process and warm start from the file:
+            result = subprocess.run(
+                [
+                    sys.executable,
+                    "-c",
+                    textwrap.dedent(
+                        f"""
+                        from pysr import PySRRegressor
+                        import numpy as np
+
+                        X = np.load("{X_file}")
+                        y = np.load("{y_file}")
+
+                        print("Loading model from file")
+                        model = PySRRegressor.from_file("{model.equation_file}")
+
+                        assert model.julia_state_ is not None
+
+                        # Reset saved equations; should be loaded from state!
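+                        # (julia_state_ holds the serialized search state,
+                        # including the hall of fame, so clearing the
+                        # Python-side tables below must not lose equations.)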
+                        model.equations_ = None
+                        model.equation_file_contents_ = None
+
+                        model.warm_start = True
+                        model.niterations = 0
+                        model.max_evals = 0
+                        model.ncycles_per_iteration = 0
+
+                        model.fit(X, y)
+
+                        best_loss = model.equations_.iloc[-1]["loss"]
+
+                        assert best_loss <= {best_loss}
+                        """
+                    ),
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                env=os.environ,
+            )
+            self.assertEqual(result.returncode, 0)
+            self.assertIn("Loading model from file", result.stdout.decode())
+            self.assertIn("Started!", result.stderr.decode())
+
+    def test_bad_startup_options(self):
+        warning_tests = [
+            dict(
+                code='import os; os.environ["PYTHON_JULIACALL_HANDLE_SIGNALS"] = "no"; import pysr',
+                msg="PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set",
+            ),
+            dict(
+                code='import os; os.environ["PYTHON_JULIACALL_THREADS"] = "1"; import pysr',
+                msg="PYTHON_JULIACALL_THREADS environment variable is set",
+            ),
+            dict(
+                code="import juliacall; import pysr",
+                msg="juliacall module already imported.",
+            ),
+        ]
+        for warning_test in warning_tests:
+            result = subprocess.run(
+                [sys.executable, "-c", warning_test["code"]],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                env=os.environ,
+            )
+            self.assertIn(warning_test["msg"], result.stderr.decode())
+
+    def test_notebook(self):
+        if jl_version < (1, 9, 0):
+            self.skipTest("Julia version too old")
+        if platform.system() == "Windows":
+            self.skipTest("Notebook test incompatible with Windows")
+
+        notebook_file = Path(__file__).parent / "test_nb.ipynb"
+        sanitize_file = Path(__file__).parent / "nb_sanitize.cfg"
+
+        if not (notebook_file.exists() and sanitize_file.exists()):
+            self.skipTest("Files not available for testing")
+
+        result = subprocess.run(
+            [
+                sys.executable,
+                "-m",
+                "pytest",
+                "--nbval",
+                str(notebook_file),
+                "--nbval-sanitize-with",
+                str(sanitize_file),
+            ],
+            env=os.environ,
+        )
+        self.assertEqual(result.returncode, 0)
+
+
+def runtests(just_tests=False):
+    tests = [TestStartup]
+    if just_tests:
+        return tests
+    suite = unittest.TestSuite()
+    loader = unittest.TestLoader()
+    for test in tests:
+        suite.addTests(loader.loadTestsFromTestCase(test))
+    runner = unittest.TextTestRunner()
+    return runner.run(suite)
diff --git a/pysr/test/test_torch.py b/pysr/test/test_torch.py
index 5a71af71..35055c6a 100644
--- a/pysr/test/test_torch.py
+++ b/pysr/test/test_torch.py
@@ -1,4 +1,3 @@
-import platform
 import unittest

 import numpy as np
@@ -7,42 +6,28 @@

 from .. import PySRRegressor, sympy2torch

-# Need to initialize Julia before importing torch...
-
-
-def _import_torch():
-    if platform.system() == "Darwin":
-        # Import PyJulia, then Torch
-        from ..julia_helpers import init_julia
-
-        init_julia()
-
-        import torch
-    else:
-        # Import Torch, then PyJulia
-        # https://github.com/pytorch/pytorch/issues/78829
-        import torch
-    return torch
-

 class TestTorch(unittest.TestCase):
     def setUp(self):
         np.random.seed(0)

+        # Need to import after juliacall:
+        import torch
+
+        self.torch = torch
+
     def test_sympy2torch(self):
-        torch = _import_torch()
         x, y, z = sympy.symbols("x y z")
         cosx = 1.0 * sympy.cos(x) + y

-        X = torch.tensor(np.random.randn(1000, 3))
-        true = 1.0 * torch.cos(X[:, 0]) + X[:, 1]
+        X = self.torch.tensor(np.random.randn(1000, 3))
+        true = 1.0 * self.torch.cos(X[:, 0]) + X[:, 1]

         torch_module = sympy2torch(cosx, [x, y, z])

         self.assertTrue(
             np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
         )

     def test_pipeline_pandas(self):
-        torch = _import_torch()
         X = pd.DataFrame(np.random.randn(100, 10))
         y = np.ones(X.shape[0])
         model = PySRRegressor(
@@ -71,13 +56,12 @@ def test_pipeline_pandas(self):
         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")

         np.testing.assert_almost_equal(
-            tformat(torch.tensor(X.values)).detach().numpy(),
+            tformat(self.torch.tensor(X.values)).detach().numpy(),
             np.square(np.cos(X.values[:, 1])),  # Selection 1st feature
             decimal=3,
         )

     def test_pipeline(self):
-        torch = _import_torch()
         X = np.random.randn(100, 10)
         y = np.ones(X.shape[0])
         model = PySRRegressor(
@@ -106,22 +90,22 @@ def test_pipeline(self):
         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")

         np.testing.assert_almost_equal(
-            tformat(torch.tensor(X)).detach().numpy(),
+            tformat(self.torch.tensor(X)).detach().numpy(),
             np.square(np.cos(X[:, 1])),  # 2nd feature
             decimal=3,
         )

     def test_mod_mapping(self):
-        torch = _import_torch()
         x, y, z = sympy.symbols("x y z")
         expression = x**2 + sympy.atanh(sympy.Mod(y + 1, 2) - 1) * 3.2 * z

         module = sympy2torch(expression, [x, y, z])

-        X = torch.rand(100, 3).float() * 10
+        X = self.torch.rand(100, 3).float() * 10

         true_out = (
-            X[:, 0] ** 2 + torch.atanh(torch.fmod(X[:, 1] + 1, 2) - 1) * 3.2 * X[:, 2]
+            X[:, 0] ** 2
+            + self.torch.atanh(self.torch.fmod(X[:, 1] + 1, 2) - 1) * 3.2 * X[:, 2]
         )
         torch_out = module(X)
@@ -130,7 +114,6 @@ def test_mod_mapping(self):
         )

     def test_custom_operator(self):
-        torch = _import_torch()
         X = np.random.randn(100, 3)
         y = np.ones(X.shape[0])
         model = PySRRegressor(
@@ -156,7 +139,7 @@ def test_custom_operator(self):
         model.set_params(
             equation_file="equation_file_custom_operator.csv",
             extra_sympy_mappings={"mycustomoperator": sympy.sin},
-            extra_torch_mappings={"mycustomoperator": torch.sin},
+            extra_torch_mappings={"mycustomoperator": self.torch.sin},
         )
         model.refresh(checkpoint_file="equation_file_custom_operator.csv")
         self.assertEqual(str(model.sympy()), "sin(x1)")
@@ -165,13 +148,12 @@ def test_custom_operator(self):
         tformat = model.pytorch()
         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=sin(x1))")
         np.testing.assert_almost_equal(
-            tformat(torch.tensor(X)).detach().numpy(),
+            tformat(self.torch.tensor(X)).detach().numpy(),
             np.sin(X[:, 1]),
             decimal=3,
         )

     def test_feature_selection_custom_operators(self):
-        torch = _import_torch()
         rstate = np.random.RandomState(0)
         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
         cos_approx = lambda x: 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
@@ -196,16 +178,20 @@ def test_feature_selection_custom_operators(self):

         np_output = model.predict(X.values)

-        torch_output = torch_module(torch.tensor(X.values)).detach().numpy()
+        torch_output = torch_module(self.torch.tensor(X.values)).detach().numpy()

         np.testing.assert_almost_equal(y.values, np_output, decimal=3)
         np.testing.assert_almost_equal(y.values, torch_output, decimal=3)


-def runtests():
+def runtests(just_tests=False):
     """Run all tests in test_torch.py."""
+    tests = [TestTorch]
+    if just_tests:
+        return tests
     loader = unittest.TestLoader()
     suite = unittest.TestSuite()
-    suite.addTests(loader.loadTestsFromTestCase(TestTorch))
+    for test in tests:
+        suite.addTests(loader.loadTestsFromTestCase(test))
     runner = unittest.TextTestRunner()
     return runner.run(suite)
diff --git a/pysr/version.py b/pysr/version.py
deleted file mode 100644
index b127c680..00000000
--- a/pysr/version.py
+++ /dev/null
@@ -1,2 +0,0 @@
-__version__ = "0.16.5"
-__symbolic_regression_jl_version__ = "0.22.5"
diff --git a/requirements.txt b/requirements.txt
index 876db3e8..58ee1c41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,8 @@
-sympy
-pandas>=0.21.0
-numpy
-scikit_learn>=1.0.0
-julia>=0.6.0
-click>=7.0.0
+sympy>=1.0.0,<2.0.0
+pandas>=0.21.0,<3.0.0
+numpy>=1.13.0,<2.0.0
+scikit_learn>=1.0.0,<2.0.0
+juliacall==0.9.20
+click>=7.0.0,<9.0.0
 setuptools>=50.0.0
+typing_extensions>=4.0.0,<5.0.0; python_version < "3.8"
diff --git a/setup.py b/setup.py
index d9f026f4..2cf7ba8e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,30 +1,32 @@
-import setuptools
+import os

-try:
-    with open("README.md", "r", encoding="utf8") as fh:
-        long_description = fh.read()
-except FileNotFoundError:
-    long_description = ""
+from setuptools import setup

-exec(open("pysr/version.py").read())
+if os.path.exists(".git"):
+    kwargs = {
+        "use_scm_version": {
+            "write_to": "pysr/version.py",
+        },
+        "setup_requires": ["setuptools", "setuptools_scm"],
+    }
+else:
+    # Read from pyproject.toml directly
+    import re

-setuptools.setup(
-    name="pysr",
-    version=__version__,
-    author="Miles Cranmer",
-    author_email="miles.cranmer@gmail.com",
-    description="Simple and efficient symbolic regression",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://github.com/MilesCranmer/pysr",
-    # Read from requirements.txt:
-    install_requires=open("requirements.txt").read().splitlines(),
-    packages=setuptools.find_packages(),
-    package_data={"pysr": ["../Project.toml", "../datasets/*"]},
-    include_package_data=False,
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "Operating System :: OS Independent",
-    ],
-    python_requires=">=3.7",
-)
+    with open(os.path.join(os.path.dirname(__file__), "pyproject.toml")) as f:
+        data = f.read()
+
+    # Find the version
+    version = re.search(r'version = "(.*)"', data).group(1)
+
+    # Write the version to version.py
+    with open(os.path.join(os.path.dirname(__file__), "pysr", "version.py"), "w") as f:
+        f.write(f'__version__ = "{version}"')
+
+    kwargs = {
+        "use_scm_version": False,
+        "version": version,
+    }
+
+
+# Build options are managed in pyproject.toml
setup(**kwargs)
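+
+# Typical usage, whichever branch above resolves the version (a sketch;
+# cf. .github/workflows/CI.yml, which runs the equivalent under coverage):
+#   pip install .
+#   python -m pysr test main,cli,startup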