Merge branch 'develop'

clMathLibraries · Oct 19, 2015 · f34c68b · f34c68b
2 parents 1ca97a4 + 92997f5
commit f34c68b
Show file tree

Hide file tree

Showing 62 changed files with 40,309 additions and 1,390 deletions.
diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,6 @@
 
 # ignore build directory if name is 'build'
 build/
+
+# ignore tilde files
+*~
diff --git a/.travis.yml b/.travis.yml
@@ -1,36 +1,134 @@
+# Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases
+# Ubuntu 12.04 LTS (Precise Pangolin) <== Travis CI VM image
+# Ubuntu 12.10 (Quantal Quetzal)
+# Ubuntu 13.04 (Raring Ringtail)
+# Ubuntu 13.10 (Saucy Salamander)
+# Ubuntu 14.04 LTS (Trusty Tahr)
+# Ubuntu 14.10 (Utopic Unicorn)
+# Ubuntu 15.04 (Vivid Vervet)
+# Ubuntu 15.10 (Wily Werewolf)
+# Ubuntu 16.04 LTS (Xenial Xerus)
+
+# language: instructs travis what compilers && environment to set up in build matrix
 language: cpp
 
+# sudo: false instructs travis to build our project in a docker VM (faster)
+# Cannot yet install fglrx packages with 'false'
+sudo: required # false
+
+# os: expands the build matrix to include multiple os's
+# NOTE: linux builds show sporadic failures when building boost (needs investigation); linux is still enabled below
+os:
+  - linux
+  - osx
+
+# compiler: expands the build matrix to include multiple compilers (per os)
 compiler:
   - gcc
+  - clang
+
+addons:
+  # apt: is disabled on osx builds
+  # apt: needed by docker framework to install project dependencies without
+  # sudo.  Apt uses published Ubuntu PPAs from https://launchpad.net/
+  # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
+  apt:
+    sources:
+      # ubuntu-toolchain-r-test contains newer versions of gcc to install
+      # - ubuntu-toolchain-r-test
+      # llvm-toolchain-precise-3.6 contains newer versions of clang to install
+      # - llvm-toolchain-precise-3.6
+      # kubuntu-backports contains newer versions of cmake to install
+      - kubuntu-backports
+      # boost-latest contains boost v1.55
+      - boost-latest
+    packages:
+      # g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
+      # - g++-4.8
+      # - clang-3.6
+      # We require v2.8.12 minimum
+      - cmake
+      # I'm finding problems between pre-compiled versions of boost ublas, with gtest
+      # stl_algobase.h: error: no matching function for call to swap()
+      - libboost-program-options1.55-dev
+      # - libboost-serialization1.55-dev
+      # - libboost-filesystem1.55-dev
+      # - libboost-system1.55-dev
+      # - libboost-regex1.55-dev
+      # The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater
+#      - opencl-headers
+      # Uncomment one of the following when fglrx modules are added to the apt whitelist
+#      - fglrx
+#      - fglrx=2:8.960-0ubuntu1
+#      - fglrx=2:13.350.1-0ubuntu0.0.1
+
+# env: specifies additional global variables to define per row in build matrix
+env:
+  global:
+    - CLFFT_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
+
+# The following filters our build matrix; we are interested in linux-gcc & osx-clang
+matrix:
+  exclude:
+    - os: linux
+      compiler: clang
+    - os: osx
+      compiler: gcc
 
 before_install:
-  - sudo apt-get update -qq
-  - sudo apt-get install -qq fglrx opencl-headers libboost-program-options-dev libfftw3-dev
-# Uncomment below to help verify the installs above work
-#  - ls -la /usr/lib/libboost*
-#  - ls -la /usr/include/boost
+  # Remove the following linux clause when fglrx can be installed with sudo: false
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      sudo apt-get update -qq &&
+      sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
+    fi
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
+    fi
+  - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+      brew update;
+      brew outdated boost || brew upgrade boost;
+      brew outdated cmake || brew upgrade cmake;
+    fi
+  # - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
+  - cmake --version;
+  - ${CC} --version;
+  - ${CXX} --version;
 
+install:
+  # 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
+  # Remove when the travis VM upgrades to 'trusty' or beyond
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      mkdir -p ${OPENCL_ROOT}/include/CL;
+      pushd ${OPENCL_ROOT}/include/CL;
+      wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
+      popd;
+    fi
+  # osx image does not contain cl.hpp file; download from Khronos
+  # - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+  #     pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/;
+  #     sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp;
+  #     popd;
+  #   fi
+
+# Use before_script: to run configure steps
 before_script:
-  - cd ${TRAVIS_BUILD_DIR}
-  - mkdir -p bin/clFFT
-  - cd bin/clFFT
-  - cmake -DBoost_NO_SYSTEM_PATHS=OFF -DCMAKE_INSTALL_PREFIX:PATH=$PWD/package ../../src
-
-script: 
-  - make install
-#  - ls -Rla package
-# Run a simple test to validate that the build works; CPU device in a VM
-  - cd package/bin
-  - export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clFFT/package/lib64:${LD_LIBRARY_PATH}
-  - ./clFFT-client -i
-
-after_success:
-  - cd ${TRAVIS_BUILD_DIR}/bin/clFFT
+  - mkdir -p ${CLFFT_ROOT}
+  - pushd ${CLFFT_ROOT}
+  - cmake -DCMAKE_BUILD_TYPE=Release -DBoost_NO_SYSTEM_PATHS=OFF -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
+
+# use script: to execute build steps
+script:
   - make package
 
-notifications:
-   email:
-     - [email protected]
-   on_success: change
-   on_failure: always
-
+deploy:
+  provider: releases
+  prerelease: true
+  draft: true
+  skip_cleanup: true
+  api_key:
+    secure: MBkxtcfSk+4UvGRO+WRhmS86vIVzAs0LIF2sAtr/S+Ed+OdUAuhZypUsDXGWtK3mL55v9c8BZXefFfHfJqElcNmyHKwCptbCR/JiM8YBtjoy2/RW1NcJUZp+QuRlk23xPADj7QkPjv7dfrQUMitkLUXAD+uTmMe2l8gmlbhMrQqPBKhb+31FNv6Lmo6oa6GjbiGi7qjsrJc7uQjhppLam+M7BZbBALGbIqMIrb2BMDMMhBoDbb4zSKrSg3+krd3kKiCClJlK7xjIlyFXZ527ETQ+PMtIeQb0eJ3aQwa4caBRCm5BDzt8GnJ48S88EkynbQioCEE87ebcyOM7M+wfslW/Fm1Y86X5odIljkOmTNKoDvgLxc9vUCBtMyVHNIgZcToPdsrMsGxcHV+JtU3yVQVm6dnA5P/zG5bA+aBjsd7p7BdOE4fdhvZV5XRAk/wmiyWalF7hKJxHIiWAKknL+tpPDDUF+fHmDDsdf7yRDJBegNcKfw4+m19MIvLn9fbiNVCtwCAL1T4yWkIEpi4MRMDPtftmkZPbi6UwluOJUTeCeHe4en99Yu2haemNPqXs6rR0LlXGk31GQwzlrNfb+94F5tT2a4Ka4PsruA2NMW/IYCYEE5Gu7PihVDR031Fn9cdCU9kefUgyB07rJD6q/W+ljsU0osyg7VxyfMg8rkw=
+  file: ${CLFFT_ROOT}/*.tar.gz
+  file_glob: true
+  on:
+    all_branches: true
+    tags: true
diff --git a/README.md b/README.md
@@ -1,16 +1,25 @@
+## Build Status
+| Build branch | master | develop |
+|-----|-----|-----|
+| GCC/Clang x64 | [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.svg?branch=master)](https://travis-ci.org/clMathLibraries/clFFT/branches) | [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.svg?branch=develop)](https://travis-ci.org/clMathLibraries/clFFT/branches) |
+| Visual Studio x64 |  |[![Build status](https://ci.appveyor.com/api/projects/status/facii32v72y98opv/branch/develop?svg=true)](https://ci.appveyor.com/project/kknox/clfft-whc3m/branch/develop) |
+
 clFFT
 =====
-[![Build Status](https://travis-ci.org/clMathLibraries/clFFT.png)](https://travis-ci.org/clMathLibraries/clFFT)
-
 clFFT is a software library containing FFT functions written
-in OpenCL. In addition to GPU devices, the libraries also support
+in OpenCL. In addition to GPU devices, the library also supports
 running on CPU devices to facilitate debugging and heterogeneous
 programming.
 
 Pre-built binaries are available [here][binary_release].
 
 ## What's New
 
+-   Support for power-of-7 size transforms
+-   Pre-callback feature that enables custom pre-processing
+    of input data directly by the library with user callback function
+-   Support for 1D large size transforms with no extra memory allocation
+    requirement for certain sizes
 -   Significant uplift of 1D complex transform performance
 -   Significant uplift of 1D real transform performance for power-of-2 sizes
 -   1D large size limit relaxation for complex transforms
@@ -23,7 +32,7 @@ Pre-built binaries are available [here][binary_release].
 
 The FFT is an implementation of the Discrete Fourier Transform (DFT)
 that makes use of symmetries in the FFT definition to reduce the
-mathematical intensity required from O(N2) to O(N log2( N )) when the
+mathematical intensity required from O(N^2) to O(N log2(N)) when the
 sequence length N is the product of small prime factors. Currently,
 there is no standard API for FFT routines. Hardware vendors usually
 provide a set of high-performance FFTs optimized for their systems: no
@@ -33,23 +42,22 @@ processors, but also are functional across CPU and other compute
 devices.
 
 The clFFT library is an open source OpenCL library implementation of
-discrete Fast Fourier Transforms. It:
+discrete Fast Fourier Transforms. The library:
 
--   Provides a fast and accurate platform for calculating discrete FFTs.
+-   provides a fast and accurate platform for calculating discrete FFTs.
 
--   Works on CPU or GPU backends.
+-   works on CPU or GPU backends.
 
--   Supports in-place or out-of-place transforms.
+-   supports in-place or out-of-place transforms.
 
--   Supports 1D, 2D, and 3D transforms with a batch size that can be
+-   supports 1D, 2D, and 3D transforms with a batch size that can be
     greater than 1.
 
--   Supports planar (real and complex components in separate arrays) and
+-   supports planar (real and complex components in separate arrays) and
     interleaved (real and complex components as a pair contiguous in
     memory) formats.
 
--   Supports dimension lengths that can be any mix of powers of 2, 3,
-    and 5.
+-   supports dimension lengths that can be any combination of powers of 2, 3, 5, and 7.
 
 -   supports single and double precision floating point formats.
 
@@ -60,13 +68,13 @@ a GitHub Pages website
 
 ### Google Groups
 
-Two mailing lists have been created for the clMath projects:
+Two mailing lists exist for the clMath projects:
 
 -   [[email protected]][] - group whose focus is to answer
     questions on using the library or reporting issues
 
 -   [[email protected]][] - group whose focus is for
-    developers interested in contributing to the library code itself
+    developers interested in contributing to the library code
 
 ## clFFT Wiki
 
@@ -78,7 +86,7 @@ primer][[email protected]]
 
 Please refer to and read the [Contributing][] document for guidelines on
 how to contribute code to this open source project. The code in the
-/master branch is considered to be stable, and all pull-requests should
+/master branch is considered to be stable, and all pull-requests must
 be made against the /develop branch.
 
 ## License
@@ -88,7 +96,7 @@ The source for clFFT is licensed under the [Apache License, Version
 
 ## Example
 
-The simple example below shows how to use clFFT to compute an simple 1D
+The following simple example shows how to use clFFT to compute a simple 1D
 forward transform
 ```c
 #include <stdlib.h>
@@ -109,12 +117,12 @@ int main( void )
     cl_event event = NULL;
     int ret = 0;
 	size_t N = 16;
-	
+
 	/* FFT library related declarations */
 	clfftPlanHandle planHandle;
 	clfftDim dim = CLFFT_1D;
 	size_t clLengths[1] = {N};
-                
+
     /* Setup OpenCL environment. */
     err = clGetPlatformIDs( 1, &platform, NULL );
     err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );
@@ -131,7 +139,7 @@ int main( void )
 	/* Allocate host & initialize data. */
 	/* Only allocation shown for simplicity. */
 	X = (float *)malloc(N * 2 * sizeof(*X));
-                
+
     /* Prepare OpenCL memory objects and place data inside them. */
     bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err );
 
@@ -140,15 +148,15 @@ int main( void )
 
 	/* Create a default plan for a complex FFT. */
 	err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
-	
+
 	/* Set plan parameters. */
 	err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
 	err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
 	err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);
-                                
+
     /* Bake the plan. */
 	err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
-	
+
 	/* Execute the plan. */
 	err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);
 
@@ -162,7 +170,7 @@ int main( void )
     clReleaseMemObject( bufX );
 
 	free(X);
-	
+
 	/* Release the plan. */
 	err = clfftDestroyPlan( &planHandle );
 
@@ -179,29 +187,30 @@ int main( void )
 
 ## Build dependencies
 
-### Library for Windows
+### Library for Windows   
+To develop the clFFT library code on a Windows operating system, ensure that the following packages are installed on your system:
 
--   Windows® 7/8
+-   Windows® 7/8.1
 
--   Visual Studio 2010 SP1, 2012
+-   Visual Studio 2012 or later
 
 -   Latest CMake
 
--   An OpenCL SDK, such as APP SDK 2.9
+-   An OpenCL SDK, such as APP SDK 3.0
 
 ### Library for Linux
-
+To develop the clFFT library code on a Linux operating system, ensure that the following packages are installed on your system:
 -   GCC 4.6 and onwards
 
 -   Latest CMake
 
--   An OpenCL SDK, such as APP SDK 2.9
+-   An OpenCL SDK, such as APP SDK 3.0
 
 ### Library for Mac OSX
-
--   Recommended to generate Unix makefiles with cmake
+To develop the clFFT library code on Mac OS X, it is recommended to generate Unix makefiles with cmake.
 
 ### Test infrastructure
+To test the developed clFFT library code, ensure that the following packages are installed on your system:
 
 -   Googletest v1.6
 
@@ -210,8 +219,7 @@ int main( void )
 -   Latest Boost
 
 ### Performance infrastructure
-
--   Python
+To measure the performance of the clFFT library code, ensure that the Python package is installed on your system.
 
   [Library and API documentation]: http://clmathlibraries.github.io/clFFT/
   [[email protected]]: https://github.com/clMathLibraries/clFFT/wiki