diff --git a/src/Benchmarks/Traversers/WriteOne.h b/src/Benchmarks/Traversers/WriteOne.h
index 73bf0bfecfb3f111f0feac0e6091aa9549430673..9fd269f1091940bc0e418486306fdd22973e7c70 100644
--- a/src/Benchmarks/Traversers/WriteOne.h
+++ b/src/Benchmarks/Traversers/WriteOne.h
@@ -39,13 +39,13 @@ class WriteOne< 1, Device, Real, Index >
       static void run( std::size_t size )
       {
          Vector v( size );
-         auto writeOne = []( Index i, Real* data )
+         auto writeOne = [] __cuda_callable__ ( Index i, Real* data ) // NOTE(review): __cuda_callable__ presumably expands to CUDA host/device qualifiers so this lambda can also run on a non-host Device - confirm against the macro's definition
          {
             data[ i ] = 1.0;
          };
          
          
-         ParallelFor< Devices::Host >::exec( ( std::size_t ) 0, size, writeOne, v.getData() );
+         ParallelFor< Device >::exec( ( std::size_t ) 0, size, writeOne, v.getData() ); // dispatches writeOne through ParallelFor for the class's Device template parameter, with bounds 0 and size and v's data pointer as the extra argument
       }
 };