0

I'm trying to start an EC2Cluster using dask_cloudprovider. I have already done this for Fargate, and I figured that EC2 would be similar. Problem: I tried to do it in the most basic way possible:

from dask_cloudprovider.aws import EC2Cluster
from dask.distributed import Client
cluster = EC2Cluster(
    n_workers=1, 
)

But even with this basic code I still get the following output:

Creating scheduler instance 2023-03-22 17:24:14,805 - distributed.deploy.spec - WARNING - Cluster closed without starting up --------------------------------------------------------------------------- ClientError Traceback (most recent call last) File ~/.local/lib/python3.10/site-packages/distributed/deploy/spec.py:320, in SpecCluster._start(self) 319 self.scheduler = cls(**self.scheduler_spec.get("options", {})) --> 320 self.scheduler = await self.scheduler 321 self.scheduler_comm = rpc( 322 getattr(self.scheduler, "external_address", None) 323 or self.scheduler.address, 324 connection_args=self.security.get_connection_args("client"), 325 )

File ~/.local/lib/python3.10/site-packages/distributed/deploy/spec.py:68, in ProcessInterface.__await__.<locals>._() 67 if self.status == Status.created: ---> 68 await self.start() 69 assert self.status == Status.running

File ~/.local/lib/python3.10/site-packages/dask_cloudprovider/generic/vmcluster.py:88, in SchedulerMixin.start(self) 87 self.cluster._log("Creating scheduler instance") ---> 88 ip = await self.create_vm() 89 self.address = f"{self.cluster.protocol}://{ip}:{self.port}"

File ~/.local/lib/python3.10/site-packages/dask_cloudprovider/aws/ec2.py:164, in EC2Instance.create_vm(self) 162 vm_kwargs["Placement"] = {"AvailabilityZone": self.availability_zone} --> 164 response = await client.run_instances(**vm_kwargs) 165 [self.instance] = response["Instances"]

File ~/.local/lib/python3.10/site-packages/aiobotocore/client.py:371, in AioBaseClient._make_api_call(self, operation_name, api_params) 370 error_class = self.exceptions.from_code(error_code) --> 371 raise error_class(parsed_response, operation_name) 372 else:

ClientError: An error occurred (InvalidParameterValue) when calling the RunInstances operation: User data is limited to 16384 bytes

During handling of the above exception, another exception occurred:

TypeError Traceback (most recent call last) Cell In[8], line 1 ----> 1 cluster = EC2Cluster( 2 n_workers=1, 3 )

File ~/.local/lib/python3.10/site-packages/dask_cloudprovider/aws/ec2.py:605, in EC2Cluster.__init__(self, region, availability_zone, bootstrap, auto_shutdown, ami, instance_type, scheduler_instance_type, worker_instance_type, vpc, subnet_id, security_groups, filesystem_size, key_name, iam_instance_profile, docker_image, debug, instance_tags, volume_tags, use_private_ip, enable_detailed_monitoring, **kwargs) 603 self.scheduler_options["instance_type"] = self.scheduler_instance_type 604 self.worker_options["instance_type"] = self.worker_instance_type --> 605 super().__init__(debug=debug, **kwargs)

File ~/.local/lib/python3.10/site-packages/dask_cloudprovider/generic/vmcluster.py:297, in VMCluster.__init__(self, n_workers, worker_class, worker_options, scheduler_options, docker_image, docker_args, extra_bootstrap, env_vars, security, protocol, debug, **kwargs) 294 self.worker_options["extra_bootstrap"] = extra_bootstrap 295 self.uuid = str(uuid.uuid4())[:8] --> 297 super().__init__(**kwargs, security=self.security)

File ~/.local/lib/python3.10/site-packages/distributed/deploy/spec.py:286, in SpecCluster.__init__(self, workers, scheduler, worker, asynchronous, loop, security, silence_logs, name, shutdown_on_close, scheduler_sync_interval) 284 if not called_from_running_loop: 285 self._loop_runner.start() --> 286 self.sync(self._start) 287 try: 288 self.sync(self._correct_state)

File ~/.local/lib/python3.10/site-packages/distributed/utils.py:345, in SyncMethodMixin.sync(self, func, asynchronous, callback_timeout, *args, **kwargs) 343 return future 344 else: --> 345 return sync( 346 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs 347 )

File ~/.local/lib/python3.10/site-packages/distributed/utils.py:412, in sync(loop, func, callback_timeout, *args, **kwargs) 410 if error: 411 typ, exc, tb = error --> 412 raise exc.with_traceback(tb) 413 else: 414 return result

File ~/.local/lib/python3.10/site-packages/distributed/utils.py:385, in sync.<locals>.f() 383 future = wait_for(future, callback_timeout) 384 future = asyncio.ensure_future(future) --> 385 result = yield future 386 except Exception: 387 error = sys.exc_info()

File ~/.local/lib/python3.10/site-packages/tornado/gen.py:769, in Runner.run(self) 766 exc_info = None 768 try: --> 769 value = future.result() 770 except Exception: 771 exc_info = sys.exc_info()

File ~/.local/lib/python3.10/site-packages/dask_cloudprovider/generic/vmcluster.py:339, in VMCluster._start(self) 329 self.worker_spec = { 330 self._new_worker_name(i): self.new_spec for i in range(self._n_workers) 331 } 333 with warn_on_duration( 334 "10s", 335 "Creating your cluster is taking a surprisingly long time. " 336 "This is likely due to pending resources. " 337 "Hang tight! ", 338 ): --> 339 await super()._start()

File ~/.local/lib/python3.10/site-packages/distributed/deploy/spec.py:329, in SpecCluster._start(self) 327 except Exception as e: # pragma: no cover 328 self.status = Status.failed --> 329 await self._close() 330 raise RuntimeError(f"Cluster failed to start: {e}") from e

File ~/.local/lib/python3.10/site-packages/distributed/deploy/spec.py:453, in SpecCluster._close(self) 450 logger.warning("Cluster closed without starting up") 452 if self.scheduler: --> 453 await self.scheduler.close() 454 for w in self._created: 455 assert w.status in { 456 Status.closing, 457 Status.closed, 458 Status.failed, 459 }, w.status

File ~/.local/lib/python3.10/site-packages/dask_cloudprovider/generic/vmcluster.py:60, in VMInterface.close(self) 58 async def close(self): 59 """Destroy a VM.""" ---> 60 await self.destroy_vm() 61 await super().close()

File ~/.local/lib/python3.10/site-packages/dask_cloudprovider/aws/ec2.py:228, in EC2Instance.destroy_vm(self) 223 boto_config = botocore.config.Config(retries=dict(max_attempts=10)) 224 async with self.cluster.boto_session.create_client( 225 "ec2", region_name=self.region, config=boto_config 226 ) as client: 227 await client.terminate_instances( --> 228 InstanceIds=[self.instance["InstanceId"]], DryRun=False 229 ) 230 self.cluster._log(f"Terminated {self.name} ({self.instance['InstanceId']})")

TypeError: 'NoneType' object is not subscriptable

Can anyone tell me what's wrong? From reading the documentation I don't see any required parameters. Reading the error, it looks like some InstanceId parameter is missing, but there is no such parameter in the dask_cloudprovider documentation.

1 Answer 1

1

The error you are seeing is this one:

ClientError: An error occurred (InvalidParameterValue) when calling the RunInstances operation: User data is limited to 16384 bytes

This is a known problem; see https://github.com/dask/dask-cloudprovider/issues/249. The current workaround is to not use TLS in your Dask configuration (security=False) and to ensure network safety in other AWS-specific ways.

Sign up to request clarification or add additional context in comments.

2 Comments

Thanks for the answer. I have read the GitHub issue you linked and I think that might be it. I have had no time to test it yet, but once I do I will give some feedback.
Just to confirm that it did the trick.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.