# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.20.*, run `git checkout 0.20` or switch to the `0.20` branch on GitHub)

- name: text-moderator
  kind: RealtimeAPI
  predictor:
    type: python
    path: predictor_with_model_download.py
    processes_per_replica: 1
    threads_per_process: 32
  compute:
    # cpu: 1
    gpu: 1  # this is optional, since the API can also run on CPU
  autoscaling:  # (aws only)
    min_replicas: 1  # minimum number of replicas (default: 1)
    max_replicas: 4  # maximum number of replicas (default: 100)
    init_replicas: 1  # initial number of replicas (default: <min_replicas>)
    window: 10s  # the time over which to average the API's concurrency (default: 60s)
    upscale_stabilization_period: 0s  # the API will not scale above the lowest recommendation made during this period (default: 1m)
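
The file referenced by `path` must expose Cortex's Python predictor interface: a `PythonPredictor` class whose `__init__(self, config)` runs once per worker process at startup and whose `predict(self, payload)` handles each request. Below is a minimal sketch of what `predictor_with_model_download.py` could look like; the S3 bucket/key names, the `config` keys, and the assumption that the model is a pickled scikit-learn pipeline are illustrative only and are not part of the config above.

```python
# predictor_with_model_download.py -- illustrative sketch, not the actual example code
import pickle

import boto3


class PythonPredictor:
    def __init__(self, config):
        # Called once when each worker process starts: download the model
        # artifact from S3 and load it into memory so predict() can reuse it.
        bucket = config["bucket"]  # hypothetical key, e.g. set under predictor.config in cortex.yaml
        key = config["key"]        # hypothetical key, e.g. "text-moderator/model.pkl"
        local_path = "/tmp/model.pkl"

        boto3.client("s3").download_file(bucket, key, local_path)
        with open(local_path, "rb") as f:
            self.model = pickle.load(f)  # assumes a pickled scikit-learn text pipeline

    def predict(self, payload):
        # Called for every request; payload is the parsed JSON request body.
        label = self.model.predict([payload["text"]])[0]
        return {"label": str(label)}
```

With a predictor like this, the `config` values it reads (`bucket`, `key`) would be supplied via a `config:` section under `predictor:` in the YAML above; downloading the model in `__init__` keeps request latency low, since `predict()` only runs inference.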