# WARNING: you are on the master branch; please refer to examples on the branch corresponding to your `cortex version` (e.g. for version 0.20.*, run `git checkout 0.20` or switch to the `0.20` branch on GitHub)

- name: text-moderator
  kind: RealtimeAPI
  predictor:
    type: python
    path: predictor_with_model_download.py
    processes_per_replica: 1
    threads_per_process: 32
  compute:
    # cpu: 1
    gpu: 1  # this is optional, since the API can also run on CPU
  autoscaling:  # (aws only)
    min_replicas: 1  # minimum number of replicas (default: 1)
    max_replicas: 4  # maximum number of replicas (default: 100)
    init_replicas: 1  # initial number of replicas (default: <min_replicas>)
    window: 10s  # the time over which to average the API's concurrency (default: 60s)
    upscale_stabilization_period: 0s  # the API will not scale above the lowest recommendation made during this period (default: 1m)
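
The file referenced by `path` must expose Cortex's Python predictor interface: a `PythonPredictor` class whose `__init__(self, config)` runs once per worker process at startup and whose `predict(self, payload)` handles each request. Below is a minimal sketch of what `predictor_with_model_download.py` could look like; the S3 bucket/key names, the `config` keys, and the assumption that the model is a pickled scikit-learn pipeline are illustrative only and are not part of the config above.

```python
# predictor_with_model_download.py -- illustrative sketch, not the actual example code
import pickle

import boto3


class PythonPredictor:
    def __init__(self, config):
        # Called once when each worker process starts: download the model
        # artifact from S3 and load it into memory so predict() can reuse it.
        bucket = config["bucket"]  # hypothetical key, e.g. set under predictor.config in cortex.yaml
        key = config["key"]        # hypothetical key, e.g. "text-moderator/model.pkl"
        local_path = "/tmp/model.pkl"

        boto3.client("s3").download_file(bucket, key, local_path)
        with open(local_path, "rb") as f:
            self.model = pickle.load(f)  # assumes a pickled scikit-learn text pipeline

    def predict(self, payload):
        # Called for every request; payload is the parsed JSON request body.
        label = self.model.predict([payload["text"]])[0]
        return {"label": str(label)}
```

With a predictor like this, the `config` values it reads (`bucket`, `key`) would be supplied via a `config:` section under `predictor:` in the YAML above; downloading the model in `__init__` keeps request latency low, since `predict()` only runs inference.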