Download OpenAPI specification:
The ASTRA-sim API provides a standardized interface for submitting configurations, executing jobs, checking job status, and retrieving results.
The uploaded file is a ZIP archive containing the execution traces.
{- "message": "string",
- "warnings": [
- "string"
]
}object (Common.Configuration) Holds the schemas that define ASTRA-sim configurations common to all backends, including workload, system, communicator group, logging, remote memory, and command parameters. | |
object (Network.Backend) Selects which ASTRA-sim network backend will run the simulation. | |
object (Infragraph) InfraGraph or infrastructure graph defines a model-driven, vendor-neutral, standard interface for capturing a system of systems suitable for use in co-designing AI/HPC solutions. This model allows for defining physical infrastructure as a logical system of systems using graph-like terminology. In addition to defining a logical graph, an unlimited number of different physical characteristics can be associated with logical endpoints. |
{- "common_config": {
- "workload": "string",
- "system": {
- "scheduling_policy": "LIFO",
- "all_reduce_implementation": [
- "ring"
], - "reduce_scatter_implementation": [
- "ring"
], - "all_gather_implementation": [
- "ring"
], - "all_to_all_implementation": [
- "ring"
], - "all_to_all_implementation_custom": [
- "string"
], - "all_gather_implementation_custom": [
- "string"
], - "all_reduce_implementation_custom": [
- "string"
], - "collective_optimization": "baseline",
- "local_reduction_delay": 0,
- "active_chunks_per_dimension": 1,
- "latency": 0.1,
- "overhead": 0.1,
- "gap": 0.1,
- "global_memory": 0.1,
- "endpoint_delay": 0,
- "model_shared_bus": 0,
- "preferred_dataset_splits": 1,
- "peak_perf": 0.1,
- "local_mem_bw": 50,
- "roofline_enabled": 0,
- "trace_enabled": 0,
- "replay_only": 0,
- "track_local_mem": 0,
- "local_mem_trace_filename": "string"
}, - "communicator_group": [
- {
- "identifier": "string",
- "npu_list": [
- 0
]
}
], - "remote_memory": {
- "memory_type": "NO_MEMORY_EXPANSION",
- "remote_mem_latency": 0,
- "remote_mem_bw": 0,
- "num_nodes": 0,
- "num_npus_per_node": 0
}, - "logging": {
- "sink": [
- {
- "name": "string",
- "type": "string",
- "filename": "string",
- "truncate": true,
- "create_parent_dir": true
}
], - "logger": [
- {
- "name": "string",
- "sinks": [
- "string"
], - "level": "trace",
- "pattern": "string"
}
]
}, - "cmd_parameters": {
- "num_queues_per_dim": 1,
- "comm_scale": 1,
- "injection_scale": 1,
- "rendezvous_protocol": false
}
}, - "network_backend": {
- "choice": "analytical_congestion_aware",
- "analytical_congestion_aware": {
- "topology": {
- "choice": "network",
- "network": [
- {
- "topology": "ring",
- "npus_count": 0,
- "bandwidth": 0.1,
- "latency": 0.1
}
]
}
}, - "analytical_congestion_unaware": {
- "topology": {
- "choice": "network",
- "network": [
- {
- "topology": "ring",
- "npus_count": 0,
- "bandwidth": 0.1,
- "latency": 0.1
}
]
}
}, - "ns3": {
- "network": {
- "enable_qcn": 1,
- "use_dynamic_pfc_threshold": 1,
- "packet_payload_size": 1024,
- "topology_file": "nc-topology.txt",
- "flow_file": "flow.txt",
- "trace_file": "trace.txt",
- "trace_output_file": "trace_out.tr",
- "fct_output_file": "fct.txt",
- "pfc_output_file": "pfc.txt",
- "simulator_stop_time": 40000000000000,
- "cc_mode": 12,
- "alpha_resume_interval": 1,
- "rate_decrease_interval": 4,
- "clamp_target_rate": 0,
- "rp_timer": 900,
- "ewma_gain": 0.00390625,
- "fast_recovery_times": 1,
- "rate_ai": "50Mb/s",
- "rate_hai": "100Mb/s",
- "min_rate": "100Mb/s",
- "dctcp_rate_ai": "1000Mb/s",
- "error_rate_per_link": 0,
- "l2_chunk_size": 4000,
- "l2_ack_interval": 1,
- "l2_back_to_zero": 0,
- "has_win": 1,
- "global_t": 0,
- "var_win": 1,
- "fast_react": 1,
- "u_target": 0.95,
- "mi_thresh": 0,
- "int_multi": 1,
- "multi_rate": 0,
- "sample_feedback": 0,
- "pint_log_base": 1.05,
- "pint_prob": 1,
- "rate_bound": 1,
- "ack_high_prio": 0,
- "link_down": [
- 0,
- 0,
- 0
], - "enable_trace": 1,
- "kmax_map": "6 25000000000 400 40000000000 800 100000000000 1600 200000000000 2400 400000000000 3200 2400000000000 3200",
- "kmin_map": "6 25000000000 100 40000000000 200 100000000000 400 200000000000 600 400000000000 800 2400000000000 800",
- "pmax_map": "6 25000000000 0.2 40000000000 0.2 100000000000 0.2 200000000000 0.2 400000000000 0.2 2400000000000 0.2",
- "buffer_size": 32,
- "qlen_mon_file": "qlen.txt",
- "qlen_mon_start": 0,
- "qlen_mon_end": 20000,
- "nic_total_pause_time": 0
}, - "topology": {
- "choice": "nc_topology",
- "nc_topology": {
- "total_nodes": 0,
- "total_switches": 0,
- "total_links": 0,
- "switch_ids": [
- 1
], - "connections": [
- {
- "source_index": 0,
- "destination_index": 0,
- "bandwidth": "string",
- "latency": "string",
- "error_rate": "string"
}
]
}
}, - "logical_topology": {
- "logical_dimensions": [
- 0
]
}, - "trace": {
- "trace_ids": [
- 0
]
}
}, - "htsim": {
- "htsim_protocol": {
- "choice": "tcp",
- "tcp": {
- "o": "string",
- "sub": "string",
- "nodes": "string",
- "tcp_protocol": "UNCOUPLED"
}
}, - "topology": {
- "choice": "network_topology_configuration",
- "network_topology_configuration": {
- "network": [
- {
- "topology": "ring",
- "npus_count": 0,
- "bandwidth": 0.1,
- "latency": 0.1
}
], - "htsim_topology": {
- "choice": "fat_tree",
- "fat_tree": {
- "nodes": 0,
- "tiers": 2,
- "podsize": 0,
- "tier_0": {
- "downlink_speed_gbps": 0,
- "radix_up": 0,
- "radix_down": 0,
- "queue_up": 0,
- "queue_down": 0,
- "oversubscribed": 0,
- "bundle": 0,
- "switch_latency_ns": 0,
- "downlink_latency_ns": 0
}, - "tier_1": {
- "downlink_speed_gbps": 0,
- "radix_up": 0,
- "radix_down": 0,
- "queue_up": 0,
- "queue_down": 0,
- "oversubscribed": 0,
- "bundle": 0,
- "switch_latency_ns": 0,
- "downlink_latency_ns": 0
}, - "tier_2": {
- "downlink_speed_gbps": 0,
- "radix_down": 0,
- "queue_down": 0,
- "oversubscribed": 0,
- "bundle": 0,
- "switch_latency_ns": 0,
- "downlink_latency_ns": 0
}
}
}
}
}
}
}, - "infragraph": {
- "infrastructure": {
- "name": "string",
- "description": "string",
- "devices": [
- {
- "name": "string",
- "description": "string",
- "components": [
- {
- "name": "string",
- "description": "string",
- "count": 0,
- "choice": "custom",
- "custom": {
- "type": "string"
}, - "device": { },
- "cpu": { },
- "xpu": { },
- "nic": { },
- "memory": { },
- "port": { },
- "switch": { }
}
], - "links": [
- {
- "name": "string",
- "description": "string",
- "physical": {
- "bandwidth": {
- "choice": "gigabits_per_second",
- "gigabits_per_second": 0,
- "gigabytes_per_second": 0,
- "gigatransfers_per_second": 0
}, - "latency": {
- "choice": "ms",
- "ms": 0,
- "us": 0,
- "ns": 0
}
}
}
], - "edges": [
- {
- "ep1": {
- "device": "string",
- "component": "string"
}, - "ep2": {
- "device": "string",
- "component": "string"
}, - "scheme": "one2one",
- "link": "string"
}
]
}
], - "links": [
- {
- "name": "string",
- "description": "string",
- "physical": {
- "bandwidth": {
- "choice": "gigabits_per_second",
- "gigabits_per_second": 0,
- "gigabytes_per_second": 0,
- "gigatransfers_per_second": 0
}, - "latency": {
- "choice": "ms",
- "ms": 0,
- "us": 0,
- "ns": 0
}
}
}
], - "instances": [
- {
- "name": "string",
- "description": "string",
- "device": "string",
- "count": 0
}
], - "edges": [
- {
- "ep1": {
- "instance": "string",
- "component": "string"
}, - "ep2": {
- "instance": "string",
- "component": "string"
}, - "scheme": "one2one",
- "link": "string"
}
]
}, - "annotations": {
- "link_specifications": [
- {
- "link_name": "string",
- "packet_loss_rate": 0.1,
- "link_error_rate": 0.1
}
], - "device_specifications": [
- {
- "device_name": "string",
- "device_type": "host",
- "device_latency_ms": 0.1,
- "device_bandwidth_gbps": 0.1,
- "radix_up": 0,
- "radix_down": 0,
- "queue_up": 0,
- "queue_down": 0
}
], - "rank_assignment": [
- {
- "rank_identifier": 0,
- "npu_identifier": "string"
}
]
}
}
}{- "message": "string",
- "warnings": [
- "string"
]
}| choice | string Enum: "start" "stop" Represents a control interface that allows users to choose between starting or stopping an ASTRA-sim simulation. Only one action — start or stop — can be performed at a time. Users must specify either the configuration to start a backend or the command to stop it, ensuring that these actions are mutually exclusive within a single operation. |
object (Control.Start) Specifies the operation to set control to a specific network backend, allowing the user to initiate an action on the selected backend. |
{- "choice": "start",
- "start": {
- "backend": "analytical_congestion_aware"
}
}{- "message": "string",
- "warnings": [
- "string"
]
}In the result API payload, the user can choose to request either metadata or a specific file by filename. When metadata is requested, the server returns metadata about the generated result files. If a filename is specified, the corresponding file will be downloaded. Filenames can be obtained from the metadata returned by the server when using the metadata option.
| choice | string Enum: "metadata" "filename" The type of result required: metadata or file. |
| filename | string The name of a single result file to download. Valid filenames are listed in the metadata returned by the server. |
{- "choice": "metadata",
- "filename": "string"
}{- "code": 0,
- "kind": "validation",
- "errors": [
- "string"
]
}