"Material Iteration"

/// <summary>
/// Processes the material stored at <paramref name="materialIndex"/> in <c>currentInstanceData</c>:
/// queues a shader switch / shader resource update for it, buckets its in-use instances per model,
/// queues one instanced draw command per non-empty bucket, and hands the resulting instance matrices
/// to <c>RenderCache_IterateMaterial_Concat</c>.
/// </summary>
/// <param name="materialIndex">Index into <c>currentInstanceData</c> selecting the material / instance-array pair.</param>
private void RenderCache_IterateMaterial(int materialIndex) {
	// Unpack the material and the instance array that belongs to it
	KeyValuePair<Material, ModelInstanceManager.MIDArray> materialEntry = currentInstanceData[materialIndex];
	Material material = materialEntry.Key;
	ModelInstanceManager.MIDArray instances = materialEntry.Value;

	// A disposed material (or a disposed shader) is never rendered
	if (material.IsDisposed || material.Shader.IsDisposed) {
		return;
	}

	// Bail out unless at least one instance slot is flagged as in use
	bool anyInstanceInUse = false;
	for (int slot = 0; !anyInstanceInUse && slot < instances.Length; ++slot) {
		anyInstanceInUse = instances.Data[slot].InUse;
	}
	if (!anyInstanceInUse) {
		return;
	}

	// Queue a shader switch when the shader differs from the last one set, or when the frame number has changed
	bool shaderStillCurrent = lastSetFragmentShader == material.Shader && lastFrameNum == frameNum;
	if (!shaderStillCurrent) {
		lastSetFragmentShader = material.Shader;
		lastFrameNum = frameNum;
		QueueShaderSwitch(lastSetFragmentShader);
	}

	// Choose the resource package to queue. For the shadow-supporting shader, a copy of the
	// material's package is used with "ShadowMap" set to the previous shadow buffer view.
	var resourcePackage = material.FragmentShaderResourcePackage;
	if (lastSetFragmentShader == geomFSWithShadowSupport) {
		if (modifiedSRP == null) {
			modifiedSRP = new ShaderResourcePackage();
		}
		modifiedSRP.CopyFrom(resourcePackage);
		var shadowMapBinding = (ResourceViewBinding) lastSetFragmentShader.GetBindingByIdentifier("ShadowMap");
		modifiedSRP.SetValue(shadowMapBinding, previousShadowBufferSRV);
		resourcePackage = modifiedSRP;
	}
	QueueShaderResourceUpdate(lastSetFragmentShader, resourcePackage);

	// Ensure there is one transform bucket per model in the current cache, then empty every bucket
	bool bucketsTooFew = materialFilteringWorkspace == null || materialFilteringWorkspace.Length < currentCache.NumModels;
	if (bucketsTooFew) {
		materialFilteringWorkspace = new List<Transform>[currentCache.NumModels];
		for (int b = 0; b < materialFilteringWorkspace.Length; ++b) {
			materialFilteringWorkspace[b] = new List<Transform>();
		}
	}
	foreach (List<Transform> bucketToClear in materialFilteringWorkspace) {
		bucketToClear.Clear();
	}

	// Sort, then filter into the per-model buckets.
	// NOTE(review): presumably SortByProximityToCamera fills sortedModelData - confirm against its definition.
	SortByProximityToCamera(instances);
	uint acceptedInstanceCount = 0U;
	for (uint sortedIndex = 0U; sortedIndex < instances.Length; ++sortedIndex) {
		ModelInstanceData candidate = sortedModelData[sortedIndex];
		if (!candidate.InUse) {
			continue;
		}

		// Only instances on an existing, rendering-enabled, added scene layer are kept
		SceneLayer layer = currentSceneLayers[candidate.SceneLayerIndex];
		bool layerIsRenderable = layer != null && layer.GetRenderingEnabled() && addedSceneLayers.Contains(layer);
		if (!layerIsRenderable) {
			continue;
		}

		bool isVeggModel = candidate.ModelIndex == __VEGG_MH.ModelIndex && currentCache.ID == __VEGG_MH.GeoCacheID;
		if (!isVeggModel) {
			materialFilteringWorkspace[candidate.ModelIndex].Add(candidate.Transform);
		}
		else {
			// Find this instance's slot in the unsorted data by matching transforms (stays 0 when nothing matches)
			int matchedSlot = 0;
			for (int dataIndex = 0; dataIndex < instances.Length; ++dataIndex) {
				if (instances.Data[dataIndex].Transform == candidate.Transform) {
					matchedSlot = dataIndex;
					break;
				}
			}

			// Look up the extra rotation registered for that slot (identity when none is registered)
			Quaternion extraRotation = Quaternion.IDENTITY;
			foreach (var rotationEntry in __VEGG_MIH_ARR) {
				if (rotationEntry.Key.InstanceIndex == matchedSlot) {
					extraRotation = rotationEntry.Value;
					break;
				}
			}
			materialFilteringWorkspace[candidate.ModelIndex].Add(candidate.Transform.RotateBy(extraRotation));
		}
		++acceptedInstanceCount;
	}

	// Make sure the matrix scratch buffer can hold every accepted instance
	bool scratchTooSmall = instanceConcatWorkspace == null || instanceConcatWorkspace.Length < acceptedInstanceCount;
	if (scratchTooSmall) {
		// Twice the required size, so a count that keeps growing by one does not reallocate on every call
		instanceConcatWorkspace = new Matrix[acceptedInstanceCount << 1];
	}

	uint reservedStartOffset = RenderCache_IterateMaterial_ConcatReserve(acceptedInstanceCount);
	uint writeCursor = 0;

	// One draw command per non-empty bucket, followed by that bucket's transposed instance matrices
	for (uint modelIndex = 0U; modelIndex < materialFilteringWorkspace.Length; ++modelIndex) {
		List<Transform> modelTransforms = materialFilteringWorkspace[modelIndex];
		int transformCount = modelTransforms.Count;
		if (transformCount == 0) {
			continue;
		}

		uint vbStartIndex, ibStartIndex, vbCount, ibCount;
		currentCache.GetModelBufferValues(modelIndex, out vbStartIndex, out ibStartIndex, out vbCount, out ibCount);

		// The command is queued before the matrices are written; the instance offset is where this bucket starts
		QueueRenderCommand(RenderCommand.DrawIndexedInstanced(
			(int) vbStartIndex,
			ibStartIndex,
			ibCount,
			writeCursor + reservedStartOffset,
			(uint) transformCount
		));

		for (int t = 0; t < transformCount; ++t) {
			bool isEggHackModel = modelIndex == __EGGHACK_MH.ModelIndex && currentCache.ID == __EGGHACK_MH.GeoCacheID;
			instanceConcatWorkspace[writeCursor++] = isEggHackModel
				? modelTransforms[t].RotateBy(__EGGHACK_ROT).AsMatrixTransposed
				: modelTransforms[t].AsMatrixTransposed;
		}
	}

	RenderCache_IterateMaterial_Concat(instanceConcatWorkspace, reservedStartOffset, acceptedInstanceCount);
}


Code snippet taken from "Postmortems - Clearly Too Slow".