537 const Teuchos::ArrayView<const lno_t>& adjs,
538 const Teuchos::ArrayView<const offset_t>& offsets,
539 const Teuchos::RCP<femv_t>& femv,
540 const Teuchos::ArrayView<const gno_t>& gids,
541 const Teuchos::ArrayView<const int>& rand,
542 const Teuchos::ArrayView<const int>& owners,
543 RCP<const map_t> mapOwnedPlusGhosts,
544 const std::unordered_map<
lno_t, std::vector<int>>& procs_to_send){
545 if(verbose) std::cout<<comm->getRank()<<
": inside coloring algorithm\n";
548 double total_time = 0.0;
549 double interior_time = 0.0;
550 double comm_time = 0.0;
551 double comp_time = 0.0;
552 double recoloring_time = 0.0;
553 double conflict_detection = 0.0;
555 const int numStatisticRecordingRounds = 100;
559 std::vector<int> deg_send_cnts(comm->getSize(),0);
560 std::vector<gno_t> deg_sdispls(comm->getSize()+1,0);
561 for(
int i = 0; i < owners.size(); i++){
562 deg_send_cnts[owners[i]]++;
565 gno_t deg_sendsize = 0;
566 std::vector<int> deg_sentcount(comm->getSize(),0);
567 for(
int i = 1; i < comm->getSize()+1; i++){
568 deg_sdispls[i] = deg_sdispls[i-1] + deg_send_cnts[i-1];
569 deg_sendsize += deg_send_cnts[i-1];
571 std::vector<gno_t> deg_sendbuf(deg_sendsize,0);
572 for(
int i = 0; i < owners.size(); i++){
573 size_t idx = deg_sdispls[owners[i]] + deg_sentcount[owners[i]];
574 deg_sentcount[owners[i]]++;
575 deg_sendbuf[idx] = mapOwnedPlusGhosts->getGlobalElement(i+nVtx);
577 Teuchos::ArrayView<int> deg_send_cnts_view = Teuchos::arrayViewFromVector(deg_send_cnts);
578 Teuchos::ArrayView<gno_t> deg_sendbuf_view = Teuchos::arrayViewFromVector(deg_sendbuf);
579 Teuchos::ArrayRCP<gno_t> deg_recvbuf;
580 std::vector<int> deg_recvcnts(comm->getSize(),0);
581 Teuchos::ArrayView<int> deg_recvcnts_view = Teuchos::arrayViewFromVector(deg_recvcnts);
582 AlltoAllv<gno_t>(*comm, *env, deg_sendbuf_view, deg_send_cnts_view, deg_recvbuf, deg_recvcnts_view);
585 for(
int i = 0; i < deg_recvbuf.size(); i++){
586 lno_t lid = mapOwnedPlusGhosts->getLocalElement(deg_recvbuf[i]);
587 deg_recvbuf[i] = offsets[lid+1] - offsets[lid];
590 ArrayRCP<gno_t> ghost_degrees;
591 AlltoAllv<gno_t>(*comm, *env, deg_recvbuf(), deg_recvcnts_view, ghost_degrees, deg_send_cnts_view);
593 Kokkos::View<gno_t*, device_type> ghost_degrees_dev(
"ghost degree view",ghost_degrees.size());
594 typename Kokkos::View<gno_t*, device_type>::HostMirror ghost_degrees_host = Kokkos::create_mirror(ghost_degrees_dev);
595 for(
int i = 0; i < ghost_degrees.size(); i++){
596 lno_t lid = mapOwnedPlusGhosts->getLocalElement(deg_sendbuf[i]);
597 ghost_degrees_host(lid-nVtx) = ghost_degrees[i];
599 Kokkos::deep_copy(ghost_degrees_dev, ghost_degrees_host);
603 for(
size_t i = 0; i < nVtx; i++){
604 offset_t curr_degree = offsets[i+1] - offsets[i];
605 if(curr_degree > local_max_degree){
606 local_max_degree = curr_degree;
609 Teuchos::reduceAll<int, offset_t>(*comm,Teuchos::REDUCE_MAX,1, &local_max_degree, &global_max_degree);
610 if(comm->getRank() == 0 && verbose) std::cout<<
"Input has max degree "<<global_max_degree<<
"\n";
611 if(verbose)std::cout<<comm->getRank()<<
": creating Kokkos Views\n";
613 Kokkos::View<offset_t*, device_type> dist_degrees(
"Owned+Ghost degree view",rand.size());
614 typename Kokkos::View<offset_t*, device_type>::HostMirror dist_degrees_host = Kokkos::create_mirror(dist_degrees);
616 for(
int i = 0; i < adjs.size(); i++){
617 if((
size_t)adjs[i] < nVtx)
continue;
618 dist_degrees_host(adjs[i])++;
621 for(
int i = 0; i < offsets.size()-1; i++){
622 dist_degrees_host(i) = offsets[i+1] - offsets[i];
625 Kokkos::View<offset_t*, device_type> dist_offsets(
"Owned+Ghost Offset view", rand.size()+1);
626 typename Kokkos::View<offset_t*, device_type>::HostMirror dist_offsets_host = Kokkos::create_mirror(dist_offsets);
629 dist_offsets_host(0) = 0;
630 uint64_t total_adjs = 0;
631 for(Teuchos_Ordinal i = 1; i < rand.size()+1; i++){
632 dist_offsets_host(i) = dist_degrees_host(i-1) + dist_offsets_host(i-1);
633 total_adjs+= dist_degrees_host(i-1);
636 Kokkos::View<lno_t*, device_type> dist_adjs(
"Owned+Ghost adjacency view", total_adjs);
637 typename Kokkos::View<lno_t*, device_type>::HostMirror dist_adjs_host = Kokkos::create_mirror(dist_adjs);
639 for(Teuchos_Ordinal i = 0; i < rand.size(); i++){
640 dist_degrees_host(i) = 0;
642 for(
int i = 0; i < adjs.size(); i++) dist_adjs_host(i) = adjs[i];
643 if(comm->getSize() > 1){
644 for(
size_t i = 0; i < nVtx; i++){
645 for(
offset_t j = offsets[i]; j < offsets[i+1]; j++){
647 if( (
size_t)adjs[j] >= nVtx){
649 dist_adjs_host(dist_offsets_host(adjs[j]) + dist_degrees_host(adjs[j])) = i;
650 dist_degrees_host(adjs[j])++;
656 if(verbose) std::cout<<comm->getRank()<<
": copying host mirrors to device views\n";
658 Kokkos::deep_copy(dist_degrees, dist_degrees_host);
659 Kokkos::deep_copy(dist_offsets, dist_offsets_host);
660 Kokkos::deep_copy(dist_adjs, dist_adjs_host);
661 if(verbose) std::cout<<comm->getRank()<<
": done copying to device\n";
664 Kokkos::View<gno_t*, device_type> recoloringSize(
"Recoloring Queue Size",1);
665 typename Kokkos::View<gno_t*, device_type>::HostMirror recoloringSize_host = Kokkos::create_mirror(recoloringSize);
666 recoloringSize_host(0) = 0;
667 Kokkos::deep_copy(recoloringSize, recoloringSize_host);
670 Kokkos::View<int*,device_type> rand_dev(
"randVec",rand.size());
671 typename Kokkos::View<int*, device_type>::HostMirror rand_host = Kokkos::create_mirror(rand_dev);
672 for(Teuchos_Ordinal i = 0; i < rand.size(); i++){
673 rand_host(i) = rand[i];
677 Kokkos::View<gno_t*, device_type> gid_dev(
"GIDs",gids.size());
678 typename Kokkos::View<gno_t*,device_type>::HostMirror gid_host = Kokkos::create_mirror(gid_dev);
679 for(Teuchos_Ordinal i = 0; i < gids.size(); i++){
680 gid_host(i) = gids[i];
684 Kokkos::deep_copy(rand_dev,rand_host);
685 Kokkos::deep_copy(gid_dev, gid_host);
687 if(verbose)std::cout<<comm->getRank()<<
": done creating recoloring datastructures\n";
690 for(
size_t i = 0; i < nVtx; i++){
691 for(
offset_t j = offsets[i]; j < offsets[i+1]; j++){
692 if((
size_t)adjs[j] >= nVtx) {
698 if(verbose)std::cout<<comm->getRank()<<
": creating send views\n";
701 Kokkos::View<lno_t*, device_type> verts_to_send_view(
"verts to send",boundary_size);
702 Kokkos::parallel_for(
"init verts_to_send_view",
703 Kokkos::RangePolicy<execution_space, int>(0,boundary_size),
704 KOKKOS_LAMBDA(
const int& i){
705 verts_to_send_view(i) = -1;
709 Kokkos::View<size_t*, device_type> verts_to_send_size(
"verts to send size",1);
710 Kokkos::View<size_t*, device_type, Kokkos::MemoryTraits<Kokkos::Atomic> > verts_to_send_size_atomic = verts_to_send_size;
711 typename Kokkos::View<lno_t*, device_type>::HostMirror verts_to_send_host = create_mirror(verts_to_send_view);
712 typename Kokkos::View<size_t*,device_type>::HostMirror verts_to_send_size_host = create_mirror(verts_to_send_size);
714 verts_to_send_size_host(0) = 0;
715 deep_copy(verts_to_send_size, verts_to_send_size_host);
717 if(verbose)std::cout<<comm->getRank()<<
": Done creating send views, initializing...\n";
718 if(verbose)std::cout<<comm->getRank()<<
": boundary_size = "<<boundary_size<<
" verts_to_send_size_atomic(0) = "<<verts_to_send_size_atomic(0)<<
"\n";
720 Kokkos::parallel_for(
"Initialize verts_to_send",
721 Kokkos::RangePolicy<execution_space, int>(0,nVtx),
722 KOKKOS_LAMBDA(
const int&i){
723 for(
offset_t j = dist_offsets(i); j < dist_offsets(i+1); j++){
724 if((
size_t)dist_adjs(j) >= nVtx){
725 verts_to_send_view(verts_to_send_size_atomic(0)++) = i;
734 Kokkos::View<int*, device_type> ghost_colors(
"ghost color backups", rand.size()-nVtx);
735 if(verbose)std::cout<<comm->getRank()<<
": Done initializing\n";
736 gno_t sentPerRound[numStatisticRecordingRounds];
737 gno_t recvPerRound[numStatisticRecordingRounds];
739 if(verbose) std::cout<<comm->getRank()<<
": Coloring interior\n";
742 if(timing) comm->barrier();
743 interior_time =
timer();
744 total_time =
timer();
746 bool use_vbbit = (global_max_degree < 6000);
747 this->colorInterior<execution_space,memory_space>
748 (nVtx, dist_adjs, dist_offsets, femv,dist_adjs,0,use_vbbit);
750 interior_time =
timer() - interior_time;
751 comp_time = interior_time;
753 if(verbose) std::cout<<comm->getRank()<<
": Going to recolor\n";
754 bool recolor_degrees = this->pl->template get<bool>(
"recolor_degrees",
true);
757 if(comm->getSize() > 1){
759 if(verbose)std::cout<<comm->getRank()<<
": going to communicate\n";
762 Kokkos::deep_copy(verts_to_send_host, verts_to_send_view);
763 Kokkos::deep_copy(verts_to_send_size_host, verts_to_send_size);
765 comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,
768 verts_to_send_size_host,
773 sentPerRound[0] = sent;
774 recvPerRound[0] = recv;
775 if(verbose) std::cout<<comm->getRank()<<
": done communicating\n";
776 verts_to_send_size_host(0) = 0;
777 deep_copy(verts_to_send_size, verts_to_send_size_host);
780 Kokkos::View<int**, Kokkos::LayoutLeft, device_type> femvColors =
781 femv->template getLocalView<device_type>(Tpetra::Access::ReadWrite);
782 Kokkos::View<int*, device_type> femv_colors = subview(femvColors, Kokkos::ALL, 0);
783 Kokkos::parallel_for(
"get colors from femv",
784 Kokkos::RangePolicy<execution_space, int>(0,rand.size()-nVtx),
785 KOKKOS_LAMBDA(
const int& i){
786 ghost_colors(i) = femv_colors(i+nVtx);
790 double temp =
timer();
791 detectConflicts<execution_space, memory_space>(nVtx,
797 verts_to_send_size_atomic,
803 deep_copy(recoloringSize_host, recoloringSize);
804 conflict_detection +=
timer() - temp;
805 comp_time += conflict_detection;
808 if(verbose)std::cout<<comm->getRank()<<
": done initial recoloring, begin recoloring loop\n";
809 double totalPerRound[numStatisticRecordingRounds];
810 double commPerRound[numStatisticRecordingRounds];
811 double compPerRound[numStatisticRecordingRounds];
812 double recoloringPerRound[numStatisticRecordingRounds];
813 double conflictDetectionPerRound[numStatisticRecordingRounds];
814 double serialRecoloringPerRound[numStatisticRecordingRounds];
815 int vertsPerRound[numStatisticRecordingRounds];
817 if(comm->getSize() == 1) done =
true;
818 totalPerRound[0] = interior_time + comm_time + conflict_detection;
819 recoloringPerRound[0] = 0;
820 commPerRound[0] = comm_time;
821 compPerRound[0] = interior_time + conflict_detection;
822 conflictDetectionPerRound[0] = conflict_detection;
823 recoloringPerRound[0] = 0;
824 vertsPerRound[0] = 0;
825 int distributedRounds = 1;
826 int serial_threshold = this->pl->template get<int>(
"serial_threshold",0);
828 Kokkos::View<lno_t*, device_type> verts_to_recolor(
"verts_to_recolor", boundary_size);
829 typename Kokkos::View<int*, device_type>::HostMirror ghost_colors_host;
831 while(recoloringSize_host(0) > 0 || !done){
832 if(recoloringSize_host(0) < serial_threshold)
break;
834 auto femvColors = femv->getLocalViewDevice(Tpetra::Access::ReadWrite);
835 auto femv_colors = subview(femvColors, Kokkos::ALL, 0);
837 if(distributedRounds < numStatisticRecordingRounds) {
838 vertsPerRound[distributedRounds] = recoloringSize_host(0);
844 Kokkos::deep_copy(verts_to_recolor, verts_to_send_view);
846 double recolor_temp =
timer();
848 deep_copy(verts_to_send_size_host, verts_to_send_size);
849 if(verts_to_send_size_host(0) > 0){
852 dist_adjs,dist_offsets,
855 verts_to_send_size_host(0),
859 if(distributedRounds < numStatisticRecordingRounds){
860 recoloringPerRound[distributedRounds] =
timer() - recolor_temp;
861 recoloring_time += recoloringPerRound[distributedRounds];
862 comp_time += recoloringPerRound[distributedRounds];
863 compPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
864 totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
866 double recolor_round_time =
timer() - recolor_temp;
867 recoloring_time += recolor_round_time;
868 comp_time += recolor_round_time;
873 recoloringSize_host(0) = 0;
874 Kokkos::deep_copy(recoloringSize,recoloringSize_host);
876 Kokkos::parallel_for(
"set femv colors",
877 Kokkos::RangePolicy<execution_space, int>(0,rand.size()-nVtx),
878 KOKKOS_LAMBDA(
const int& i){
879 femv_colors(i+nVtx) = ghost_colors(i);
883 Kokkos::deep_copy(verts_to_send_host, verts_to_send_view);
884 Kokkos::deep_copy(verts_to_send_size_host, verts_to_send_size);
887 femvColors =
decltype(femvColors)();
888 femv_colors =
decltype(femv_colors)();
890 double curr_comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,
893 verts_to_send_size_host,
898 comm_time += curr_comm_time;
899 if(distributedRounds < numStatisticRecordingRounds){
900 commPerRound[distributedRounds] = curr_comm_time;
901 sentPerRound[distributedRounds] = sent;
902 recvPerRound[distributedRounds] = recv;
903 totalPerRound[distributedRounds] += commPerRound[distributedRounds];
909 femvColors = femv->getLocalViewDevice(Tpetra::Access::ReadWrite);
910 femv_colors = subview(femvColors, Kokkos::ALL, 0);
911 Kokkos::parallel_for(
"get femv colors 2",
912 Kokkos::RangePolicy<execution_space, int>(0,rand.size()-nVtx),
913 KOKKOS_LAMBDA(
const int& i){
914 ghost_colors(i) = femv_colors(i+nVtx);
917 verts_to_send_size_host(0) = 0;
918 deep_copy(verts_to_send_size, verts_to_send_size_host);
919 double detection_temp =
timer();
920 detectConflicts<execution_space, memory_space>(nVtx,
926 verts_to_send_size_atomic,
933 Kokkos::deep_copy(recoloringSize_host, recoloringSize);
935 if(distributedRounds < numStatisticRecordingRounds){
936 conflictDetectionPerRound[distributedRounds] =
timer() - detection_temp;
937 conflict_detection += conflictDetectionPerRound[distributedRounds];
938 compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
939 totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
940 comp_time += conflictDetectionPerRound[distributedRounds];
942 double conflict_detection_round_time =
timer()- detection_temp;
943 conflict_detection += conflict_detection_round_time;
944 comp_time += conflict_detection_round_time;
948 int localDone = recoloringSize_host(0);
949 Teuchos::reduceAll<int, int>(*comm,Teuchos::REDUCE_SUM,1, &localDone, &globalDone);
956 if(recoloringSize_host(0) > 0 || !done){
957 ghost_colors_host = Kokkos::create_mirror_view(ghost_colors);
958 deep_copy(ghost_colors_host, ghost_colors);
959 deep_copy(verts_to_send_host, verts_to_send_view);
960 deep_copy(verts_to_send_size_host, verts_to_send_size);
965 while(recoloringSize_host(0) > 0 || !done){
967 auto femvColors = femv->getLocalViewHost(Tpetra::Access::ReadWrite);
968 auto femv_colors = subview(femvColors, Kokkos::ALL, 0);
971 if(distributedRounds < 100){
972 vertsPerRound[distributedRounds] = recoloringSize_host(0);
975 double recolor_temp =
timer();
977 if(verts_to_send_size_host(0) > 0){
980 (femv_colors.size(), dist_adjs_host, dist_offsets_host, femv, verts_to_send_host, verts_to_send_size_host(0),
true);
983 if(distributedRounds < numStatisticRecordingRounds){
984 recoloringPerRound[distributedRounds] =
timer() - recolor_temp;
985 recoloring_time += recoloringPerRound[distributedRounds];
986 comp_time += recoloringPerRound[distributedRounds];
987 compPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
988 totalPerRound[distributedRounds] = recoloringPerRound[distributedRounds];
990 double recolor_serial_round_time =
timer() - recolor_temp;
991 recoloring_time += recolor_serial_round_time;
992 comp_time += recolor_serial_round_time;
995 recoloringSize_host(0) = 0;
997 for(
size_t i = 0; i < rand.size() -nVtx; i++){
998 femv_colors(i+nVtx) = ghost_colors_host(i);
1002 double curr_comm_time = doOwnedToGhosts(mapOwnedPlusGhosts,
1005 verts_to_send_size_host,
1010 comm_time += curr_comm_time;
1012 if(distributedRounds < numStatisticRecordingRounds){
1013 commPerRound[distributedRounds] = curr_comm_time;
1014 sentPerRound[distributedRounds] = sent;
1015 recvPerRound[distributedRounds] = recv;
1016 totalPerRound[distributedRounds] += commPerRound[distributedRounds];
1018 for(
size_t i = 0; i < rand.size()-nVtx; i++){
1019 ghost_colors_host(i) = femv_colors(i+nVtx);
1022 verts_to_send_size_host(0) = 0;
1023 double detection_temp =
timer();
1024 detectConflicts<host_exec, host_mem>(nVtx,
1030 verts_to_send_size_host,
1031 recoloringSize_host,
1036 if(distributedRounds < numStatisticRecordingRounds){
1037 conflictDetectionPerRound[distributedRounds] =
timer() - detection_temp;
1038 conflict_detection += conflictDetectionPerRound[distributedRounds];
1039 compPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1040 totalPerRound[distributedRounds] += conflictDetectionPerRound[distributedRounds];
1041 comp_time += conflictDetectionPerRound[distributedRounds];
1043 double conflict_detection_serial_round_time =
timer() - detection_temp;
1044 conflict_detection += conflict_detection_serial_round_time;
1045 comp_time += conflict_detection_serial_round_time;
1049 int localDone = recoloringSize_host(0);
1050 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_SUM,1, &localDone, &globalDone);
1051 distributedRounds++;
1054 total_time =
timer() - total_time;
1058 std::cout<<comm->getRank()<<
": done recoloring loop, computing statistics\n";
1059 int localBoundaryVertices = 0;
1060 for(
size_t i = 0; i < nVtx; i++){
1061 for(
offset_t j = offsets[i]; j < offsets[i+1]; j++){
1062 if((
size_t)adjs[j] >= nVtx){
1063 localBoundaryVertices++;
1070 int totalBoundarySize = 0;
1071 int totalVertsPerRound[numStatisticRecordingRounds];
1072 double finalTotalPerRound[numStatisticRecordingRounds];
1073 double maxRecoloringPerRound[numStatisticRecordingRounds];
1074 double finalSerialRecoloringPerRound[numStatisticRecordingRounds];
1075 double minRecoloringPerRound[numStatisticRecordingRounds];
1076 double finalCommPerRound[numStatisticRecordingRounds];
1077 double finalCompPerRound[numStatisticRecordingRounds];
1078 double finalConflictDetectionPerRound[numStatisticRecordingRounds];
1079 gno_t finalRecvPerRound[numStatisticRecordingRounds];
1080 gno_t finalSentPerRound[numStatisticRecordingRounds];
1081 for(
int i = 0; i < numStatisticRecordingRounds; i++) {
1082 totalVertsPerRound[i] = 0;
1083 finalTotalPerRound[i] = 0.0;
1084 maxRecoloringPerRound[i] = 0.0;
1085 minRecoloringPerRound[i] = 0.0;
1086 finalCommPerRound[i] = 0.0;
1087 finalCompPerRound[i] = 0.0;
1088 finalConflictDetectionPerRound[i] = 0.0;
1089 finalSentPerRound[i] = 0;
1090 finalRecvPerRound[i] = 0;
1092 Teuchos::reduceAll<int,int>(*comm, Teuchos::REDUCE_SUM,1, &localBoundaryVertices,&totalBoundarySize);
1093 Teuchos::reduceAll<int,int>(*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,vertsPerRound,totalVertsPerRound);
1094 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,totalPerRound,finalTotalPerRound);
1095 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,recoloringPerRound,maxRecoloringPerRound);
1096 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MIN,numStatisticRecordingRounds,recoloringPerRound,minRecoloringPerRound);
1097 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,serialRecoloringPerRound,finalSerialRecoloringPerRound);
1098 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,commPerRound,finalCommPerRound);
1099 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,numStatisticRecordingRounds,compPerRound,finalCompPerRound);
1100 Teuchos::reduceAll<int,double>(*comm,
1101 Teuchos::REDUCE_MAX,numStatisticRecordingRounds,conflictDetectionPerRound,finalConflictDetectionPerRound);
1102 Teuchos::reduceAll<int,gno_t> (*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,recvPerRound, finalRecvPerRound);
1103 Teuchos::reduceAll<int,gno_t> (*comm, Teuchos::REDUCE_SUM,numStatisticRecordingRounds,sentPerRound, finalSentPerRound);
1105 std::cout <<
"Rank " << comm->getRank()
1106 <<
": boundary size: " << localBoundaryVertices << std::endl;
1107 if(comm->getRank()==0)
1108 std::cout <<
"Total boundary size: " << totalBoundarySize << std::endl;
1109 for(
int i = 0; i < std::min(distributedRounds,numStatisticRecordingRounds); i++){
1110 std::cout <<
"Rank " << comm->getRank()
1111 <<
": recolor " << vertsPerRound[i]
1112 <<
" vertices in round " << i << std::endl;
1113 if(comm->getRank()==0) {
1114 std::cout <<
"recolored " << totalVertsPerRound[i]
1115 <<
" vertices in round " << i << std::endl;
1116 std::cout <<
"total time in round " << i
1117 <<
": " << finalTotalPerRound[i] << std::endl;;
1118 std::cout <<
"recoloring time in round " << i
1119 <<
": " << maxRecoloringPerRound[i] << std::endl;
1120 std::cout <<
"serial recoloring time in round " << i
1121 <<
": " << finalSerialRecoloringPerRound[i] << std::endl;
1122 std::cout <<
"min recoloring time in round " << i
1123 <<
": " << minRecoloringPerRound[i] << std::endl;
1124 std::cout <<
"conflict detection time in round " << i
1125 <<
": " << finalConflictDetectionPerRound[i] << std::endl;
1126 std::cout <<
"comm time in round " << i
1127 <<
": " << finalCommPerRound[i] << std::endl;
1128 std::cout <<
"total sent in round " << i
1129 <<
": " << finalSentPerRound[i] << std::endl;
1130 std::cout <<
"total recv in round " << i
1131 <<
": " << finalRecvPerRound[i] << std::endl;
1132 std::cout <<
"comp time in round " << i
1133 <<
": " << finalCompPerRound[i] << std::endl;
1137 double global_total_time = 0.0;
1138 double global_recoloring_time=0.0;
1139 double global_min_recoloring_time=0.0;
1140 double global_conflict_detection=0.0;
1141 double global_comm_time=0.0;
1142 double global_comp_time=0.0;
1143 double global_interior_time = 0.0;
1144 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&total_time,&global_total_time);
1145 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&recoloring_time,&global_recoloring_time);
1146 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MIN,1,&recoloring_time,&global_min_recoloring_time);
1147 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&conflict_detection,&global_conflict_detection);
1148 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&comm_time,&global_comm_time);
1149 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&comp_time,&global_comp_time);
1150 Teuchos::reduceAll<int,double>(*comm, Teuchos::REDUCE_MAX,1,&interior_time,&global_interior_time);
1153 if(comm->getRank()==0){
1154 std::cout <<
"Total Time: " << global_total_time << std::endl;
1155 std::cout <<
"Interior Time: " << global_interior_time << std::endl;
1156 std::cout <<
"Recoloring Time: " << global_recoloring_time << std::endl;
1157 std::cout <<
"Min Recoloring Time: " << global_min_recoloring_time << std::endl;
1158 std::cout <<
"Conflict Detection Time: " << global_conflict_detection << std::endl;
1159 std::cout <<
"Comm Time: " << global_comm_time << std::endl;
1160 std::cout <<
"Comp Time: " << global_comp_time << std::endl;
1163 if(verbose) std::cout<<comm->getRank()<<
": exiting coloring\n";