C A collection of vectorizable loops C Lee Higbie, 2/13/91 C PARAMETER (NPARHD=100,NPAR2=2,NPARM1=-1,NPARMN=1000000) COMMON /ONESH/ XS1(200),XS2(200),XS3(200) COMMON /DBL/ DS1(200),DS2(200),DS3(200) DOUBLE PRECISION DS1,DS2,DS3 COMMON /CMPLX/ CS1(200),CS2(200),CS3(200) COMPLEX CS1,CS2,CS3 COMMON /ONELN/ XL1(20 000 000),XL2(20 000 000),XL3(200 000) COMMON /TWO/ XP1(200,200), XP2(200,200),XP3(200,200) COMMON /TWO/ XPS1(8,8) COMMON /MANY/ T1(5,5,5),T2(5,5,5),F1(5,5,5,5),F2(5,5,5,5) COMMON /MANY/ FV1(5,5,5,5,5),FV2(5,5,5,5,5) COMMON /VARBLS/MV1,MV2,NV1,NV2,NV3,V1, V2, ISC1,M,N COMMON /INTGRS/ I1(200),I2(200),I3(200),I,J,K,L COMMON /LGCLS/ L1(200), L2(200),L3(200) LOGICAL L1,L2,L3 INTEGER*2 IS1, IS2, IS3 LOGICAL*1 LS1, LS2, LS3 COMMON/BAD/LS1,LS2(17),LS3(13),IS1,IS2(17),IS3(13),XS4(100) CHARACTER*1 CH1(200),CH2(200) Program: PROGRAM VECTST C ******************************************** C This program was written by Lee Higbie, C Digital Equipment Corportation. C ) Digital Equipment Corporation, 1990. C Each CDEC$ line should have an equivalent compiler C directive or assertion inserted for the compiler C under test. C CLH comment inserted before areas of non-standard Fortan C that may require commenting-out for some compilers C ******************************************** C INCLUDE 'VEC-PAR.INCL/LIST' C The included declaratives listed above are inlcuded in C all the test subroutines. They are listed here for C reference. DATA NV1,NV2,NV3 /100,1,-1/ C Initialize all arrays, just in case DO 500 I=1,200 XS1(I) = FLOAT(I) XS2(I) = FLOAT(I)+1.0 XS3(I) = FLOAT(I-1) CS1(I) = FLOAT(I)*(1.0,1.0) CS2(I) = FLOAT(I)*(1.0,0.0) CS3(I) = FLOAT(I)*(0.0,1.0) DS1(I) = FLOAT(I) DS2(I) = FLOAT(I+1) DS3(I) = FLOAT(I-1) I1(I) = I I2(I) = I+1 I3(I) = I-1 500 CONTINUE DO 610 I=1,200 DO 600 J=1,200 XP1(I,J) = FLOAT(I+J) XP2(I,J) = FLOAT(I*J) XP3(I,J) = FLOAT(I*J-1) 600 CONTINUE 610 CONTINUE DO 700 I=1,2000000 XL1(I) = FLOAT(I) XL2(I) = FLOAT(I+1) XL3(I) = FLOAT(I-1) 700 CONTINUE C About 250 VECTORIZATION TESTS: CALL SIMPLE C 45 simple vectorization tests. Some of these are more C tests of the target hardware than the compiler. CALL SUBSCR C 50 subscript tests. CALL MLTILP C 8 tests that require rearranging the C structure of nests of loops CALL BRNCHG C 36 tests involving branching CALL RCRSON C 35 tests for carefulness of C ambiguity checking. CALL GLOBAL C 28 tests that involve external C routines. All vectorizable. CALL TSTDIR C 6 tests of compiler directives, diagnostics C and warnings. CALL MISC C 18 tests that do not readily C fit into earlier groups. CALL NDCODE C 25 tests where the code generated C by the compiler may need to be C checked to really determine C how well the compiler did. STOP END FUNCTION VCTFN(X,Y) VCTFN = X**2 + Y**2 RETURN END SUBROUTINE VCTSUB(X,Y,Z) X = X**2 + Y**2 + Z**2 RETURN END SUBROUTINE SETNV(I1) C To check for multi-routine compilation. Sets argument to C a positive cnst. I1 = 2 RETURN END SUBROUTINE NOSET C To check for multi-routine compilation. Doesn't change NV2 COMMON /ONELN/ XL1(2000000),XL2(2000000),XL3(2000000) COMMON /VARBLS/MV1,MV2,NV1,NV2,NV3,V1, V2, ISC1,M,N PARAMETER (NPARHD=100,NPAR2=2,NPARM1=-1,NPARMN=1000000) NV3 = 2 RETURN END SUBROUTINE MAYSET C To check for multi-routine compilation. Scalar vars are C unknown after CALL to this routine COMMON /ONELN/ XL1(2000000),XL2(2000000),XL3(2000000) COMMON /VARBLS/MV1,MV2,NV1,NV2,NV3,V1, V2, ISC1,M,N PARAMETER (NPARHD=100,NPAR2=2,NPARM1=-1,NPARMN=1000000) READ *,MV1,MV2,NV1,NV2,NV3,M,N RETURN END SUBROUTINE VCTSB2(X,Y,Z,M,N) DIMENSION X(N),Y(N),Z(N) C Checks to see if M=2 is known to subroutine or if code is C moved to CALLer and expanded in-line. DO 100 I=1,N-3 X(I) = X(I+M)*Y(I) + Z(I) 100 CONTINUE RETURN END SUBROUTINE VCTSB3(X,Y,Z,M,N) DIMENSION X(N),Y(N),Z(N) C TBS: to be supplied????? DO 100 I=1,N-3 X(I) = X(I+M)*Y(I) + Z(I) 100 CONTINUE RETURN END SUBROUTINE SIMPLE INCLUDE 'VEC-PAR.INCL' C 45 simple vectorization tests. Some of these are more C tests of the target hardware than the compiler. DO 100 I=1,NPARHD XS1(I) = XS2(I) + XS3(I) 100 CONTINUE C Two tests for length (loop trip count restrictions) restrictions DO 200 I=1,N XS1(I) = XS2(I) + XS3(I) 200 CONTINUE DO 300 I=1,200 000 XL1(I) = XL2(I) + XL3(I) 300 CONTINUE DO 400 I=1,20 000 000 XL1(I) = XL2(I) 400 CONTINUE DO 500 I=1,100 000 000 XL1(I) = XL2(I)*2 500 CONTINUE C Test for limits on stride DO 600 I=1,30, 1000 XL1(I) = XL2(I)+1 600 CONTINUE DO 700 I=1,30, 1000 000 XL1(I) = XL2(I)+1 700 CONTINUE DO 800 I=1,30, 10 000 000 XL2(I) = XL1(I)*3.3 800 CONTINUE C Vectorize integer and Boolean loops? DO 1000 I=1,NPARHD I1(I) = I2(I) + I3(I) 1000 CONTINUE CLH Following loop is non-standard Fortran DO 1100 I=1,NPARHD L1(I) = L2(I) .OR. L3(I) 1100 CONTINUE C Logical above, double precision below, then complex DO 1200 I=1,NPARHD DS1(I) = DS2(I) + DS3(I) 1200 CONTINUE DO 1300 I=1,NPARHD CS1(I) = CS2(I) + CS3(I) 1300 CONTINUE C Vectorize short integer and short Boolean loops? DO 1400 I=1,N IS2(I) = IS2(I) + IS3(I) 1400 CONTINUE CLH Following loop is non-standard Fortran DO 1500 I=1,N LS2(I) = LS2(I) .OR. LS3(I) 1500 CONTINUE C Mixed single and double DO 2000 I=1,NPARHD DS1(I) = (DS2(I) + DS3(I))*XS1(I) 2000 CONTINUE C Mixed single real and complex DO 2100 I=1,NPARHD CS1(I) = (CS2(I) + CS3(I))*XS1(I) 2100 CONTINUE DO 2200 I=1,NPARHD XS1(I) = XS2(I)*XS3(I) CS1(I) = (CS2(I) + CS3(I))*XS1(I) 2200 CONTINUE DO 2300 I=1,NPARHD XS1(I) = XS2(I)*XS3(I) DS1(I) = DS2(I) + DS3(I) CS1(I) = (CS2(I) + CS3(I))*XS1(I) 2300 CONTINUE DO 2400 I=1,NPARHD XS1(I) = XS2(I)*XS3(I) DS1(I) = DS2(I) + DS3(I) CS1(I) = (CS2(I) + CS3(I))*XS1(I) L1(I) = L2(I) .AND. L3(I) 2400 CONTINUE DO 2500 I=1,NPARHD XS1(I) = XS2(I)*XS3(I) DS1(I) = DS2(I) + DS3(I) CS1(I) = (CS2(I) + CS3(I))*XS1(I) L1(I) = L2(I) .AND. L3(I) I1(I) = I2(I) * I3(I) 2500 CONTINUE C Three tests that see if character data bothers vectorization DO 2600 I=1,N CH1(I) = CH2(I) 2600 CONTINUE DO 2700 I=1,N CH1(I) = CH2(I+11) 2700 CONTINUE DO 2800 I=1,N L1(I) = CH1(I) .EQ. CH2(I) 2800 CONTINUE C Tests using short control vairables. DO 3000 I=1,IS1 XS1(I) = XS2(I) + XS3(I) 3000 CONTINUE DO 3100 IS1=1,NPARHD XS1(IS1) = XS2(IS1) + XS3(IS1) 3100 CONTINUE DO 3200 I=IS2(N),I2(M) XS1(I) = XS2(I) + XS3(I) 3200 CONTINUE C Loops to see where vectorization begins. C Test to see where vectorization begins ONE TEST C Test is passed as long as some vectorize and some don't. DO 3310 I=1,1 XS1(I) = XS2(I) + XS3(I) 3310 CONTINUE DO 3320 I=1,2 XS1(I) = XS2(I) + XS3(I) 3320 CONTINUE DO 3330 I=1,3 XS1(I) = XS2(I) + XS3(I) 3330 CONTINUE DO 3340 I=1,4 XS1(I) = XS2(I) + XS3(I) 3340 CONTINUE DO 3350 I=1,5 XS1(I) = XS2(I) + XS3(I) 3350 CONTINUE DO 3360 I=1,6 XS1(I) = XS2(I) + XS3(I) 3360 CONTINUE DO 3370 I=1,7 XS1(I) = XS2(I) + XS3(I) 3370 CONTINUE DO 3380 I=1,8 XS1(I) = XS2(I) + XS3(I) 3380 CONTINUE DO 3390 I=1,9 XS1(I) = XS2(I) + XS3(I) 3390 CONTINUE C End of length measure. END OF ONE TEST CALL MAYSET C Loops to see where vectorization begins with ambiguity. C Test to see where vectorization begins ONE TEST. C Test is failed if vectorization is same as in test 3300 C above or if conditional vectorization is not done. DO 3410 I=1,2 XS1(I) = XS2(I+NV2) + XS3(I) 3410 CONTINUE DO 3420 I=1,3 XS1(I) = XS2(I+NV2) + XS3(I) 3420 CONTINUE DO 3430 I=1,4 XS1(I) = XS2(I+NV2) + XS3(I) 3430 CONTINUE DO 3440 I=1,5 XS1(I) = XS2(I+NV2) + XS3(I) 3440 CONTINUE DO 3450 I=1,6 XS1(I) = XS2(I+NV2) + XS3(I) 3450 CONTINUE DO 3460 I=1,7 XS1(I) = XS2(I+NV2) + XS3(I) 3460 CONTINUE DO 3470 I=1,8 XS1(I) = XS2(I+NV2) + XS3(I) 3470 CONTINUE DO 3480 I=1,9 XS1(I) = XS2(I+NV2) + XS3(I) 3480 CONTINUE DO 3490 I=1,10 XS1(I) = XS2(I+NV2) + XS3(I) 3490 CONTINUE C End of length measure. END OF ONE TEST C Should not vectorize next four loops, too short. N=2 DO 4000 I=1,N XS1(I) = XS2(I) + XS3(I) 4000 CONTINUE DO 4100 I=1,N XS1(I) = XS2(I) + XS3(I) 4100 CONTINUE CDEC$ ASSERT(M.LE.2) DO 4200 I=1,M XS1(I) = XS2(I) + XS3(I) 4200 CONTINUE DO 4300 I=1,M XS1(I) = XS2(I) + XS3(I) 4300 CONTINUE C Tests where loop indices are not as obvious as in a simple loop K = 0 DO 5000 I=1,NPARHD K = K + 2 XS1(I) = XS2(K) + XS3(I) 5000 CONTINUE DO 5100 I=1,NPARHD K = K - 2 XS1(I) = XS2(K) + XS3(I) 5100 CONTINUE K = 0 DO 5200 I=1,NPARHD K = K + NV1 XS1(I) = XS2(K) + XS3(I) 5200 CONTINUE DO 5300 I=1,MIN(ALOG(FLOAT(N)), FLOAT(NPARHD)) XS1(I) = XS2(I) + XS3(I) 5300 CONTINUE C Check for usability of mixture of index types IS1 = NV1 DO 5400 I=1,100 IS1 = IS1+1 XS1(I) = XS2(IS1) + XS3(I) 5400 CONTINUE C Check for usability of mixture of index types and strides IS1 = NV1 DO 5500 I=1,100 IS1 = IS1+NV2 XS1(I) = XS2(IS1) + XS3(I) 5500 CONTINUE C Check for vectorization of scalar-appearing temporary vars C Tests with scalar temporaries that should be C promoted to vectors DO 6000 I=1,NPARHD V1 = XS1(I)**2 + XS2(I)**2 XL1(I) = V1 XL2(I) = 1.0/V1 6000 CONTINUE CALL MAYSET DO 6100 I=2,NPARHD V1 = XS2(I)*XS3(I) V2 = XS2(I-1) * XS3(I-1) XS1(I) = (V1+V2)*(V1-V2) 6100 CONTINUE CALL MAYSET V2 = XS2(1)*XS3(1) DO 6200 I=2,NPARHD V1 = XS2(I)*XS3(I) XS1(I) = (V1+V2)*(V1-V2) V2 = XS2(I) * XS3(I) 6200 CONTINUE CALL MAYSET V1 = XS2(1)*XS3(1) V2 = XS2(1)*XS3(1) DO 6300 I=2,NPARHD XS1(I) = (V1+V2)*(V1-V2) V1 = XS2(I) + XS3(I) V2 = XS2(I) * XS3(I) * V1 6300 CONTINUE C Can scalar temps be reused? CALL MAYSET DO 6400 I=2,NPARHD V1 = XS2(1)*XS3(1) V2 = XS2(1)*XS3(1) XS1(I) = (V1+V2)*(V1-V2) V1 = XS2(I) + XS3(I) V2 = XS2(I) * XS3(I) * V1 XP1(I,NPAR2) = V1*V2 V1 = XS2(I)**2 V2 = XS3(I)**2 6400 CONTINUE C Can lots of scalar temps be used? CALL MAYSET V1 = XS2(1)*XS3(1) V2 = XS2(1)*XS3(1) DO 6500 I=2,NPARHD XS1(I) = (V1+V2)*(V1-V2) V3 = XS2(I) + XS3(I) V4 = XS2(I) * XS3(I) * V1 V5 = XS3(I) + V3*V4 V6 = XS2(I)*XP2(I,NPAR2) V7 = V6*V5+V3 V8 = V7-V6 V9 = V8**2 XP1(I,NPAR2) = V1*V2+V3*V4 * V9 V1 = XS2(I)**2 V2 = XS3(I)**2 6500 CONTINUE C Will swap vectorize? CALL MAYSET DO 6600 I=1,N V1 = XS1(I) XS1(I) = XS2(I) XS2(I) = V1 6600 CONTINUE RETURN END SUBROUTINE SUBSCR INCLUDE 'VEC-PAR.INCL' C 50 subscript tests. DO 100 I=1,NPARHD XS1(I) = XS2(I+1)+XS3(I-3) 100 CONTINUE DO 200 I=1,NPARHD XS1(I*2) = XS2(2*I+1)+XS3(3*I-3) 200 CONTINUE DO 300 I=1,NPARHD,NPAR2 XS1(I) = XS2(I+1)+XS3(I-3) 300 CONTINUE DO 400 I=1,NPARHD,NPAR2 XS1(I*2) = XS2(2*I+1)+XS3(3*I-3) 400 CONTINUE DO 500 I=1,NPARHD,NPAR2 XS1(I*NV1) = XS2(2*I+1)+XS3(3*I-3) 500 CONTINUE C Next loop includes a gather DO 600 I=1,NPARHD,NPAR2 XS1(I) = XS2(I1(I)+1)+XS3(3*I-3) 600 CONTINUE C See if it can vectorize loop indexes (iota fns) DO 700 I=1,NPARHD,NPAR2 XS1(I) = I 700 CONTINUE DO 800 I=1,NPARHD,NPAR2 XS1(I) = FLOAT(I)*3.141590 800 CONTINUE DO 900 I=1,N XS1(I) = I*(I-1)+1 900 CONTINUE DO 1100 I=1,NPARHD,NPAR2 XP1(I,I) = 1.1 1100 CONTINUE DO 1210 I=1,NPARHD,NPAR2 DO 1200 J=1, I XP1(NPARHD-I+1,J) = 1.144 * XP2(I+J, I-J) 1200 CONTINUE 1210 CONTINUE C K Subscript is not linear, gather required K=1 DO 1300 I=1,N K = K * 2 XS1(I) = XL2(K) + XS3(I) 1300 CONTINUE C Tests to see how complicated the subscript can be DO 2100 I=1,NPARHD XS1(I*3+13) = XS2(I*2+1)+XS3(3*I-3) 2100 CONTINUE DO 2200 I=1,NPARHD,NPAR2 XS1(I*3+13*NV1+NV2) = XS2(I*2*NV2+(NV3+NPAR2)*I+1)+XS3(3*I-3) 2200 CONTINUE DO 2300 I=1,NPARHD MV2 = NV3*N+NPARHD-NPAR2*J MV3 = NV3*N+NPARM1+NPAR2*J XS1(I*3+13*NV1+NV2) = XS2(I*2*MV2+(NV3+NPAR2)*I+1)+XS3(3*I-3) XS1(I*3+13*NV1+MV3) = XS2(I*2*NV2+(NV3+NPAR2)*I+1)+XS3(3*I-3) 2300 CONTINUE C Tests for various subscript generation techniques CALL MAYSET DO 2400 I=1, NPARHD NV1 = NV1+1 MV1 = MV1-1 XS1(I) = XS2(NV1)*XS3(MV1) 2400 CONTINUE CALL MAYSET DO 2500 I=1, NPARHD NV1 = NV1+1 MV1 = MV1-1 XS1(I) = XS2(NV1)*XS3(MV1) 2500 CONTINUE CALL MAYSET DO 2600 I=1, NPARHD NV1 = NV1+1 MV1 = MV1-IS1 XS1(I) = XS2(NV1)*XS3(MV1) 2600 CONTINUE CALL MAYSET DO 2700 I=1, NPARHD NV1 = NV1+1 MV1 = I+NV1-1 XS1(I) = XS2(NV1)*XS3(MV1) 2700 CONTINUE C Test for interlacing of assignments DO 3000 I=2,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I-1) = XS2(I)*XS3(I) + XS3(I-1) 3000 CONTINUE DO 3100 I=2,NPARHD XS1(I) = XS1(I)*XS2(I-1) + XS3(I) XS2(I) = XS2(I)*XS3(I) + XS3(I-1) 3100 CONTINUE C Lower triangular system DO 3200 I=1,N DO 3200 J=1,I-1 DO 3200 K = J+1,N XP1(K,I) = XP1(K,I) + XP1(K,J)*XP1(J,I) 3200 CONTINUE C Another test for interlacing of assignments DO 3300 I=2,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I) = XS2(I)*XS3(I) + XS1(I+1) 3300 CONTINUE C Gather required because of I*I subscript in next loop DO 3400 I=1,NPARHD,NPAR2 XS1(I*I) = XS2(I*2+1)+XS3(3*I-3) 3400 CONTINUE C Test to see if strides are accounted for. DO 3500 I=2,NPARHD,2 XS1(I) = XS1(I-1)*XS2(I) + XS3(I) 3500 CONTINUE DO 3600 I=3,NPARHD,3 XS1(I) = XS1(I-1)*XS2(I) + XS1(I-2)*XS3(I) 3600 CONTINUE DO 3700 I=NPARHD,2,-2 XS1(I) = XS1(I-1)*XS2(I) + XS3(I) 3700 CONTINUE C Checks for knowledge of variable value in stride NV2 = -3 DO 3800 I=NPARHD,3,NV2 XS1(I) = XS1(I-1)*XS2(I) + XS1(I-2)*XS3(I) XL1(I+1) = XL1(I-1)*XS2(I) + XL1(I)*XS3(I) 3800 CONTINUE C Next is one where the stride is larger than the length of the C vector registers so recursion does not preclude vectorization. DO 3900 I=2,NPARHD XL1(I+550) = XL1(I)*XS2(I) + XL1(I-1)*XS3(I) 3900 CONTINUE C Test where vectorization should not be done or where various C special tricks are required to assure correctness C Cannot safely vectorize--may have recursion; C directives required to tell compiler there is no C overlap in input and output index vectors. See C loop 4600 below and loop 2100 in RCRSON C for vectorizing versions. C Test passes if loop is NOT vectorized. DO 4000 I=1, NPARHD XS1(I1(I)) = XS1(I2(I))*XS3(I) 4000 CONTINUE C Cannot safely vectorize because offset is unknown. Test C passed if conditional vectorization used. CALL MAYSET DO 4100 I=1,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I+NV2) = XS2(I)*XS3(I) + XS3(I-1) 4100 CONTINUE C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD DO 4200 I=1,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I+NV2) = XS2(I)*XS3(I) + XS3(I-1) 4200 CONTINUE CDEC$ ASSERT(NV2 .GT. 0) C ====> NOT vectorizable C Test passed if ASSERT statement available in dialect C and works. DO 4300 I=1,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I+NV2) = XS2(I)*XS3(I) + XS3(I-1) 4300 CONTINUE C Next one is vectorizable DO 4400 I=2,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I-NV2) = XS2(I)*XS3(I) + XS3(I-1) 4400 CONTINUE C Will it be broken into several loops? CALL MAYSET DO 4500 I=1,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I) = XS2(I+NV2)*XS3(I) + XS3(I-1) 4500 CONTINUE DO 4600 I=1, NPARHD XS1(I2(I)) = XS1(I2(I))*XS3(I) 4600 CONTINUE C Test to see if a loop is reordered DO 5000 I=2,NPARHD XS1(I) = XS1(I)*XS2(I-1) + XS3(I) XS2(I) = XS2(I+1)*XS3(I) + XS3(I-1) 5000 CONTINUE C Test to see if a loop is separated into vector and scalar loops. DO 5100 I=2,NPARHD XS1(I) = XS1(I) + XS3(I) XS2(I) = XS2(I-1)*XS3(I) + XS3(I-1) XS3(I) = XS3(I)**2 5100 CONTINUE C Tougher test for separating vector and scalar portions DO 5200 I=2,NPARHD XS1(I-1) = XS2(I-1)*V1 XS3(I-1) = XS1(I-1)*V2 XL1(I) = XS3(I-1) + XP1(I,NPAR2) XS2(I) = XL2(I+1) + XL2(I-1) XL2(I) = XL3(I) + V1 XL3(I+1) = XL2(I) *V1*V2 5200 CONTINUE C Another type of vector/scalar loop separation test. C All but first term can be done in vector mode. DO 5300 I=2,NPARHD XS1(I) = XS1(I-1)*XS2(I) + XL1(I)**3+(XS2(I)+XL2(I))* + (XS3(I)+XL2(I)) + (XS2(I)+XL2(I))**2 5300 CONTINUE C Test for subscripts extracted from floating point variables C Floating ==> integer conversion required ==> gather req'd DO 5400 I=1,NPARHD J = XS3(I)*XS2(I) + XS3(I) XS1(I+NV2) = XS2(I)*XS3(J) + XS3(I-1) 5400 CONTINUE C Like 5200 loop but scatter required DO 5500 I=1,NPARHD J = XL1(I)*XS2(I) + XS3(I) XS1(J+NV2) = XS2(I)*XS3(J) + XS3(I-1) 5500 CONTINUE C Like 5400 loop: gather required DO 5600 I=1,NPARHD,2 XS1(I+NV2) = XS2(1+I/3)*XS3(J) + XS3(I-1) 5600 CONTINUE C Tests on many dimensional arrays DO 6000 I=1,N T1(I,J,K) = T2(I,J,K)*F1(K,J,I,M) 6000 CONTINUE DO 6100 I=1,N T1(I,J,I) = T2(I,I,K)*F1(I,J,M,I) 6100 CONTINUE DO 6200 I=1,N K=N-I J= J+1 T1(I,J,I) = T2(I,I,K)*F1(I,J,M,I) 6200 CONTINUE DO 6300 I=1,N K=N-I J= J+1 T1(I,J,I) = T2(I,I,K)*F1(I,J,M,I)+ FV1(I,NP2,J,K,I)* + FV2(I,I,I,J,K) 6300 CONTINUE DO 6400 I=1,IS2(1) DO 6400 J=1,IS2(2) DO 6400 K=1,IS2(3) T1(I,J,K) = T2(I,J,K)**3 DO 6400 L=1,IS2(4) F1(I,J,K,L) = (F2(I,J,K,L)/T1(I,J,K))**2 DO 6400 M=1,IS2(5) FV1(I,J,K,L,M)=FV2(I,J,K,L,M)**2-2.0 6400 CONTINUE CDEC$ ASSERT(MV1.NE.MV2) DO 6500 I=2, NPARHD XP1(MV1,I) = XP1(MV2,I-1)*XS1(I) + XP2(MV2,I-1)*XS2(I) XP2(MV1,I) = XP2(MV2,I-1)*XS1(I) + XP1(MV2,I-1)*XS2(I) 6500 CONTINUE CALL MAYSET MV1 = 1 MV2 = 2 DO 6600 I=2, NPARHD XP1(MV1,I) = XP1(MV2,I-1)*XS1(I) + XP2(MV2,I-1)*XS2(I) XP2(MV1,I) = XP2(MV2,I-1)*XS1(I) + XP1(MV2,I-1)*XS2(I) 6600 CONTINUE RETURN END SUBROUTINE MLTILP INCLUDE 'VEC-PAR.INCL' DIMENSION ZA(2,200), ZB(200,2) C 8 tests involving loop decomposition and reordering C Tests to see if first half and second half of loop are C separately vectorized. DO 100 I=1,NPARHD XS1(I) = XS1(NPARHD-I+1) 100 CONTINUE DO 200 I=1,NPARHD XS1(I) = XS1(NPARHD-I+1)*V1 +XS2(I) 200 CONTINUE CALL MAYSET CDEC$ ASSERT(NV2.GT.0) DO 310 J=1,N DO 300 I=1,NPARHD XP1(I,J) = XP1(I+NV2, N-J+1) 300 CONTINUE 310 CONTINUE C Loop interchanging is needed for the next ones DO 1010 J=1,N DO 1000 I=2,NPARHD XP1(I,J) = XP1(I-1, J)*XP2(I,J) 1000 CONTINUE 1010 CONTINUE DO 1120 J=2,N DO 1100 I=2,NPARHD XP1(I,J) = XP1(I-1,J)*XP2(I,J) 1100 CONTINUE DO 1110 I=2,NPARHD XP1(I,J) = XP1(I,J-1)*XP2(I,J) 1110 CONTINUE 1120 CONTINUE C Wavefront can move diagonally across grid here. In other C words, logically replace the next two DOs with a single C two-dimensional DO sweeping the 2-D grid from corner to C corner. CALL MAYSET DO 2010 J=2,NPARHD DO 2000 I=2,NPARHD XP1(I,J)=(XP1(I-1,J)+XP1(I,J-1))*V2+XP1(I,J) 2000 CONTINUE 2010 CONTINUE C Does dimension info carry forward to loop ordering code? C Following two passed, if loop order interchanged. CALL MAYSET DO 3000 I=1,M DO 3000 J=1,N ZA(J,I) = SQRT(XP1(I,J)) 3000 CONTINUE DO 3100 I=1,M DO 3100 J=1,N ZB(I,J) = SQRT(XP1(I,J)) 3100 CONTINUE RETURN END SUBROUTINE BRNCHG INCLUDE 'VEC-PAR.INCL' C 36 tests involving branching DO 100 I=1,N XS1(I) = XS2(I) IF (XS2(I) .EQ. 0.0) XS1(I) = XS3(I) 100 CONTINUE DO 200 I=1,N IF (XS2(I) .LT. 0.0) XS1(I) = XS2(I)**2 IF (XS2(I) .GE. 0.0) XS1(I) = XS3(I) 200 CONTINUE DO 300 I=1,N IF (XS2(I) .LT. 0.0) XS1(I) = XS2(I)**2 IF (XS2(I) .GE. 0.0) XS1(I) = XS3(I) IF (XS2(I) .GE. 1.0) XS1(I) = XS3(I)**3 300 CONTINUE C Probably shouldn't vectorize the next one, faster in scalar DO 400 I=1,N IF (XS2(I) .LE. -2.0) XS1(I) = (XS2(I)- XS3(I))**2 IF (XS2(I) .LT. 0.0) XS1(I) = XS2(I)**2 IF (XS2(I) .GE. 0.0) XS1(I) = XS3(I) IF (XS2(I) .GE. 1.0) XS1(I) = XS3(I)**3 IF (XS2(I) .GE. 2.0) XS1(I) = (XS2(I)+ XS3(I))**2 IF (XS2(I) .GE. 3.0) XS1(I) = EXP(XL1(I)) IF (XS2(I) .GE. 4.0) XS1(I) = XL3(I)**3 IF (XS2(I) .GE. 5.0) XS1(I) = (XL2(I)+ XL3(I))**2 400 CONTINUE DO 500 I=1,N XS1(I) = EXP(XS2(I)) 500 CONTINUE DO 600 I=1,N XS1(I) = ATAN(XS2(I)) 600 CONTINUE DO 700 I=1,N XS1(I) = ATAN2(XS2(I),XS3(I)) 700 CONTINUE DO 800 I=1,N XS1(I) = ATAN2(XS2(I),V2) 800 CONTINUE DO 900 I=1,N XS1(I) = XS2(I)**2+XS3(I) IF(I .EQ. 1) THEN XS1(I) = 0.0 ELSE IF(I .EQ. N) THEN XS1(I) = 1.0 ELSE IF(XS3(I).GT.1.E22) THEN XS1(I) = 1.E22 ENDIF 900 CONTINUE C Various branching constructs DO 1000 I=1,NPARHD IF(L1(I)) THEN XS1(I) = XS2(I)**2 ELSE XS1(I) = XS3(I)**2 ENDIF 1000 CONTINUE DO 1100 I=1,NPARHD IF(L1(I)) THEN XS1(I) = XS2(I)**2 ELSE XL1(I) = XS3(I)**2 ENDIF 1100 CONTINUE DO 1200 I=1,NPARHD V1 = XS2(I)**2 + XS3(I)**2 IF(V1 .NE. 0) THEN XS1(I) = XS2(I)**2 ELSE XS1(I) = XS3(I)**2 ENDIF 1200 CONTINUE DO 1300 I=1,NPARHD V1 = XS2(I)**2 + XS3(I)**2 IF(V1 .NE. 0) THEN XS1(I) = V1 ELSE XS1(I) = XS3(I)**2 ENDIF 1300 CONTINUE DO 1400 I=1,NPARHD V1 = XS2(I)**2 + XS3(I)**2 IF(V1 .NE. 0) THEN XS1(I) = SQRT(V1) ELSE XS1(I) = XS3(I)**2 ENDIF 1400 CONTINUE C IF test in next loops implies gather, scatter, compress or C expand required J=1 DO 1500 I=1,NPARHD IF(L1(I)) THEN XS1(I) = XS2(J)**2 J=J+1 ENDIF 1500 CONTINUE J=0 DO 1600 I=1,NPARHD J=J+1 IF(L1(I)) THEN XS1(I) = XS2(J)**2 J=J+1 ENDIF 1600 CONTINUE J=0 C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD DO 1700 I=1,NPARHD J=J+1 IF(L1(I)) THEN XS1(I) = XS2(J)**2 J=J+I1(I) ENDIF 1700 CONTINUE C Unusual loops like those from some schrelecht programs DO 2000 I=MAX(J,K),N XP1(I,K) = XP1(I,K) + XP2(J,I)*XP2(I,K) 2000 CONTINUE DO 2150 I=1,N IF (XS1(I)) 2100,2110,2120 2100 CONTINUE XL1(I) = XS1(I)**2 GOTO 2150 2110 CONTINUE XL1(I) = XS2(I)**2 GOTO 2150 2120 CONTINUE XL1(I) = XS3(I)**2 2150 CONTINUE DO 2250 I=1,N GOTO(2200,2210,2220) I1(I) 2200 CONTINUE XL1(I) = XS1(I)**2 GOTO 2250 2210 CONTINUE XL1(I) = XS2(I)**2 GOTO 2250 2220 CONTINUE XL1(I) = XS3(I)**2 2250 CONTINUE J=13 DO 2300 I=1,NPARHD IF(L1(I)) THEN J=J+1 XS1(I) = XS1(J)*XS2(J) + XS3(I) ELSE J=J+1 XS1(I) = XS2(J)*XS3(J+2) - XS3(I) ENDIF 2300 CONTINUE DO 3000 I=1,NPARHD IF(L1(I)) THEN V1 = XS1(I)**2 + XS2(I)**2 ELSE V1 = XS1(I) + XS2(I) ENDIF XL1(I) = V1 XL2(I) = 2.0*V1 3000 CONTINUE C Loop interchange needed despite embedded IF DO 3110 I=1,NPARHD IF(L1(I)) THEN DO 3100 J=1, NPARHD XP1(I,J) = XP1(I,J-1)*XP3(I,J) + XP2(I,J-1) 3100 CONTINUE ENDIF 3110 CONTINUE C The next loops can be rewritten as loops with C index-tests, like the 2000, 2100 loops NDCODE K=NPARHD DO 4000 I=1,NPARHD XS1(I) = XS2(K) K=I 4000 CONTINUE L=1 K=NPARHD DO 4100 I=1,NPARHD XS1(I) = (XS2(K)+XS3(L))*V2 K=I L=NPARHD-I+1 4100 CONTINUE L=1 K=NPARHD DO 4200 I=1,NPARHD XS1(I) = (XS2(K)+XS3(L))*V2 L=K K=I 4200 CONTINUE C Compress and expand loops--special type of C gather/scatter J=1 DO 5000 I=1,NPARHD IF(L1(I)) THEN XS1(I) = XS2(J) J=J+1 ENDIF 5000 CONTINUE J=1 DO 5100 I=1,NPARHD IF(L1(I)) THEN XS1(J) = XS2(I) J=J+1 ENDIF 5100 CONTINUE J=1 C Gather and scatter both required in next loops. J=1 K=1 DO 5200 I=1,NPARHD IF(L1(I)) THEN XS1(K) = XS2(J) J=J+1 ENDIF IF(L2(I)) K=K+1 5200 CONTINUE C$DIR NO_RECURRENCE CDIR$ IVDEP J=1 K=1 CDEC$ INIT_DEP_FWD DO 5300 I=1,NPARHD IF(L1(I)) THEN XS1(K) = XS1(J) J=J+1 ENDIF IF(L2(I)) K=K+1 5300 CONTINUE J=1 K=1 DO 5400 I=1,NPARHD IF(L2(I)) J=J+1 IF(L1(I)) THEN XS1(K) = XS2(J) K=K+1 ENDIF 5400 CONTINUE C In effect, the I/O statements below are vectorizable DO 6000 I=1,NPARHD XS1(I) = XS2(I)**2 + XS3(I) READ (6,1) XS3(I) 1 FORMAT(3E15.5) 6000 CONTINUE DO 6100 I=1,NPARHD XS1(I) = XS2(I)**2 + V2 READ (6,1) V2 6100 CONTINUE DO 6200 I=1,NPARHD XS1(I) = XS2(I)**2 + V2*V1 READ (6,1) V1,V2,V3 6200 CONTINUE DO 6300 I=1,NPARHD XS1(I) = XS2(I)**2 + V2 WRITE (5,1) XS1(I) 6300 CONTINUE DO 6400 I=1,NPARHD XS1(I) = XS2(I)**2 + V2 IF(XS1(I).LT.0) STOP 'UGH' 6400 CONTINUE RETURN END SUBROUTINE RCRSON INCLUDE 'VEC-PAR.INCL' DIMENSION ZA(200),ZC(2), ZD(2),ZE(2) EQUIVALENCE (ZA,ZD), (ZA(4),ZE), (ZA(100),ZC) C 35 tests for carefulness of ambiguity checking. DO 100 I=1,N XS1(I) = XS1(I+NPAR2)+XS3(I) !Parmtr NPAR2 positive 100 CONTINUE C NV2 is unknown. Test passed if execution-time checking done. DO 200 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 200 CONTINUE C Test passed if compiler directives allows vectorization. C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD DO 300 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 300 CONTINUE NV2 = 2 DO 400 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 400 CONTINUE C NV2 is still known now, but may have been forgotten. DO 500 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 500 CONTINUE C NOSET doesn't change the value of NV2 -- it's still 2 CALL NOSET DO 600 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 600 CONTINUE C Test passed if ASSERT is allowed CALL NOSET CDEC$ ASSERT (NV2 .GT. 0) DO 700 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 700 CONTINUE C SETNV sets the value of NV2 to 2 ==> still vectorizable CALL SETNV(NV2) DO 800 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 800 CONTINUE NV2 = 3 DO 1010 I=1,N DO 1000 J=1,M XP1(I,J) = XP1(I+NV2,J-1)+XS3(I) 1000 CONTINUE 1010 CONTINUE DO 1110 J=1,N DO 1100 I=1,M XP1(I,J) = XP1(I+NV2,J-1)+XS3(I) 1100 CONTINUE 1110 CONTINUE DO 1210 I=1,N DO 1200 J=1,M XP1(J,I) = XP1(J+NPAR2,I)+XS3(J) 1200 CONTINUE 1210 CONTINUE DO 1300 J=1,M XP1(I,J) = XP1(I+NV2,J+NV3)+XS3(I) 1300 CONTINUE CALL MAYSET DO 1410 I=1,N C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD DO 1400 J=1,M XP1(I,J) = XP1(I+NV2,J+NV3)+XS3(I) 1400 CONTINUE 1410 CONTINUE NV2 = 2 DO 1510 I=1,N DO 1500 J=1,M XP1(I,J) = XP1(I+NV2,J+NV3)+XS3(I) 1500 CONTINUE 1510 CONTINUE DO 1610 I=1,N DO 1600 J=1,M XP1(I,J) = XP1(I+NV2,J+NV3)+XS3(I) 1600 CONTINUE 1610 CONTINUE C ??? what did I have in mind here??? DO 1710 I=1,N DO 1700 J=1,M XP1(I,J) = XP1(J,I)*XP2(J,I) 1700 CONTINUE 1710 CONTINUE C Following loop uses more difficult subscript. C J is actually 1 so it is vectorizable K=NPAR2*2 J=NPAR2-1 DO 2000 I=1, NPARHD XS1(I) = XS1(I+J)*XS3(I) + XS2(I) 2000 CONTINUE C Following test is ambiguous because of possible C overlap of input and output subscript values. C Implies directive required. C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD C Tell compiler that there is no overlap DO 2100 I=1, NPARHD XS1(I1(I)) = XS1(I2(I))*XS3(I) 2100 CONTINUE C NVL is known in 2310 loop but may have been forgotten. C NVL is local so MAYSET cannot change it. NVL = 2 DO 2200 I=1,N XS1(I) = XS1(I+NVL)*XS3(I)+XS2(I) 2200 CONTINUE CALL MAYSET DO 2210 I=1,N XS1(I) = XS1(I+NVL)*XS3(I)+XS2(I) 2210 CONTINUE C Is limit checking careful enough to see no overlap in C next loop CALL MAYSET DO 2300 I=1,NPARHD-1 XS1(I) = XS1(2*NPARHD-I)*XS3(I)+XS2(I) 2300 CONTINUE C Should (or can) vectorize the J (3010) loop C but not the I (3000) loop. IF(NV2.LT.0) THEN DO 3000 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 3000 CONTINUE ELSE DO 3010 J=1,N XS1(J) = XS1(J+NV2)+XS3(J) 3010 CONTINUE ENDIF C Should (or can) vectorize the J (3110) loop C but not the I (3100) loop. NV3 = NV2-5 IF(NV2.LT.5) THEN DO 3100 I=1,N XS1(I) = XS1(I+NV3)+XS3(I) 3100 CONTINUE ELSE DO 3110 J=1,N XS1(J) = XS1(J+NV3)+XS3(J) 3110 CONTINUE ENDIF C Should (or can) vectorize the J (3210) loop C but not the I (3200) loop. IF(NV2.LT.5) THEN NV3 = NV2-5 DO 3200 I=1,N XS1(I) = XS1(I+NV3)+XS3(I) 3200 CONTINUE ELSE NV3 = NV2-5 DO 3210 J=1,N XS1(J) = XS1(J+NV3)+XS3(J) 3210 CONTINUE ENDIF C Should (or can) vectorize the J (3310) loop C but not the I (3300) loop. NV3 = (NV2-5)*2 +1 IF(NV2.LT.5) THEN DO 3300 I=1,N XS1(I) = XS1(I+NV3)+XS3(I) 3300 CONTINUE ELSE DO 3310 J=1,N XS1(J) = XS1(J+NV3)+XS3(J) 3310 CONTINUE ENDIF C The next group of loops are all involve simple linear recursion C and can be vectorized by recursive doubling, e.g. C NOTES: 1. If these tests are passed, some of the other tests C should be rechecked to ensure recursive doubling C didn't make it look like other tests passed or C failed when the contrary was true. C 2. These tests have very large trip counts so C recursive doubling makes sense here even when it C may not in those other cases. C Sets the array to the XL1(1) value, but looks like recursion DO 4000 I=1,NPARMN XL1(I) = XL1(1) 4000 CONTINUE C Sets the array to the XL1(1) value, but looks like recursion DO 4100 I=2,NPARMN XL1(I) = XL1(I-1) 4100 CONTINUE C Sets the array to XL1(1) + (I-1) * V1, but looks recursive DO 4200 I=2,NPARMN XL1(I) = XL1(I-1) + V1 4200 CONTINUE C Like above but a product instead of a sum recursion DO 4300 I=2,NPARMN XL1(I) = XL1(I-1) * V2 4300 CONTINUE DO 4400 I=2,NPARMN XL1(I) = XL1(I-1) * V2 + XL3(I) 4400 CONTINUE DO 4500 I=2,NPARMN XL1(I) = XL1(I-1) * V2 + XL3(I)*XL1(I-2) +XL2(I) 4500 CONTINUE C Next tests are for EQUIVALENCE checking. NONE of C these should vectorize (at least not without C recursive doubling). DO 5000 I=1,NPARHD ZE(I) = ZA(I)*XS1(I) + XS2(I) 5000 CONTINUE DO 5100 I=1,NPARHD ZE(I) = ZD(I)*XS1(I) + XS2(I) 5100 CONTINUE DO 5200 I=1,NPARHD ZD(I+NV2) = ZA(I)*XS1(I) + XS2(I) 5200 CONTINUE C Similar to those above but here there is no overlap, C so these should vectorize. DO 5300 I=1,64 ZC(I) = ZA(I)*XS1(I) + XS2(I) 5300 CONTINUE DO 5400 I=1,64 ZC(I) = ZD(I)*XS1(I) + XS2(I) 5400 CONTINUE RETURN END SUBROUTINE GLOBAL INCLUDE 'VEC-PAR.INCL' DIMENSION ZA(200),ZB(200), ZC(200),ZD(200) CDIR$ VFUNCTION VCTFN VINLNF(X,Y,Z) = X**2 + Y**2 + Z**2 JLOC = 12345 C 28 tests that involve subroutines. All vectorizable. DO 100 I=1,N XS1(I) = AMAX(XS2(I),XS3(I),XL2(I)) 100 CONTINUE DO 200 I=1,N XS1(I) = ABS(XS2(I)**3 + XS3(I)**2)*XL2(I) 200 CONTINUE DO 300 I=1,N XS1(I) = SQRT(XS2(I)**2 + XS3(I)**2)*XL2(I) 300 CONTINUE DO 400 I=1,N XS1(I) = VINLNF(XS1(I), XS2(I), XS3(I)) 400 CONTINUE C Next loops test vectorization of Intrinsic Functions DO 1000 I=1,NPARHD XS1(I) = XS2(I)**N 1000 CONTINUE DO 1100 I=1,NPARHD XS1(I) = XS2(I)**V1 1100 CONTINUE DO 1200 I=1,NPARHD XS1(I) = EXP( I*V1*ALOG(XS2(I)) ) 1200 CONTINUE C Next two probably vectorize if written like one above. DO 1300 I=1,NPARHD XS1(I) = XS2(I)**I 1300 CONTINUE DO 1400 I=1,NPARHD XS1(I) = XS2(I)**(I*V1) 1400 CONTINUE DO 1500 I=1,NPARHD XS1(I) = XS2(I)**N*SIN(XS2(I))*EXP(XS3(I)) 1500 CONTINUE DO 1600 I=1,NPARHD XS1(I) = SQRT(SQRT(XS2(I)**2+XS3(I)**2)) 1600 CONTINUE DO 1700 I=1,NPARHD XS1(I) = TAN(SQRT(SQRT(XS2(I)**2+XS3(I)**2))) 1700 CONTINUE C Does it know intrinsics have no side-effects? DO 1800 I=1,NPARHD V1 = XS2(I)*3.14159 XS1(I) = TAN(SQRT(SQRT(XS2(I)**2+XS3(I)**2))) XL1(I) = V1 1800 CONTINUE C Does it know intrinsics have no side-effects? DO 1900 I=1,NPARHD V1 = SIN(XS2(I))*3.14159 XS1(I) = TAN(SQRT(SQRT(XS2(I)**2+XS3(I)**2))) XL1(I) = V1*XS1(I)+XS3(I) 1900 CONTINUE C More intrinsic function tests DO 2000 I=1,NPARHD XS1(I) = AMAX(XS2(I)**2, XS3(I)**2) 2000 CONTINUE DO 2100 I=1,NPARHD XS1(I) = AMAX(XS2(I)**2, XS3(I)**2, XS2(I)**3) 2100 CONTINUE DO 2200 I=1,NPARHD XS1(I) = AMAX(XS2(I)**2, XS3(I)**2, XS2(I)**3, XS1(I)) 2200 CONTINUE C A cleverly written random number generator/compiler needed C (A vector random number generator needed to vectorize) DO 2300 I=1,NPARDH XS1(I) = XS3(I)**2 * RAN(JLOC) 2300 CONTINUE C Vectorized WRITE needed for next one DO 2400 I=1,NPARDH WRITE(6,1) XS1(I),XS3(I) 1 FORMAT(' The values are:',2E20.10) 2400 CONTINUE DO 2400 I=1,NPARDH XS1(I) = XS3(I)**2 * RAN(JLOC) WRITE(6,1) XS1(I),XS3(I) 2400 CONTINUE C For Cray, VCTFN must be written in Assembly language C or the VFUNCTION directive must be removed (and then C there will be no vectorization). DO 3000 I=1,N XS1(I) = VCTFN(XS2(I),XS3(I)) 3000 CONTINUE C The next loops can vectorize if the loop code is moved into C the loop, the loop into the subroutine, or some similar C multi-routine analysis. C Various attacks that work for next two. The subroutines need C to get info that fourth argument is positive. C Statement numbers on CONTINUES included for referencing only. CALL VCTSB2(XS2,XS3,XL2, 2, NPARHD) 3100 CONTINUE NV2 = 2 CALL VCTSB2(XS2,XS3,XL2, NV2,NPARHD) 3200 CONTINUE C Subroutine code must be moved in line in next one because C of poor CALLing structure. DO 3300 I=1,N CALL VCTSUB(XS2(I),XS3(I),XL2(I)) 3300 CONTINUE C Subroutine code must be moved in line in next one because C of poor CALLing structure. DO 3400 I=1,N XS1(I) = XS3(I)*XL2(I) CALL VCTSUB(XS2(I),XS3(I),XL2(I)) 3400 CONTINUE C Next loops are vectorizable because Z* are local so the C subroutine CALLs cannot interfere with them. DO 4000 I=1, NPARHD ZA(I) = ZB(I)*ZC(I) CALL ANYTHG 4000 CONTINUE DO 4100 I=1, NPARHD ZA(I) = ZB(I)*ZC(I) CALL ANYTHG ZD(I) = ZB(I) - ZC(I) 4100 CONTINUE DO 4200 I=1, NPARHD ZA(I) = ZB(I)*ZC(I) VLOC = ZA(I)**2 CALL ANYTHG ZD(I) = VLOC-ZB(I) 4200 CONTINUE RETURN END SUBROUTINE TSTDIR INCLUDE 'VEC-PAR.INCL' C 6 tests of assertion statements and directives. C Test to see if ASSERT carries forward CDEC$ ASSERT(NV2.GT.0) DO 100 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 100 CONTINUE DO 110 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 110 CONTINUE C Test to see what happens with conflicting ASSERTs. At C least a warning should be given. CDEC$ ASSERT(I.GT.I+NV2) CDEC$ ASSERT(NV2.GT. 0) DO 200 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 200 CONTINUE C Should (or can) vectorize the J (1010) loop C but not the I (1000) loop. Is compiler smart enough C to do what's right, not what it's told to do? C Or at least give a warning that there's a conflict? IF(NV2.LT.0) THEN C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD DO 1000 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 1000 CONTINUE ELSE DO 1010 J=1,N XS1(J) = XS1(J+NV2)+XS3(J) 1010 CONTINUE ENDIF C Is compiler smart enough C to do what's right, not what it's told to do? C Or at least give a warning that there's a conflict? IF(NV2.LT.0) THEN CDEC$ ASSERT(NV2.GE.0) DO 1100 I=1,N XS1(I) = XS1(I+NV2)+XS3(I) 1100 CONTINUE ENDIF C Test for conflicting directives--diags should be given CDEC$ ASSERT(NV2 .GT. 0) C ====> NOT vectorizable C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD DO 2000 I=1,NPARHD XS1(I) = XS1(I)*XS2(I) + XS3(I) XS2(I+NV2) = XS2(I)*XS3(I) + XS3(I-1) 2000 CONTINUE C Is compiler smart enough C to do what's right, not what it's told to do? C Or at least give a warning that there's a conflict? C$DIR NO_RECURRENCE CDIR$ IVDEP CDEC$ INIT_DEP_FWD DO 2100 I=2, NPARHD XS1(I) = XS1(I-1)*XS2(I)+XS3(I) 2100 CONTINUE C Need directives for: listing control, no-vectorization. C Listing and similar directives NOT tested RETURN END SUBROUTINE MISC INCLUDE 'VEC-PAR.INCL' C 18 tests that do not readily fit into earlier groups. C Sum-Reduction loop J=11 DO 100 I=1,N XS1(J) = XS1(J) + XS3(I) 100 CONTINUE DO 200 I=1,N XS1(J) = XS1(J) + XS3(I)*XS2(I)*I+SQRT(XS2(I)**3) 200 CONTINUE C Running convolution type codes DO 310 J=1,M DO 300 I=1,N XS1(J) = XS1(J) + XS3(I+J)*XS2(I+J) 300 CONTINUE 310 CONTINUE C Running convolution type codes DO 410 J=1,M DO 400 I=1,N XS1(J) = XS1(J) + SQRT(XS3(I+J)**2 + XS2(I+J)**2) 400 CONTINUE 410 CONTINUE C Running convolution type codes DO 510 J=1,M DO 500 I=1,N V2 = XP1(I,J)*XP2(I,J-1) V3 = XP1(I,J)-XP2(I-1,J) XS1(J) = XS1(J) + XS3(I+J)*XS2(I+J) + V2*V3 500 CONTINUE 510 CONTINUE C Product-Reduction loop J=1 DO 600 I=1,N XS1(J) = XS1(J) * XS3(I) 600 CONTINUE C Messy loop that should vectorize DO 1000 I=1,N V1 = XL1(I)*XS2(I-1) V2 = XL1(I)**2 + XS3(I)**3 XV3 = V1*V2 + XS2(I+3) XP1(I,NV1) = V1+V2*XV3 V1 = XP2(I,NV1)*XS3(I) + XS2(I+3) V2 = XV3 * V1 + XP2(NV2, I) XP3(NV2,I) = V1*V2 + ABS(XV3) V1 = (XL1(I) + XS2(I))*V2 XS1(I) = XL1(J) + XS3(I)*XS2(I)*I+SQRT(V1**3) 1000 CONTINUE C Uses a temporary variable that = I*cst V1 = 0 DO 2000 I=1,N V1 = V1 + V2 XS1(I) = V1 2000 CONTINUE C Uses a temporary variable that = I*cst V1 = 0 DO 2100 I=1,N V1 = V1 + V2 XS1(I) = V1 XS2(I) = V1**2 XS3(I) = SIN(V1 + XS2(I)) 2100 CONTINUE C Search-type loops DO 3000 I=1,N XS1(I) = XS2(I)**2 IF(XS3(I).LT.0.4) GOTO 3010 3000 CONTINUE 3010 CONTINUE DO 3100 I=1,NPARHD IF(I1(I) .EQ. NV1) GOTO 3110 3100 CONTINUE 3110 CONTINUE C Search for max element XMAX = XS3(1) DO 3200 I=2,N IF(XS3(I) .LT. XMAX) XMAX = XS3(I) 3200 CONTINUE C Search for max element and its index XMAX = XS3(I) IMAX = 1 DO 3300 I=2,N IF(XS3(I) .LT. XMAX) THEN XMAX = XS3(I) IMAX = I ENDIF 3300 CONTINUE DO 4000 I=1,N XS1(I) = XS2(I)**2 IF(I.GT.44) GOTO 4010 4000 CONTINUE 4010 CONTINUE I=1 4100 CONTINUE XS1(I) = XS2(I)**2 I=I+1 IF(I.LT.N) GOTO 4100 I=1 4200 CONTINUE XS1(I) = XS2(I)**2 I=I+1 IF(I.GT.N) GOTO 4210 GOTO 4200 4210 CONTINUE I=1 4300 CONTINUE I=I+1 IF(I.GT.N) GOTO 4310 XS1(I) = XS2(I)**2 GOTO 4300 4310 CONTINUE CLH Following loop is non-standard Fortran C Statement numbers on CONTINUE included for referencing only. I=0 DO WHILE (I .LT. N) I=I+1 XS1(I) = XS2(I)*XS3(I) ENDDO 4400 CONTINUE RETURN END SUBROUTINE NDCODE INCLUDE 'VEC-PAR.INCL' VINLNF(X,Y,Z) = X**2 + Y**2 + Z**3 CDIR$ CODE C 25 tests where the code generated by the compiler needs to be C checked to really determine how well the compiler did. C Test passed if converted to one loop or interchanged. CALL S100 DO 100 I=1,200 DO 100 J=1,200 XP1(I,J) = 0.0 100 CONTINUE C Test passed if converted to one loop or interchanged. CALL S200 DO 200 I=1,200 DO 200 J=1,2 XP1(I,J) = 0.0 200 CONTINUE C Test passed if loop order interchanged CALL S300 DO 300 I=1,100 DO 300 J=1,2 XP1(I,J) = 0.0 300 CONTINUE C Test passed if scalar-vector operations C are used, not broadcasting the scalar. CALL S400 DO 400 I=1,N XS1(I) = VINLNF(XS1(I), XS2(I), V1) 400 CONTINUE C Test passed if converted to one loop. CALL S500 DO 500 I=1,8 DO 500 J=1,8 XPS1(I,J) = 0.0 500 CONTINUE C Test passed if loop order is interchanged in this convolution CALL S600 M = N/2 DO 600 I = 1+M, 200000-M DO 600 J=-M, M XL1(I) = XL1(I) + XL2(I+J)*XS1(J+M+1) 600 CONTINUE C Test passed if loop order is interchanged in this convolution CALL S700 DO 700 I = 1+(N/2), 200000-(N/2) DO 700 J=-(N/2), (N/2) XL1(I) = XL1(I) + XL2(I+J)*XS1(J+(N/2)+1) 700 CONTINUE C Loops below should be interchanged only for the first C half of the values of the outermost loop. CALL S800 DO 820 I=1,M DO 810 J=1,2**M, 2**(M-I) DO 800 K=1,2**M, 2**I XS1(K) = (XS2(J) + XS2(2**M-J+1))*XS3(I) 800 CONTINUE 810 CONTINUE 820 CONTINUE C Matrix multiply -- convert to outer-product form? CALL S1000 DO 1020 I=1,M DO 1010 J=1,N DO 1000 K=1,L XP1(I,J) = XP1(I,J) + XP2(I,K)*XP3(K,J) 1000 CONTINUE 1010 CONTINUE 1020 CONTINUE C Also convertible, but not just a matrix multiply CALL S1100 DO 1110 I=1,M DO 1100 J=1,N DO 1100 K=1,L XP1(I,J) = XP1(I,J) + XP2(I,K)*XP3(K,J)+XS3(J) 1100 CONTINUE 1110 CONTINUE C There are redundant stores below: either a diag or dead C code elimination should be done, preferably a C diagnostic given that code is ridiculous. CALL S1200 DO 1210 I=1,NPARHD,NPAR2 DO 1200 J=1, I XP1(NPARHD-I+1,I) = 1.144 * XP2(I+J, I-J) 1200 CONTINUE 1210 CONTINUE C Should do as a loop from 2,N with explicit code for I=1 case CALL S2000 DO 2000 I=1,N XS1(I) = XS2(I)**2+XS3(I) IF(I .EQ. 1) XS1(I) = 0.0 2000 CONTINUE C Should do as a loop from 2,N-1 with explicit code for C I=1 and for I=N cases CALL S2100 DO 2100 I=1,N XS1(I) = XS2(I)**2+XS3(I) IF(I .EQ. 1) THEN XS1(I) = 0.0 ELSE IF(I .EQ. N) THEN XS1(I) = 1.0 ENDIF 2100 CONTINUE C Should move the IF test outside the loop below CALL S2200 DO 2200 I=1,NPARHD IF(LI(1)) THEN XS1(I) = XS2(I)+XS3(I)**2 ENDIF 2200 CONTINUE C Following loop should be a block of vector code with no C strip-mining loop surrounding. Duplicate of loop 2000 C in SIMPLE. CALL S3000 DO 3000 I=1,64 XS1(I) = XS2(I) + XS3(I) 3000 CONTINUE C Should get an interrupt on overflow but not on C zero XS3 values CALL S4000 DO 4000 I=1,N IF(XS3(I).NE.0) XS1(I) = XS2(I)/XS3(I) 4000 CONTINUE C Should get an interrupt on divide by zero below but not above. CALL S4100 DO 4100 I=1,N XS1(I) = XS2(I)/XS3(I) 4100 CONTINUE C K Subscript is linear, NO gather required. CALL S5000 K=1 DO 5000 I=1,N K = K+1 XS1(I) = XL2(K) + XS3(I) K=K+2 XS2(K) = XS1(I)*XS3(I) K=K+1 XL1(K) = XL2(K)* XS3(I) 5000 CONTINUE C Checks to see if several loops are coalesced into a single one. CALL S5100 DO 5100 I=1,NPARHD XS1(I) = 0.0 5100 CONTINUE XS2(I) = 1.0 5110 CONTINUE DO 5120 I=1,NPARHD XS3(I) = 2.0 5120 CONTINUE DO 5130 I=1,NPARHD XL1(I) = -1.0 5130 CONTINUE DO 5140 I=1,NPARHD XL2(I) = -12.0 5140 CONTINUE C Five loops above are a single test for loop coalescing. C Check to see if one loop is separated into several. CALL S5200 DO 5200 I=1,NPARHD IF(I.LT.11) XS1(I) = 0.0 IF((I.GE.11).AND.(I.LT.33)) XS1(I) = 1.0 IF(I.GE.33) XS1(I) = 3.0 5200 CONTINUE C Check to see if several loops are concatenated into one. CALL S5300 DO 5300 I=1,13 XS1(I) = 1.0 5300 CONTINUE DO 5310 I=14,33 XS1(I) = 1.0 5310 CONTINUE DO 5320 I=34,NAPRHD XS1(I) = 1.0 5320 CONTINUE C Three loops above are a single test for loop concatenation. C Test for rerolling loop. Test is passed by converting to a C loop of increment 1. CALL S5400 DO 5410 I=1,5,5 XS1(I) = XS2(I)*XS3(I)+XS2(I+1)*XS3(I+1)+XS2(I+2)*XS3(I+2) + + XS2(I+3)*XS3(I+3)+XS2(I+4)*XS3(I+4) 5410 CONTINUE DO 5420 I=1,10,5 XS1(I) = XS2(I)*XS3(I)+XS2(I+1)*XS3(I+1)+XS2(I+2)*XS3(I+2) + + XS2(I+3)*XS3(I+3)+XS2(I+4)*XS3(I+4) 5420 CONTINUE DO 5430 I=1,20,5 XS1(I) = XS2(I)*XS3(I)+XS2(I+1)*XS3(I+1)+XS2(I+2)*XS3(I+2) + + XS2(I+3)*XS3(I+3)+XS2(I+4)*XS3(I+4) 5430 CONTINUE DO 5440 I=1,30,5 XS1(I) = XS2(I)*XS3(I)+XS2(I+1)*XS3(I+1)+XS2(I+2)*XS3(I+2) + + XS2(I+3)*XS3(I+3)+XS2(I+4)*XS3(I+4) 5440 CONTINUE DO 5450 I=1,50,5 XS1(I) = XS2(I)*XS3(I)+XS2(I+1)*XS3(I+1)+XS2(I+2)*XS3(I+2) + + XS2(I+3)*XS3(I+3)+XS2(I+4)*XS3(I+4) 5450 CONTINUE C End of single rerolling test C Second rerolling test: should be a loop of incr=1, length = 64. CALL S5500 DO 5500 I=1,60,5 XS1(I) = XS2(I)*XS3(I)+XS2(I+1)*XS3(I+1)+XS2(I+2)*XS3(I+2) + + XS2(I+3)*XS3(I+3)+XS2(I+4)*XS3(I+4) 5500 CONTINUE C Compute v1**n CALL S6000 DO 6000 I=1, N X = X*V1 6000 CONTINUE C Evaluate polynomial CALL S6100 DO 6100 I=1, N X = X*V1+V2 6100 CONTINUE C***** C Other tests not included: C Scalar optimiztion such as loop unrolling C Array bounds checking outside of loops, not in loops C***** RETURN END