C C*********************************************************************** C C K-Means Algorithm Adapted By Glenn Milligan From M. R. Anderberg C Cluster Analysis For Applications, Academic Press, 1973 C Prepared May 1978 C The Methods Include Jancey's And Forgy's Procedures C Revised By Richard Cheng, November 1995. C C*********************************************************************** C C NOTE: C C FUNCTION DIST MAY NEED TO BE CHANGED TO PROVIDE THE C DISTANCE MEASURE THAT SEEMS MOST APPRORPIATE C FOR THE DATA AT HAND C CURRENT VERSION COMPUTES EUCLIDEAN DISTANCES ON RAW DATA C C C C C C THE PROGRAM IS CURRENTLY SET UP FOR: C UP TO 100 DATA UNITS C UP TO 20 VARIABLES C UP TO 10 CLUSTERS C IF MORE STORAGE IS REQUIRED, THE FOLLOWING PARAMETERS WILL C HELP IN MAKING THE CHANGES IN THE DIMENSION STATEMENTS C C NE= # OF ENTITIES TO BE CLUSTERED C NV= # OF VARIABLES C NC= # OF CLUSTERS C SET BLK1 AS FOLLOWS: C CENTR(NC*NV) C NUMBR(NC) C MEMBR(NE) C TOTAL(NC*NV) OR TOTAL(NE) WHICHEVER IS LARGER C DATA(NE*NV) C FUNCTION DIST MAY NEED ATTENTION IN TERMS OF NV C SUBROUTINE RANDOM WOULD ALSO NEED ATTENTION IN TERMS OF NC C LOGICAL ARRAY WD(NE,NE) IN SUBROUTINE RESULT C ARRAY EVEC(NE*(NE-1)/2) IN SUBROUTINE RESULT C C*********************************************************************** C INCLUDE 'GREX.FH' PARAMETER (MAXPTS=550, MAXVAR=30, MAXCLS=40) C C MAXPTS = Maximum number of data elements C MAXVAR = Maximum number of variables C MAXCLS = Maximum number of clusters C COMMON/BLK1/CENTR(MAXVAR*MAXCLS),NUMBR(MAXCLS),MEMBR(MAXPTS), * TOTAL(MAXPTS*2),DATA(MAXPTS*MAXVAR),TITLE(20) COMMON /BLK2/ NE,NV,NC,MINREL,IPART,METHOD,NSUB,LEVEL,ICODE,PFLAG COMMON /BLK3/ BDATA(MAXPTS*MAXVAR),CTROLD(MAXVAR*MAXCLS) COMMON /BLK4/ FMT DIMENSION MBROLD(MAXPTS),MBRADJ(MAXPTS),NHA(MAXCLS,MAXCLS) DIMENSION NID(MAXCLS),NDJ(MAXCLS),NFIRST(MAXCLS) DIMENSION PBS(MAXPTS),HA(MAXPTS),CDATA(MAXVAR*2) DIMENSION ASEED(10),NUMOLD(MAXCLS) CHARACTER*20 FILIN,OUTFIL,FILCEN CHARACTER*20 NAME(2) CHARACTER*40 FMT,FMTCEN CHARACTER PAL(17) LOGICAL TEST DATA NAME/'Jancey''s Algorithm','Forgy''s Algorithm'/ DATA PAL /'4','3','3','3','3','3','3','3','3','3','3','3', * '3','3','3','3','4'/ INTEGER PAUSE,ERR,OMODE,IARGV,PFLAG C C PAUSE, ERR, OMODE, IARGV are display graphics variables C IARGV = IARGC() IF (IARGV.EQ.0) THEN OMODE = GET_VIDEO_MODE(I,J) ERR = GRAPHICS_MODE (16) ERR = CLEAR() ERR = HOME() ERR = SET_PALETTE(PAL) ELSE ERR = GRAPHICS_MODE (6) ERR = CLEAR() ERR = TEXT_MODE() ENDIF WRITE (*,10) 10 FORMAT (/,12X, * 'ÉÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍ»', */,12X,'º KMINFLU º', */,12X,'º º', */,12X,'º K-Means Algorithm With Internal Influence Measurement º', */,12X,'º º', */,12X,'º Developed by Glenn W. Milligan º', */,12X,'º Revised by Richard Cheng º', */,12X,'º Ohio State University º', */,12X,'º Version 2.0 -- Feb. 1995 º', */,12X,'º º', */,12X,'ÈÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍÍͼ', *////,12X,' In the event of a data entry error,', */,12X,' program execution can be terminated by', */,12X,' entering -99 for any numerical prompt.', *///,12x,' Press enter to continue') IKEY = PAUSE() IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF WRITE (*,20) 20 FORMAT (' Enter a one line title for your analysis:') READ (*,30) TITLE 30 FORMAT (20A4) 40 WRITE (*,50) 50 FORMAT (/,' How many elements in the data set?') 60 READ (*,70,ERR=80) NE 70 FORMAT (I5) IF (NE.EQ.-99) GO TO 2280 GO TO 100 80 CALL BEEP WRITE (*,90) 90 FORMAT (' Integer value expected, please re-enter:') GO TO 60 100 IF (NE.GT.MAXPTS) THEN CALL BEEP WRITE (*,110) 110 FORMAT (' You have exceeded the maximum number of objects ', * 'allowed.',/,' Please re-enter:') GO TO 60 ENDIF IF (NE.LT.2) THEN CALL BEEP IF (NE.LT.0) THEN WRITE (*,120) 120 FORMAT (' You cannot have a negative number of objects!',/, * ' Please re-enter:') ELSE WRITE (*,130) 130 FORMAT (' You cannot cluster with less than two objects',/, * ' Please re-enter:') ENDIF GO TO 60 ENDIF IF (MODIFY.EQ.1) GO TO 1190 140 WRITE (*,150) 150 FORMAT (/,' How many variables in the data set?') 160 READ (*,70,ERR=170) NV IF (NV.EQ.-99) GO TO 2280 GO TO 180 170 CALL BEEP WRITE (*,90) GO TO 160 180 IF (NV.GT.MAXVAR) THEN CALL BEEP WRITE (*,190) 190 FORMAT (' You have exceeded the maximum number of variables', * ' allowed.',/,' Please re-enter:') GO TO 160 ENDIF IF (NV.LT.1) THEN CALL BEEP IF (NV.LT.0) THEN WRITE (*,200) 200 FORMAT (' You cannot have a negative number of variables!',/ * ,' Please re-enter:') ELSE WRITE (*,210) 210 FORMAT (' You cannot cluster with less than one ', * 'variable!',/,' Please re-enter:') ENDIF GO TO 160 ENDIF IF (MODIFY.EQ.1) GO TO 1190 220 WRITE (*,230) 230 FORMAT (/,' Enter desired number of clusters:') 240 READ (*,70,ERR=250) NC IF (NC.EQ.-99) GO TO 2280 GO TO 260 250 CALL BEEP WRITE (*,90) GO TO 240 260 IF (NC.GT.MAXCLS) THEN CALL BEEP WRITE (*,270) 270 FORMAT (' You have exceeded maximum number of clusters', * ' allowed.',/,' Please re-enter:') GO TO 240 ENDIF IF (NC.LT.2) THEN CALL BEEP IF (NC.LT.0) THEN WRITE (*,280) 280 FORMAT (' You cannot have a negative number of clusters!',/, * ' Please re-enter:') ELSE WRITE (*,290) 290 FORMAT (' You can cluster with less than 2 clusters.',/, * ' Please re-enter:') ENDIF GO TO 240 ENDIF IF (MODIFY.EQ.1.AND.IPART.EQ.1) GO TO 530 IF (MODIFY.EQ.1.AND.IPART.EQ.2) GO TO 620 IF (MODIFY.EQ.1) GO TO 1190 IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF 300 WRITE (*,*) WRITE (*,*) 'Algorithm to be used?' WRITE (*,*) ' 1 = Jancey Algorithm' WRITE (*,*) ' 2 = Forgy Algorithm' 310 READ (*,70,ERR=320) METHOD IF (METHOD.EQ.-99) GO TO 2280 GO TO 340 320 CALL BEEP WRITE (*,330) 330 FORMAT (' Please enter 1 or 2.') GO TO 310 340 IF (METHOD.NE.1.AND.METHOD.NE.2) THEN CALL BEEP WRITE (*,330) GO TO 310 ENDIF IF (MODIFY.EQ.1) GO TO 1190 350 WRITE (*,360) 360 FORMAT (/,' Enter the stopping rule:',/,3X,'0 = Stop only if no', * ' more data element changes cluster membership (Default)',/,3X, * '1 = Stop when 1 or fewer data elements change cluster', * ' membership',/,3X, * '2 = Stop when 2 or fewer data elements change cluster', * ' membership',/,3X, * '3 = Stop when 3 or fewer data elements change cluster', * ' membership',/,3X,'etc.') 370 READ (*,70,ERR=380) MINREL IF (MINREL.EQ.-99) GO TO 2280 GO TO 390 380 CALL BEEP WRITE (*,90) GO TO 370 390 IF (MINREL.LT.0.OR.MINREL.GT.NE) THEN CALL BEEP WRITE (*,400) 400 FORMAT (' Your selection must fall between 0 and the number',/, * ' of elements in the data set. Please re-enter:') GO TO 370 ENDIF IF (MODIFY.EQ.1) GO TO 1190 C C RANGE STANDARDIZATION OPTION C IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF 410 WRITE (*,*) WRITE (*,*) 'Standardize distances by range?' WRITE (*,*) ' 0 = No (Default)' WRITE (*,*) ' 1 = Yes' 420 READ (*,70,ERR=430) ISTD IF (ISTD.EQ.-99) GO TO 2280 GO TO 450 430 CALL BEEP WRITE (*,440) 440 FORMAT (' Please enter 0 or 1') GO TO 420 450 IF (ISTD.NE.0.AND.ISTD.NE.1) THEN CALL BEEP WRITE (*,440) GO TO 420 ENDIF IF (MODIFY.EQ.1.AND.IPART.EQ.3.AND.ISTD.EQ.1) GO TO 830 IF (MODIFY.EQ.1) GO TO 1190 460 WRITE (*,470) 470 FORMAT (1X,'How to select the seed points?',/,4X, * '1 = Specified as certain data elements',/,4X, * '2 = Data elements are grouped into an initial partition',/,4X, * '3 = Specified centroids are to be used as seed points',/,4X, * '4 = Elements are selected randomly') 480 READ (*,70,ERR=490) IPART IF (IPART.EQ.-99) GO TO 2280 GO TO 500 490 CALL BEEP WRITE (*,90) GO TO 480 500 IF (IPART.GT.4.OR.IPART.LT.1) THEN CALL BEEP WRITE (*,510) 510 FORMAT (' Invalid selection, please re-enter:') GO TO 480 ENDIF IF (MODIFY.EQ.1.AND.IPART.EQ.1) GO TO 530 IF (MODIFY.EQ.1.AND.IPART.EQ.2) GO TO 620 IF (MODIFY.EQ.1.AND.IPART.EQ.3) GO TO 800 IF (MODIFY.EQ.1.AND.IPART.EQ.4) GO TO 920 IF (MODIFY.EQ.1) GO TO 1190 INF = 0 DO 520 I = 1, NC NUMBR(I) = 0 520 CONTINUE C C IPART = 1, SEED POINTS ARE SELECTED AS CERTAIN DATA ELEMENTS C 530 IF (IPART.EQ.1) THEN IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF DO 610 I = 1, NC WRITE (*,540) I 540 FORMAT (/, * ' Enter data element number for seed for cluster #',I3, * ':') 550 READ (*,70,ERR=560) NUMTMP IF (NUMTMP.EQ.-99) GO TO 2280 GO TO 570 560 CALL BEEP WRITE (*,90) GO TO 550 570 IF (NUMTMP.LT.1.OR.NUMTMP.GT.NE) THEN CALL BEEP WRITE (*,580) 580 FORMAT (' Entry error, please re-enter:') GO TO 550 ENDIF IF (I.EQ.1) THEN NUMBR(I) = NUMTMP ELSE DO 600 J = 1, I-1 IF (NUMTMP.EQ.NUMBR(J)) THEN CALL BEEP WRITE (*,590) 590 FORMAT (' That data element is already a seed', * ' point, please enter another one:') GO TO 550 ELSE NUMBR(I) = NUMTMP ENDIF 600 CONTINUE ENDIF 610 CONTINUE ENDIF IF (MODIFY.EQ.1) GO TO 1190 C C IPART=2, DATA ELEMENTS ARE GROUPED INTO AN INITIAL PARTITION C 620 IF (IPART.EQ.2) THEN IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF 630 WRITE (*,640) 640 FORMAT (/,' This program assumes data element #1 is the',/, * ' first data element for cluster 1.') NFIRST(1) = 1 DO 710 I = 2, NC WRITE (*,650) I 650 FORMAT (/,' Enter first data element number for cluster',I3, * ':') 660 READ (*,70,ERR=670) NUMTMP IF (NUMTMP.EQ.-99) GO TO 2280 GO TO 680 670 CALL BEEP WRITE (*,90) GO TO 660 680 IF (NUMTMP.LT.1.OR.NUMTMP.GT.NE) THEN CALL BEEP WRITE (*,580) GO TO 660 ENDIF DO 700 J = 1, I-1 IF (NUMTMP.LE.NFIRST(J)) THEN CALL BEEP WRITE (*,690) 690 FORMAT (' You cannot have overlapping groups,', * ' please re-enter:') GO TO 660 ENDIF 700 CONTINUE NFIRST(I) = NUMTMP 710 CONTINUE IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF WRITE (*,720) 720 FORMAT (/,' You have entered the starting clusters as', * ' follows:') DO 740 I = 1, NC-1 WRITE (*,730) I,NFIRST(I),NFIRST(I+1)-1 730 FORMAT (5X,' Cluster',I3,': From',I3,' to',I3) 740 CONTINUE WRITE (*,730) NC,NFIRST(NC),NE WRITE (*,750) 750 FORMAT (/,' Do you want to change this initial partition?',/,4X * ,'0 = No (Default)',/,4X,'1 = Yes') 760 READ (*,70,ERR=770) IMOD IF (IMOD.EQ.-99) GO TO 2280 GO TO 780 770 CALL BEEP WRITE (*,440) GO TO 760 780 IF (IMOD.NE.0.AND.IMOD.NE.1) THEN CALL BEEP WRITE (*,440) GO TO 760 ENDIF IF (IMOD.EQ.1) THEN IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF GO TO 630 ENDIF DO 790 I = 1, NC-1 NUMBR(I) = NFIRST(I+1)-NFIRST(I) 790 CONTINUE NUMBR(NC) = NE-NFIRST(NC)+1 ENDIF IF (MODIFY.EQ.1) GO TO 1190 C C IPART=3, SPECIFIED CENTROIDS ARE TO BE USED AS SEED POINTS C 800 IF (IPART.EQ.3) THEN IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF WRITE (*,810) 810 FORMAT (/,' Enter file name containing starting centroids:') 820 READ (*,1060) FILCEN INQUIRE (FILE=FILCEN,EXIST=TEST) IF (.NOT.TEST) THEN CALL BEEP WRITE (*,1070) GO TO 820 ENDIF C C IF RANGE STANDARDIZATION OF VARIABLES WAS REQUESTED, CHECK C TO SEE IF THE CENTROID FILE IS ALSO STANDARDIZED TO AVOID C PROBLEMS CAUSED BY DIFFERENT SCALES C 830 IF (ISTD.EQ.1) THEN WRITE (*,840) 840 FORMAT (/,' Is centroid file already standardized by', * ' range?',/,4X,'0 = No',/,4X,'1 = Yes') 850 READ (*,70,ERR=860) ISTD2 IF (ISTD2.EQ.-99) GO TO 2280 GO TO 870 860 CALL BEEP WRITE (*,90) GO TO 850 870 IF (ISTD2.NE.0.AND.ISTD2.NE.1) THEN CALL BEEP WRITE (*,440) GO TO 850 ENDIF IF (ISTD2.EQ.0) THEN WRITE (*,880) 880 FORMAT (/,' There is a good possibily that the starting', * /,' centroids are poor seed points. Please compute',/ * ,' starting centroids based on standardized data',/, * ' scale or use HCINFLU to generate starting',/, * ' centroids automatically.',/) IKEY = PAUSE() GO TO 2280 ENDIF ENDIF IF (MODIFY.EQ.1.AND.MODCEN.EQ.1) GO TO 1190 890 WRITE (*,900) 900 FORMAT (/,' Enter input format for centroids:') READ (*,910) FMTCEN IF (MODIFY.EQ.1) GO TO 1190 910 FORMAT (A40) ENDIF IF (MODIFY.EQ.1) GO TO 1190 C C IPART=4, SEED POINTS ARE RANDOMLY SELECTED FROM DATA ELEMENTS C 920 IF (IPART.EQ.4) THEN WRITE (*,930) 930 FORMAT (/,' Enter an odd integer as the seed for random', * ' number generator:') 940 READ (*,950,ERR=960) ISEED 950 FORMAT (I8) IF (ISEED.EQ.-99) GO TO 2280 ITEST = ISEED/2 ITEST = ITEST*2 IF (ISEED.EQ.ITEST) GO TO 960 GO TO 970 960 CALL BEEP WRITE (*,*) 'An odd integer is expected, please re-enter:' GO TO 940 ENDIF 970 IF (MODIFY.EQ.1) GO TO 1190 JSEED = ISEED 980 WRITE (*,990) 990 FORMAT (/,' Internal influence measure for each data elements?',/, * 4X,'0 = No (Default)',/,4X,'1 = Yes') 1000 READ (*,70,ERR=1010) INF IF (INF.EQ.-99) GO TO 2280 GO TO 1020 1010 CALL BEEP WRITE (*,90) GO TO 1000 1020 IF (INF.NE.1.AND.INF.NE.0) THEN CALL BEEP WRITE (*,440) GO TO 1000 ENDIF IF (MODIFY.EQ.1.AND.INF.EQ.1) GO TO 1130 IF (MODIFY.EQ.1.AND.INF.EQ.0) THEN LEVEL = 0 GO TO 1190 ENDIF IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF 1030 WRITE (*,1040) 1040 FORMAT (/,' Enter the data file name:') 1050 READ (*,1060) FILIN 1060 FORMAT (A20) INQUIRE (FILE=FILIN,EXIST=TEST) IF (.NOT.TEST) THEN CALL BEEP WRITE (*,1070) 1070 FORMAT (' Input file not found - Try again...') GO TO 1050 ENDIF IF (MODIFY.EQ.1) GO TO 1190 1080 WRITE (*,1090) 1090 FORMAT (/,' Enter input format for data elements:') 1100 READ (*,1110) FMT 1110 FORMAT (A40) IF (MODIFY.EQ.1) GO TO 1190 1120 WRITE (*,*) WRITE (*,*) 'Enter the output file name:' READ (*,1060) OUTFIL IF (MODIFY.EQ.1) GO TO 1190 IF (INF.EQ.0) GO TO 1180 1130 WRITE (*,1140) 1140 FORMAT (/,' Output level?',/,4X, * '0 = Standard Output - Including Influence Results (Default)',/ * ,4X,'1 = Extended Output - Detailed Influence Results') 1150 READ (*,70,ERR=1160) LEVEL IF (LEVEL.EQ.-99) GO TO 2280 GO TO 1170 1160 CALL BEEP WRITE (*,90) GO TO 1150 1170 IF (LEVEL.NE.1.AND.LEVEL.NE.0) THEN CALL BEEP WRITE (*,440) GO TO 1150 ENDIF 1180 IF (MODIFY.EQ.1) GO TO 1190 C C USER CONFIRMATION SECTION C 1190 IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF MODIFY = 0 MODCEN = 0 WRITE (*,1200) 1200 FORMAT (//,6X,'You have entered the following information:',/) WRITE (*,1210) NE,NV,NC 1210 FORMAT (5X,' 1. Number of objects: ',I3,/,5X, * ' 2. Number of variables: ',I3,/,5X, * ' 3. Number of clusters: ',I3) WRITE (*,1220) NAME(METHOD) 1220 FORMAT (5X,' 4. Algorithm selected: ',A20) IF (MINREL.EQ.0) THEN WRITE (*,1230) 1230 FORMAT (5X,' 5. Stopping rule: Stop when no point changes', * ' cluster membership') ELSE IF (MINREL.EQ.1) THEN WRITE (*,1240) 1240 FORMAT (5X,' 5. Stopping rule: Stop when 1 or fewer', * ' points change membership') ELSE WRITE (*,1250) MINREL 1250 FORMAT (5X,' 5. Stopping rule: Stop when',I3,' or fewer', * ' points change membership') ENDIF ENDIF IF (IPART.EQ.1) THEN WRITE (*,1260) 1260 FORMAT (5X,' 6. Starting seed selection: User supplied data', * ' elements') WRITE (*,1270) (NUMBR(I),I=1,NC) 1270 FORMAT (5X,' 9. Data elements selected:',10I4,3(/,33X,10I4)) ELSE IF (IPART.EQ.2) THEN WRITE (*,1280) 1280 FORMAT (5X,' 6. Starting seed selection: User supplied', * ' starting groups') ELSE IF (IPART.EQ.3) THEN WRITE (*,1290) 1290 FORMAT (5X,' 6. Starting seed selection: User supplied', * ' starting centroids') WRITE (*,1300) FILCEN 1300 FORMAT (5X,' 7. Starting centroids file name: ',A20) WRITE (*,1310) FMTCEN 1310 FORMAT (5X,' 8. Starting centroids format: ',A20) ELSE WRITE (*,1320) 1320 FORMAT (5X,' 6. Starting seed selection: Randomly', * ' selected data elements') ENDIF ENDIF ENDIF IF (ISTD.EQ.1) THEN WRITE (*,1330) 1330 FORMAT (5X,' 10. Distances standardized by range: Yes') ELSE WRITE (*,1340) 1340 FORMAT (5X,' 10. Distances standardized by range: No') ENDIF IF (INF.EQ.0) THEN WRITE (*,1350) 1350 FORMAT (5X,' 11. Internal influence for each point: No') ELSE WRITE (*,1360) 1360 FORMAT (5X,' 11. Internal influence for each point: Yes') ENDIF WRITE (*,1370) FILIN 1370 FORMAT (5X,' 12. Data file name: ',A20) WRITE (*,1380) FMT 1380 FORMAT (5X,' 13. Data file format: ',A40) WRITE (*,1390) OUTFIL 1390 FORMAT (5X,' 14. Output file name: ',A20) IF (INF.EQ.1) THEN IF (LEVEL.EQ.0) THEN WRITE (*,1400) 1400 FORMAT (5X,' 15. Output level: Standard output') ELSE WRITE (*,1410) 1410 FORMAT (5X,' 15. Output level: Extended output') ENDIF ENDIF WRITE (*,1420) 1420 FORMAT (//,5X,' Select an item number to make a change', * ' or press enter to continue...') 1430 READ (*,70,ERR=1440) ICH IF (ICH.EQ.-99) GO TO 2280 GO TO 1450 1440 CALL BEEP WRITE (*,110) GO TO 1430 1450 IF (ICH.NE.0) THEN MODIFY = 1 IF (ICH.EQ.8) MODCEN = 1 IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF GO TO (40,140,220,300,350,460,800,890,530,410,980,1030,1080, * 1120,1130) ICH ENDIF DO 1460 I = 1, NC NUMOLD(I) = NUMBR(I) 1460 CONTINUE C C READ THE DATA SET INTO CENTRAL MEMORY C IF (IPART.EQ.3) OPEN (4,FILE=FILCEN,STATUS='UNKNOWN') OPEN (5,FILE=FILIN,STATUS='UNKNOWN') OPEN (6,FILE=OUTFIL,STATUS='UNKNOWN') DO 1470 K = 1, NE M = (K-1)*NV+1 N = K*NV READ (5,FMT,ERR=1480,END=2260) (DATA(KM),KM=M,N) 1470 CONTINUE GO TO 1490 1480 WRITE (*,*) 'Incorrect data format, please re-enter:' REWIND 5 GO TO 1100 1490 DO 1500 I = 1, 2*NV CDATA(I) = DATA(I) 1500 CONTINUE C C RANGE STANDARDIZATION OPTION C IF (ISTD.EQ.1) THEN DO 1530 I = 1, NV XMAX = 0.0 XMIN = 9.9E99 DO 1510 J = 1, NE IF (DATA((J-1)*NV+I).GT.XMAX) XMAX = DATA((J-1)*NV+I) IF (DATA((J-1)*NV+I).LT.XMIN) XMIN = DATA((J-1)*NV+I) 1510 CONTINUE RANGE = XMAX-XMIN DO 1520 J = 1, NE DATA((J-1)*NV+I) = (DATA((J-1)*NV+I)-XMIN)/RANGE 1520 CONTINUE 1530 CONTINUE ENDIF C C STORE ORIGINAL DATA ARRAY FOR LATER USE C DO 1540 I = 1, NE*NV BDATA(I) = DATA(I) 1540 CONTINUE C C ESTABLISH INITIAL PARTITION C IF (IPART.EQ.4) THEN 1550 CALL RANDOM (ISEED,ASEED,NC) DO 1560 IM = 1, NC NUMBR(IM) = INT(ASEED(IM)*(NE-1)+1) 1560 CONTINUE DO 1570 IM = 2, NC IM1 = IM-1 DO 1570 IM2 = 1, IM1 IF (NUMBR(IM).EQ.NUMBR(IM2)) GO TO 1550 1570 CONTINUE ENDIF IF (IPART.EQ.3) THEN DO 1580 I = 1, NC M = (I-1)*NV+1 N = I*NV READ (4,FMTCEN,ERR=2270,END=2260) (CENTR(J),J=M,N) 1580 CONTINUE ENDIF IF (IPART.EQ.2) THEN K = 0 J1 = -NV DO 1600 J = 1, NC NJ = NUMBR(J) J1 = J1+NV DO 1590 I = 1, NV JJ1 = J1+I TOTAL(JJ1) = 0. 1590 CONTINUE DO 1600 KJ = 1, NJ K = K+1 MEMBR(K) = J K1 = (K-1)*NV DO 1600 I = 1, NV J2 = J1+I KKI = K1+I TOTAL(J2) = TOTAL(J2)+DATA(KKI) 1600 CONTINUE J1 = 0 DO 1610 J = 1, NC DO 1610 I = 1, NV J1 = J1+1 CENTR(J1) = TOTAL(J1)/NUMBR(J) 1610 CONTINUE ENDIF C C IPART = 1 OR IPART = 4 C SET UP THE SEED POINTS - THE DATA UNIT WITH SEQUENCE NUMBER C NUMBR(J)IS USED AS THE J-TH SEED POINT C IF (IPART.EQ.1.OR.IPART.EQ.4) THEN DO 1620 J = 1, NC NJ = (NUMBR(J)-1)*NV J1 = (J-1)*NV DO 1620 I = 1, NV CENTR(J1+I) = DATA(NJ+I) 1620 CONTINUE ENDIF C C STORE THE STARTING CENTROIDS C DO 1630 I = 1, NV*NC CTROLD(I) = CENTR(I) 1630 CONTINUE IF (IARGV.EQ.0) THEN ERR = CLEAR() ERR = HOME() ELSE CALL CLEAR_TEXT ENDIF WRITE (*,1640) NAME(METHOD) 1640 FORMAT (//,' Starting ',A20,//) WRITE (6,1650) 1650 FORMAT (/,10X,/,10X, * ' KMINFLU ',/,10X * ,' ',/, * 10X,' K-Means Algorithm With Internal Influence Measurement ',/ * ,10X,' ', * /,10X,' Developed By Glenn W. Milligan ' * ,/,10X, * ' Revised by Richard Cheng ',/,10X * ,' Ohio State University ',/, * 10X,' Version 2.0 -- Feb. 1995 ',/ * /) WRITE (6,1660) TITLE 1660 FORMAT (1X,20A4) WRITE (6,1670) NE,NV,NC,MINREL 1670 FORMAT (//,' User Input Specifications:',//, * ' Number of elements =',I5,//,' Number of variables =',I5, * //,' Number of clusters =',I5,//,' Minimum release =', * I5,/) IF (IPART.EQ.1) THEN WRITE (6,1680) 1680 FORMAT (' Initial partition = User supplied data', * ' elements') ELSE IF (IPART.EQ.2) THEN WRITE (6,1690) 1690 FORMAT (' Initial partition = User supplied', * ' starting groups') ELSE IF (IPART.EQ.3) THEN WRITE (6,1700) 1700 FORMAT (' Initial partition = User supplied', * ' starting centroids') ELSE WRITE (6,1710) 1710 FORMAT (' Initial partition = Randomly', * ' selected data elements') ENDIF ENDIF ENDIF WRITE (6,1720) NAME(METHOD),FILIN,FMT 1720 FORMAT (/,' Method used =',4X,A20,//, * ' Data file name =',4X,A20,//,' Data input format =' * ,4X,A40,/) WRITE (6,1730) 1730 FORMAT (' The first two data lines were read as follows:',/) DO 1740 I = 1, 2 WRITE (6,FMT) (CDATA(K),K=(I-1)*NV+1,I*NV) 1740 CONTINUE WRITE (6,*) IF (ISTD.EQ.1) THEN WRITE (6,1750) 1750 FORMAT (' The first two data lines after standardization:',/) DO 1770 I = 1, 2 WRITE (6,1760) (DATA(K),K=(I-1)*NV+1,I*NV) 1760 FORMAT (12F6.3) 1770 CONTINUE WRITE (6,*) ENDIF IF (IPART.EQ.4) THEN WRITE (6,1780) JSEED 1780 FORMAT (1X,'The random number seed specified was ',I8,/) WRITE (6,1790) (NUMBR(J),J=1,NC) 1790 FORMAT (1X,'The random seed data elements selected for the', * ' initial partion were:',/,4(10I5),/) ELSE IF (IPART.EQ.3) THEN WRITE (6,1800) FILCEN 1800 FORMAT (' Centroids file name =',4X,A20,/) WRITE (6,1810) FMTCEN 1810 FORMAT (' Centroid input format =',4X,A40,/) WRITE (6,1820) 1820 FORMAT (' The first two lines of centroids file were read', * ' as follows:',/) DO 1830 I = 1, 2 WRITE (6,FMTCEN) (CENTR(K),K=(I-1)*NV+1,I*NV) 1830 CONTINUE WRITE (6,1840) 1840 FORMAT (//,1X,'Starting Partition Information:') WRITE (6,1850) 1850 FORMAT (/,1X,'Initial cluster centroids read in as', * ' follows:') J1 = 0 DO 1890 J = 1, NC II = J1+1 III = J1+NV WRITE (6,1860) J 1860 FORMAT (/,1X,'Centroids for cluster',I3,/) DO 1880 I = II, III WRITE (6,1870) CENTR(I) 1870 FORMAT (1X,G15.5) 1880 CONTINUE J1 = III 1890 CONTINUE ELSE IF (IPART.EQ.2) THEN WRITE (6,1900) 1900 FORMAT (' Starting centroids are computed from the', * ' following data elements:') DO 1920 I = 1, NC-1 WRITE (6,1910) I,NFIRST(I),NFIRST(I+1)-1 1910 FORMAT (5X,' Cluster',I3,': From',I3,' to',I3) 1920 CONTINUE WRITE (6,1910) NC,NFIRST(NC),NE WRITE (6,1930) 1930 FORMAT (/,' Number of data elements in the initial', * ' partition:',/) ELSE WRITE (6,1940) 1940 FORMAT (' Data elements selected as initial seeds:',/) ENDIF WRITE (6,1950) (NUMBR(J),J=1,NC) 1950 FORMAT (1X,10I7) ENDIF ENDIF NSUB = 0 ICODE = 0 CALL KMEAN (0,0) IF (ICODE.EQ.1) GO TO 2280 CALL RESULT (0,0,PCORR) IF (INF.EQ.1) THEN DO 1960 I = 1, NE PBS(I) = 0. HA(I) = 0. 1960 CONTINUE C C SAVE CLUSTER MEMBERSHIP INFORMATION FROM FIRST K-MEANS RUN C DO 1970 I = 1, NE MBROLD(I) = MEMBR(I) 1970 CONTINUE NSUB = 1 C C DO K-MEANS NE TIMES EACH TIME IGNORING ONE POINT IN TURN C DO 2110 IGNORE = 1, NE WRITE (*,1980) IGNORE 1980 FORMAT ('+-----> Calculating influence for data element',I4) IF (LEVEL.EQ.1) THEN WRITE (6,2150) 12 WRITE (6,1990) IGNORE 1990 FORMAT (' Clustering result when point',I3, * ' is removed from the data set:') ENDIF DO 2000 I = 1, NE*NV DATA(I) = 0. 2000 CONTINUE IDX = 1 DO 2020 I = 1, NE IF (I.NE.IGNORE) THEN M = (I-1)*NV+1 N = I*NV DO 2010 J = M, N DATA(IDX) = BDATA(J) IDX = IDX+1 2010 CONTINUE ENDIF 2020 CONTINUE C C RESET STARTING CENTROIDS C DO 2030 I = 1, NV*NC CENTR(I) = CTROLD(I) 2030 CONTINUE CALL KMEAN (1,IGNORE) IF (ICODE.EQ.2) THEN HA(IGNORE) = 999. GO TO 2110 ENDIF CALL RESULT (1,IGNORE,PCORR) PBS(IGNORE) = PCORR C C CONSTRUCT ADJUSTED MEMBERSHIP ARRAYS FROM THE ORIGINAL C CLUSTERING BUT IGNORE THE POINT OF INTEREST C K = 0 DO 2040 I = 1, NE IF (I.NE.IGNORE) THEN K = K+1 MBRADJ(K) = MBROLD(I) ENDIF 2040 CONTINUE DO 2050 I = 1, NC DO 2050 J = 1, NC NHA(I,J) = 0 2050 CONTINUE NEW = NE-1 DO 2060 I = 1, NEW NHA(MBRADJ(I),MEMBR(I)) = NHA(MBRADJ(I),MEMBR(I))+1 2060 CONTINUE C C WRITE (6,*) 'NHA ARRAY' C DO 165 I = 1, NC C WRITE (6,*) (NHA(I,J),J=1,NC) C165 CONTINUE C NIJSQ = 0 DO 2070 I = 1, NC DO 2070 J = 1, NC NIJSQ = NIJSQ+NHA(I,J)**2 2070 CONTINUE C C WRITE (6,*) 'NIJSQ =',NIJSQ C DO 2080 I = 1, NC NID(I) = 0 NDJ(I) = 0 2080 CONTINUE DO 2090 I = 1, NC DO 2090 J = 1, NC NID(I) = NID(I)+NHA(J,I) NDJ(I) = NDJ(I)+NHA(I,J) 2090 CONTINUE C C WRITE (6,*) 'NID =',(NID(I),I=1,NC) C WRITE (6,*) 'NDJ =',(NDJ(I),I=1,NC) C NIDSQ = 0 NDJSQ = 0 DO 2100 I = 1, NC NIDSQ = NIDSQ+NID(I)**2 NDJSQ = NDJSQ+NDJ(I)**2 2100 CONTINUE C C WRITE (6,*) 'NIDSQ =',NIDSQ C WRITE (6,*) 'NDJSQ =',NDJSQ C HATERM = (NIDSQ*NDJSQ-NEW*(NIDSQ+NDJSQ)+NEW**2)/((NEW)*(NEW- * 1)) C C WRITE (6,*) 'HATERM=',HATERM C HARAND = (NIJSQ-NEW-HATERM)/(0.5*(NIDSQ+NDJSQ)-NEW-HATERM) HA(IGNORE) = HARAND 2110 CONTINUE IFLAG = 0 DO 2120 I = 1, NE IF (HA(I).GT.990.0.OR.PBS(I).GT.990.0) THEN IFLAG = 1 GO TO 2130 ENDIF 2120 CONTINUE 2130 IF (IFLAG.EQ.0) THEN WRITE (6,2140) 2140 FORMAT (/,' No exception found') ENDIF WRITE (6,2150) 12 2150 FORMAT (A1) WRITE (6,2160) 2160 FORMAT (1X,'Summary Statistics:',//,1X,'Data Point',4X, * 'Point Biserial',4X,'Internal Inf.',/,1X,' Removed ',4X, * ' Statistic ',4X,' Measure ',/,50('-')) DO 2200 I = 1, NE IF (HA(I).GT.990.0.OR.PBS(I).GT.990.0) THEN WRITE (6,2170) I 2170 FORMAT (3X,I3,11X,'Undefined',8X,'Undefined') ELSE IF (ABS(HA(I)-1).GT.0.001) THEN WRITE (6,2180) I,PBS(I),HA(I) 2180 FORMAT (3X,I3,10X,F8.3,9X,F8.3,5X,'<-----') ELSE WRITE (6,2190) I,PBS(I),HA(I) 2190 FORMAT (3X,I3,10X,F8.3,9X,F8.3) ENDIF ENDIF 2200 CONTINUE ENDIF WRITE (6,*) WRITE (6,*) 'End of Method' WRITE (6,2150) 12 CALL BEEP WRITE (*,2210) 2210 FORMAT (/,' Job successfully completed.'///, * ' Continue with additional analyses?',/,4X,'0 = No (Default)',/ * ,4X,'1 = Yes') 2220 READ (*,70,ERR=2230) MORE IF (MORE.EQ.-99.OR.MORE.EQ.0) GO TO 2280 GO TO 2240 2230 CALL BEEP WRITE (*,440) GO TO 2220 2240 IF (MORE.NE.0.AND.MORE.NE.1) THEN CALL BEEP WRITE (*,440) GO TO 2220 ENDIF IF (MORE.EQ.1) THEN REWIND 4 REWIND 5 DO 2250 I = 1, NC NUMBR(I) = NUMOLD(I) 2250 CONTINUE GO TO 1190 ENDIF 2260 CALL BEEP WRITE (*,*) 'Program has reached the end of your data file' WRITE (*,*) 'but your format and data set size specifications' WRITE (*,*) 'indicate that more data is expected or required.' WRITE (*,*) 'Program terminating...' GO TO 2280 2270 CALL BEEP WRITE (*,*) 'invalid, Inconsistent, or incomplete data format' WRITE (*,*) 'Program terminating...' 2280 CONTINUE WRITE (*,*) ' ' WRITE (*,*) ' ' WRITE (*,*) 'Press enter to return to system...' WRITE (*,*) IKEY = PAUSE() IF (IARGV.NE.0) GO TO 2290 ERR = SET_VIDEO_MODE(OMODE) 2290 CALL TEXT_MODE STOP END C*********************************************************************** C SUBROUTINE KMEAN C THIS SUBROUTINE ITERATIVELY SORTS *NE* DATA UNITS INTO *NC* C CLUSTERS USING THE ALGORITHM OF FORGY AND JANCEY C*********************************************************************** C SUBROUTINE KMEAN(INF,IGNORE) PARAMETER (MAXPTS=550, MAXVAR=30, MAXCLS=40) COMMON/BLK1/CENTR(MAXVAR*MAXCLS),NUMBR(MAXCLS),MEMBR(MAXPTS), * TOTAL(MAXPTS*2),DATA(MAXPTS*MAXVAR),TITLE(20) COMMON /BLK2/ NE,NV,NC,MINREL,IPART,METHOD,NSUB,LEVEL,ICODE,PFLAG COMMON /BLK3/ BDATA(MAXPTS*MAXVAR),CTROLD(MAXVAR*MAXCLS) COMMON /BLK4/ FMT INTEGER PFLAG CHARACTER*40 FMT ICODE = 0 IF (IPART.EQ.2) GO TO 20 C C INITIALIZE ARRAYS C DO 10 K = 1, NE MEMBR(K) = 0 10 CONTINUE 20 NPASS = 1 IF (INF.EQ.0) THEN WRITE (6,30) 12 30 FORMAT (A1) WRITE (6,40) 40 FORMAT (/,1X,'Intermediate Results: (history of the', * ' clustering iterations)',//,1X,'Iteration',4X, * '# of Data Units Moved',4X,'Summed Deviation', * ' About Seed Points',/,1X,73('-')) ENDIF C C BEGINNING OF MAIN LOOP C 50 J1 = 0 DO 60 J = 1, NC NUMBR(J) = 0 DO 60 I = 1, NV J1 = J1+1 TOTAL(J1) = 0. 60 CONTINUE MOVES = 0 TDIST = 0 C C ALLOCATE EACH DATA UNIT TO THE NEAREST CLUSTER CENTROID C K1 = 0 DO 90 K = 1, NE-NSUB K2 = K1+1 J2 = 1 C C COMPUTE DISTANCE TO FIRST CLUSTER CENTROID C DREF = DIST(DATA(K2),CENTR(J2),NV) JREF = 1 C C TEST DISTANCES TO REMAINING CLUSTER CENTROIDS C DO 70 J = 2, NC J2 = J2+NV DTEST = DIST(DATA(K2),CENTR(J2),NV) IF (DTEST.GE.DREF) GO TO 70 DREF = DTEST JREF = J 70 CONTINUE C C ALLOCATE DATA UNIT *K* TO CLUSTER *JREF* C NUMBR(JREF) = NUMBR(JREF)+1 TDIST = TDIST+DREF IF (JREF.EQ.MEMBR(K)) GO TO 80 C C THE DATA UNIT CHANGES ITS MEMBERSHIP C MOVES = MOVES+1 MEMBR(K) = JREF 80 J1 = (JREF-1)*NV DO 90 I = 1, NV J1 = J1+1 K1 = K1+1 TOTAL(J1) = TOTAL(J1)+DATA(K1) 90 CONTINUE C C ALL DATA UNITS ALLOCATED. TEST FOR CONVERGENCE C IF (INF.EQ.0) THEN WRITE (6,100) NPASS,MOVES,TDIST 100 FORMAT (3X,I3,13X,I5,18X,F16.5) ENDIF NPASS = NPASS+1 JREF = 0 IF (MOVES.GT.MINREL) GO TO 180 IF (METHOD.NE.1.AND.MOVES.EQ.0) RETURN JREF = 1 C C COMPUTE TRUE CLUSTER CENTROIDS--FORGY UPDATE C 110 J1 = 0 DO 120 J = 1, NC IF (NUMBR(J).EQ.0) GO TO 130 DO 120 I = 1, NV J1 = J1+1 CENTR(J1) = TOTAL(J1)/NUMBR(J) 120 CONTINUE GO TO 170 130 IF (INF.EQ.0) THEN WRITE (6,140) 140 FORMAT (' Some clusters have no data elements in the updated',/ * ,' centroids for the Forgy method.',/, * ' Each centroid must have at least one data element',/, * ' assigned to it.',/,' Further computation is not possible.' * ,/,' # of data elements in each cluster:') WRITE (6,150) (NUMBR(J),J=1,NC) 150 FORMAT (15I4) WRITE (*,140) WRITE (*,150) (NUMBR(J),J=1,NC) ICODE = 1 RETURN ELSE WRITE (6,160) IGNORE 160 FORMAT (/,' An exception has occurred when data element',I4,/, * ' is removed from the data set.',/, * ' Internal influence cannot be computed for this',/, * ' data element.') WRITE (6,140) WRITE (6,150) (NUMBR(J),J=1,NC) ICODE = 2 RETURN ENDIF 170 IF (JREF.EQ.1) RETURN GO TO 50 180 IF (METHOD.NE.1) GO TO 110 C C JANCY UPDATE C J1 = 0 DO 190 J = 1, NC IF (NUMBR(J).EQ.0) GO TO 200 DO 190 I = 1, NV J1 = J1+1 CENTR(J1) = 2.*TOTAL(J1)/NUMBR(J)-CENTR(J1) 190 CONTINUE GO TO 50 200 IF (INF.EQ.0) THEN WRITE (6,210) 210 FORMAT (' Some clusters have no data elements in the updated',/ * ,' centroids for the Jancey method.',/, * ' Each centroid must have at least one data element',/, * ' assigned to it.',/,' Further computation is not possible.' * ,/,' # of data elements in each cluster:') WRITE (6,150) (NUMBR(J),J=1,NC) WRITE (*,210) WRITE (*,150) (NUMBR(J),J=1,NC) ICODE = 1 RETURN ELSE WRITE (6,160) IGNORE WRITE (6,210) WRITE (6,150) (NUMBR(J),J=1,NC) ICODE = 2 RETURN ENDIF RETURN END C*********************************************************************** C OUTPUT SECTION C THIS SUBROUTINE PRINTS THE RESULTS FROM A CLUSTERING JOB BASED C ON ANY VERSION OF SUBROUTINE *KMEAN*. C*********************************************************************** C SUBROUTINE RESULT(INF,IGNORE,PCORR) PARAMETER (MAXPTS=550, MAXVAR=30, MAXCLS=40) COMMON/BLK1/CENTR(MAXVAR*MAXCLS),NUMBR(MAXCLS),MEMBR(MAXPTS), * LIST(MAXPTS*2),DATA(MAXPTS*MAXVAR),TITLE(20) COMMON /BLK2/ NE,NV,NC,MINREL,IPART,METHOD,NSUB,LEVEL,ICODE,PFLAG LOGICAL WD(MAXPTS,MAXPTS) DIMENSION EVEC(MAXPTS*(MAXPTS-1)/2),IP(MAXPTS) INTEGER PFLAG IF (INF.EQ.0) WRITE (6,10) 10 FORMAT (///,1X,'Final clustering results are:') C C AS A CONTINGENCY PRECAUTION WRITE OUT THE RAW MEMBERSHIP LIST. C IF (INF.EQ.0) THEN WRITE (6,20) 20 FORMAT (/,1X,'Raw Membership List:',/,3X,'Data',5X,'Cluster',/, * 20('-')) DO 40 K = 1, NE WRITE (6,30) K,MEMBR(K) 30 FORMAT (3X,I3,7X,I3) 40 CONTINUE ELSE IDX = 1 DO 50 I = 1, NE IF (I.NE.IGNORE) THEN IP(IDX) = I IDX = IDX+1 ENDIF 50 CONTINUE IF (LEVEL.EQ.1) THEN WRITE (6,20) DO 60 I = 1, NE-1 WRITE (6,30) IP(I),MEMBR(I) 60 CONTINUE ENDIF ENDIF IF (INF.EQ.0.OR.LEVEL.EQ.1) THEN WRITE (6,70) 70 FORMAT (//,1X,'Cluster Sizes:',/,2X,'Cluster',4X,'Count',/,20( * '-')) DO 90 J = 1, NC WRITE (6,80) J,NUMBR(J) 80 FORMAT (3X,I3,7X,I3) 90 CONTINUE ENDIF C C SET UP THE LOGICAL ARRAY FOR THE CRITERION STATISTIC C DO 100 I = 1, NE-NSUB DO 100 J = 1, NE-NSUB WD(J,I) = .FALSE. 100 CONTINUE NE1 = NE-1-NSUB DO 130 J = 1, NC DO 120 K = 1, NE1 IF (MEMBR(K).NE.J) GO TO 120 K2 = K+1 DO 110 K1 = K2, NE-NSUB IF (MEMBR(K1).NE.J) GO TO 110 WD(K,K1) = .TRUE. WD(K1,K) = .TRUE. 110 CONTINUE 120 CONTINUE 130 CONTINUE C C COMPUTE INTERPOINT DISTANCES C IVEC = 0 DO 140 I = 2, NE-NSUB I1 = I-1 DO 140 J = 1, I1 IVEC = IVEC+1 J1 = NV*(J-1)+1 J2 = NV*(I-1)+1 EVEC(IVEC) = DIST(DATA(J1),DATA(J2),NV) 140 CONTINUE C C COMPUTE SINGLE PARTITION COPHENETIC CORRELATION COEFFICIENT C SUM1 = 0.0 SUM2 = 0.0 SUM3 = 0.0 SUM4 = 0.0 IVEC = 0 DO 150 I = 2, NE-NSUB II = I-1 DO 150 JJ = 1, II IVEC = IVEC+1 SUM3 = SUM3+EVEC(IVEC) SUM4 = SUM4+EVEC(IVEC)*EVEC(IVEC) IF (WD(JJ,I)) GO TO 150 SUM1 = SUM1+1. SUM2 = SUM2+EVEC(IVEC) 150 CONTINUE CONST = ((NE-NSUB)*((NE-NSUB)-1))/2 TOP = SUM2-SUM1*SUM3/CONST BOTTOM = SQRT((SUM1-SUM1*SUM1/CONST)*(SUM4-SUM3*SUM3/CONST)) IF (ABS(BOTTOM-0.0).LT.0.000005) THEN PFLAG = 1 PCORR = 999. ELSE PCORR = TOP/BOTTOM ENDIF C C PRINT RESULTS FOR EACH CLUSTER C IF (INF.EQ.0.OR.LEVEL.EQ.1) THEN WRITE (6,160) 12 160 FORMAT (A1) WRITE (6,170) 170 FORMAT (1X,'Detailed information for each cluster:') ENDIF C C BUILD LIST OF POINTS IN EACH CLUSTER C DO 250 I = 1, NC IDX = 0 DO 180 J = 1, NE-NSUB IF (MEMBR(J).EQ.I) THEN IDX = IDX+1 IF (INF.EQ.0) THEN LIST(IDX) = J ELSE LIST(IDX) = IP(J) ENDIF ENDIF 180 CONTINUE IF (INF.EQ.0.OR.LEVEL.EQ.1) THEN WRITE (6,190) I,NUMBR(I) 190 FORMAT (/,' Cluster',I3,' contains',I4,' data elements') WRITE (6,200) 200 FORMAT (/,' Membership List:',/) WRITE (6,210) (LIST(K),K=1,IDX) 210 FORMAT (1X,15I4) WRITE (6,220) 220 FORMAT (/,1X,'Centroid Coordinates:',/) DO 240 K = 1, NV KK = (I-1)*NV+K WRITE (6,230) CENTR(KK) 230 FORMAT (1X,G12.5) 240 CONTINUE ENDIF 250 CONTINUE IF (INF.EQ.0) THEN IF (PFLAG.EQ.0) THEN WRITE (6,260) PCORR 260 FORMAT (/,1X,'The point biserial goodness-of-fit index',/, * ' for the complete data set is:',F8.3,/) ELSE WRITE (6,270) 270 FORMAT (/,1X,'The point biserial goodness-of-fit index',/, * ' is not defined for the complete data set.') ENDIF WRITE (6,280) 12 280 FORMAT (A1) WRITE (6,290) 290 FORMAT (' Computing internal influence measures',/, * ' Any exceptions will be noted below:') ENDIF RETURN END C*********************************************************************** C RANDOM NUMBER GENERATOR C THIS IS A MODIFIED VERSION OF THE IBM UNIFORM RANDOM NUMBER C GENERATOR RANDU. ONE SHOULD CONSIDER REPLACING THIS GENERATOR WITH C A LOCALLY TESTED PACKAGE ESPECIALLY IF YOUR COMPUTER IS NOT AN C IBM PRODUCT OR DERIVATIVE. C C THE CALLING PARAMETERS ARE: C IX- RANDOM NUMBER SEED C U- ARRAY TO STORE UP TO 10 UNIFORM RANDOM NUMBERS C N- THE NUMBER OF RANDOM NUMBERS TO BE GENERATED(UP TO 10). C*********************************************************************** C SUBROUTINE RANDOM (IX,U,N) DIMENSION U(10) DO 30 K = 1, N IY = IX*65539 IF (IY) 10, 20, 20 10 IY = IY+2147483647+1 20 YFL = IY YFL = YFL*.4656613E-9 U(K) = YFL IX = IY 30 CONTINUE RETURN END C*********************************************************************** C DISTANCE FUNCTION C EUCLIDEAN DISTANCE COMPUTED ON RAW SCORES C*********************************************************************** C FUNCTION DIST(X,Y,NV) DIMENSION X(20),Y(20) DIST = 0 DO 10 I = 1, NV DIST = DIST+((X(I)-Y(I))**2) 10 CONTINUE DIST = SQRT(DIST) RETURN END