sleep.csv




Source: Allison, Truett and Cicchetti, Domenic V. (1976), "Sleep  in  Mammals: Ecological and Constitutional  Correlates",  _Science_,  November 12, vol. 194, pp. 732-734.

Variables below (from left to right) for Mammals Data Set:
species of animal
body weight in kg
brain weight in g
slow wave ("nondreaming") sleep (hrs/day)
paradoxical ("dreaming") sleep (hrs/day)
total sleep (hrs/day)  (sum of slow wave and paradoxical sleep)
maximum life span (years)
gestation time (days)
predation index (1-5)
                1 = minimum (least likely to be preyed upon)
                5 = maximum (most likely to be preyed upon)
sleep exposure index (1-5)
                1 = least exposed (e.g. animal sleeps in a 
                    well-protected den)
                5 = most exposed
overall danger index (1-5)
                (based on the above two indices and other information)
                1 = least danger (from other animals)
                5 = most danger (from other animals)
Note: Missing values denoted by -999.0

The above data set can be freely used for non-commercial purposes and can be freely distributed (permission in writing obtained from Dr. Truett Allison). Submitted by Roger Johnso nrwjohnso@silver.sdsmt.edu

분류 : Cluster Analysis




R-code.r

JaeseongYoo — Jun 2, 2014, 10:57 PM

rm(list = ls())

setwd("D:/Dropbox/DataSets/Blog Posts/20140602 Ecological Correlates of Sleep in mammals")

set.seed(1)
data = read.csv("sleep.txt", sep="\t")
names = data[,1]
data = data[,-1]
dimnames(data)[[1]] = names
dimnames(data)[[2]] = c("body_weight", "brain_weight", "slow_wave",  "paradoxical", "total_sleep",
                                        "maximum_life_span", "gestation_time", "predation_index", "sleep_exposure_index", "overall_danger_index")

eig = eigen(cor(data))
round(eig$value, 2)
 [1] 3.35 2.12 1.71 1.06 0.76 0.60 0.27 0.05 0.04 0.04
n_factor = sum((eig$value > 1)*1)

# Single Linkage
hclust_result = hclust(dist(data, method="euclidean"), method="single")
plot(hclust_result, hang=-1, main="Single Linkage")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
47 11  1  2 
# Complete Linkage
hclust_result = hclust(dist(data, method="euclidean"), method="complete")
plot(hclust_result, hang=-1, main="Complete Linkage")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
41 15  1  4 
# Average Linkage
hclust_result = hclust(dist(data, method="euclidean"), method="average")
plot(hclust_result, hang=-1, main="Average Linkage")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
47 11  1  2 
# Centroid Method
hclust_result = hclust(dist(data, method="euclidean"), method="centroid")
plot(hclust_result, hang=-1, main="Centroid")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
57  1  1  2 
# Ward's Method
hclust_result = hclust(dist(data, method="euclidean"), method="ward.D2")
plot(hclust_result, hang=-1, main="Ward")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
41 13  1  6 
# K-means
kmeanclust_result =kmeans(data, n_factor, nstart=500)
kmeanclust_result
K-means clustering with 4 clusters of sizes 43, 4, 1, 13

Cluster means:
  body_weight brain_weight slow_wave paradoxical total_sleep
1       39.27       106.81     8.877       1.935       10.81
2       15.51        25.48     8.125       3.075       11.20
3     2547.00      4603.00     2.100       1.800        3.90
4      105.64       195.74  -999.000    -845.208     -300.15
  maximum_life_span gestation_time predation_index sleep_exposure_index
1            -29.15          114.2            2.93                2.233
2           -238.62         -999.0            1.75                1.250
3             69.00          624.0            3.00                5.000
4            -57.14          159.9            3.00                3.000
  overall_danger_index
1                2.628
2                1.500
3                4.000
4                2.769

Clustering vector:
African giant pouched rat                Arctic Fox 
                        1                         4 
   Arctic ground squirrel            Asian elephant 
                        4                         3 
                   Baboon             Big brown bat 
                        1                         1 
          Brazilian tapir                       Cat 
                        1                         1 
               Chimpanzee                Chinchilla 
                        1                         1 
                      Cow           Desert hedgehog 
                        1                         2 
                   Donkey     Eastern American mole 
                        4                         1 
                  Echidna         European hedgehog 
                        1                         1 
                   Galago                     Genet 
                        1                         2 
          Giant armadillo                   Giraffe 
                        2                         4 
                     Goat            Golden hamster 
                        1                         1 
                  Gorilla                 Gray seal 
                        4                         1 
                Gray wolf           Ground squirrel 
                        4                         1 
               Guinea pig                     Horse 
                        1                         1 
                   Jaguar                  Kangaroo 
                        4                         4 
Lesser short-tailed shrew          Little brown bat 
                        1                         1 
                      Man                  Mole rat 
                        1                         1 
          Mountain beaver                     Mouse 
                        1                         1 
               Musk shrew       N. American opossum 
                        1                         1 
    Nine-banded armadillo                     Okapi 
                        1                         4 
               Owl monkey              Patas monkey 
                        1                         1 
               Phanlanger                       Pig 
                        1                         1 
                   Rabbit                   Raccoon 
                        1                         4 
                      Rat                   Red fox 
                        1                         1 
            Rhesus monkey    Rock hyrax (Hetero. b) 
                        1                         1 
Rock hyrax (Procavia hab)                  Roe deer 
                        1                         4 
                    Sheep                Slow loris 
                        1                         4 
          Star nosed mole                    Tenrec 
                        2                         1 
               Tree hyrax                Tree shrew 
                        1                         1 
                   Vervet             Water opossum 
                        1                         1 
    Yellow-bellied marmot 
                        4 

Within cluster sum of squares by cluster:
[1] 5273523  778489       0 6636877
 (between_SS / total_SS =  79.8 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
require(cluster)
Loading required package: cluster
clusplot(data, kmeanclust_result$cluster, color=TRUE, shade=TRUE, labels=2, lines=0)

require(fpc)
Loading required package: fpc
Loading required package: MASS
Loading required package: mclust
Package 'mclust' version 4.3
Loading required package: flexmix
Loading required package: lattice

plot of chunk unnamed-chunk-1

plotcluster(data, kmeanclust_result$cluster)

plot of chunk unnamed-chunk-1


R-code.r


pollution.txt




This is the pollution data so loved by writers of papers on ridge regression.

Source: McDonald, G.C. and Schwing, R.C. (1973) 'Instabilities of regression estimates relating air pollution to mortality', Technometrics, vol.15, 463-482.

Variables in order:
PREC   Average annual precipitation in inches
JANT   Average January temperature in degrees F
JULT   Same for July
OVR65  % of 1960 SMSA population aged 65 or older
POPN   Average household size
EDUC   Median school years completed by those over 22
HOUS   % of housing units which are sound & with all facilities
DENS   Population per sq. mile in urbanized areas, 1960
NONW   % non-white population in urbanized areas, 1960
WWDRK  % employed in white collar occupations
POOR   % of families with income < $3000
HC     Relative hydrocarbon pollution potential
NOX    Same for nitric oxides
SO@    Same for sulphur dioxide
HUMID  Annual average % relative humidity at 1pm
MORT   Total age-adjusted mortality rate per 100,000

분류 : Cluster Analysis

출처 : http://www.umass.edu/statdata/statdata/






R-code.r

JaeseongYoo — May 29, 2014, 9:46 PM

rm(list = ls())

NANANA
[1] NA
set.seed(1)
data = scan("pollution.txt")
data = matrix(data, ncol=16)
dimnames(data)[[2]] = c("PREC", "JANT", "JULT", "OVR65", "POPN",
                                        "EDUC", "HOUS", "DENS", "NONW", "WWDRK",
                                        "POOR", "HC", "NOX", "SOat", "HUMID",
                                        "MORT")

eig = eigen(cor(data))
round(eig$value, 2)
 [1] 5.88 4.33 2.26 2.17 0.32 0.31 0.27 0.18 0.10 0.07 0.04 0.04 0.02 0.00
[15] 0.00 0.00
n_factor = sum((eig$value > 1)*1)

# Single Linkage
hclust_result = hclust(dist(data, method="euclidean"), method="single")
plot(hclust_result, hang=-1, main="Single Linkage")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
49  4  4  3 
# Complete Linkage
hclust_result = hclust(dist(data, method="euclidean"), method="complete")
plot(hclust_result, hang=-1, main="Complete Linkage")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
49  4  4  3 
# Average Linkage
hclust_result = hclust(dist(data, method="euclidean"), method="average")
plot(hclust_result, hang=-1, main="Average Linkage")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
49  4  4  3 
# Centroid Method
hclust_result = hclust(dist(data, method="euclidean"), method="centroid")
plot(hclust_result, hang=-1, main="Centroid")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
52  3  4  1 
# Ward's Method
hclust_result = hclust(dist(data, method="euclidean"), method="ward.D2")
plot(hclust_result, hang=-1, main="Ward")
rect.hclust(hclust_result, n_factor)

plot of chunk unnamed-chunk-1

cutree_result = cutree(hclust_result, n_factor)
table(cutree_result)
cutree_result
 1  2  3  4 
45  4  7  4 
# K-means
kmeanclust_result =kmeans(data, n_factor, nstart=500)
kmeanclust_result
K-means clustering with 4 clusters of sizes 7, 4, 45, 4

Cluster means:
      PREC    JANT    JULT   OVR65    POPN     EDUC     HOUS    DENS
1 2541.586   18.50 2418.34   27.71 2542.30    6.557 1963.969  104.51
2   13.250 4146.25    8.70  985.04   19.00 2758.250    8.125  903.72
3   34.933   43.42   32.55   43.51   32.25   34.842   33.658   44.75
4    9.025 1004.66   13.50 4381.00    7.50  915.862    8.500 3912.50
      NONW   WWDRK    POOR       HC     NOX    SOat   HUMID    MORT
1 2282.868   10.94 2401.56    9.471 2550.42   10.33 2045.75   10.49
2   17.750 4160.75   10.15  946.117  147.75 3420.25    8.05  941.36
3   37.716   37.09   46.66   33.584   41.04   33.37   35.01   32.27
4    8.825  963.76   42.75 4118.500    8.20  886.21   21.25 4995.00

Clustering vector:
 [1] 3 3 3 4 3 3 3 1 3 3 3 2 3 3 3 1 3 3 3 4 3 3 3 1 3 3 3 2 3 3 3 1 3 3 3
[36] 4 3 3 3 1 3 3 3 2 3 3 3 1 3 3 3 4 3 3 3 1 3 3 3 2

Within cluster sum of squares by cluster:
[1] 150869711  31947851    809151  37550545
 (between_SS / total_SS =  77.3 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"    
[5] "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
require(cluster)
Loading required package: cluster
clusplot(data, kmeanclust_result$cluster, color=TRUE, shade=TRUE, labels=2, lines=0)

require(fpc)
Loading required package: fpc
Loading required package: MASS
Loading required package: mclust
Package 'mclust' version 4.3
Loading required package: flexmix
Loading required package: lattice

plot of chunk unnamed-chunk-1

plotcluster(data, kmeanclust_result$cluster)

plot of chunk unnamed-chunk-1

+ Recent posts