class KMeans extends Serializable with Logging
K-means clustering with a k-means++ like initialization mode (the k-means|| algorithm by Bahmani et al).
This is an iterative algorithm that will make multiple passes over the data, so any RDDs given to it should be cached by the user.
- Annotations
 - @Since( "0.8.0" )
 - Source
 - KMeans.scala
 
- Alphabetic
 - By Inheritance
 
- KMeans
 - Logging
 - Serializable
 - Serializable
 - AnyRef
 - Any
 
- Hide All
 - Show All
 
- Public
 - All
 
Instance Constructors
- 
      
      
      
        
      
    
      
        
        new
      
      
        KMeans()
      
      
      
Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, initializationMode: "k-means||", initializationSteps: 2, epsilon: 1e-4, seed: random, distanceMeasure: "euclidean"}.
Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, initializationMode: "k-means||", initializationSteps: 2, epsilon: 1e-4, seed: random, distanceMeasure: "euclidean"}.
- Annotations
 - @Since( "0.8.0" )
 
 
Value Members
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        !=(arg0: Any): Boolean
      
      
      
- Definition Classes
 - AnyRef → Any
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        ##(): Int
      
      
      
- Definition Classes
 - AnyRef → Any
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        ==(arg0: Any): Boolean
      
      
      
- Definition Classes
 - AnyRef → Any
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        asInstanceOf[T0]: T0
      
      
      
- Definition Classes
 - Any
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        clone(): AnyRef
      
      
      
- Attributes
 - protected[lang]
 - Definition Classes
 - AnyRef
 - Annotations
 - @throws( ... ) @native()
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        eq(arg0: AnyRef): Boolean
      
      
      
- Definition Classes
 - AnyRef
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        equals(arg0: Any): Boolean
      
      
      
- Definition Classes
 - AnyRef → Any
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        finalize(): Unit
      
      
      
- Attributes
 - protected[lang]
 - Definition Classes
 - AnyRef
 - Annotations
 - @throws( classOf[java.lang.Throwable] )
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        getClass(): Class[_]
      
      
      
- Definition Classes
 - AnyRef → Any
 - Annotations
 - @native()
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        getDistanceMeasure: String
      
      
      
The distance measure used by the algorithm.
The distance measure used by the algorithm.
- Annotations
 - @Since( "2.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        getEpsilon: Double
      
      
      
The distance threshold within which we consider centers to have converged.
The distance threshold within which we consider centers to have converged.
- Annotations
 - @Since( "1.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        getInitializationMode: String
      
      
      
The initialization algorithm.
The initialization algorithm. This can be either "random" or "k-means||".
- Annotations
 - @Since( "1.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        getInitializationSteps: Int
      
      
      
Number of steps for the k-means|| initialization mode.
Number of steps for the k-means|| initialization mode.
- Annotations
 - @Since( "1.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        getK: Int
      
      
      
Number of clusters to create (k).
Number of clusters to create (k).
- Annotations
 - @Since( "1.4.0" )
 - Note
 It is possible for fewer than k clusters to be returned, for example, if there are fewer than k distinct points to cluster.
 - 
      
      
      
        
      
    
      
        
        def
      
      
        getMaxIterations: Int
      
      
      
Maximum number of iterations allowed.
Maximum number of iterations allowed.
- Annotations
 - @Since( "1.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        getSeed: Long
      
      
      
The random seed for cluster initialization.
The random seed for cluster initialization.
- Annotations
 - @Since( "1.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        hashCode(): Int
      
      
      
- Definition Classes
 - AnyRef → Any
 - Annotations
 - @native()
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        initializeLogIfNecessary(isInterpreter: Boolean): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        isInstanceOf[T0]: Boolean
      
      
      
- Definition Classes
 - Any
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        isTraceEnabled(): Boolean
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        log: Logger
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logDebug(msg: ⇒ String, throwable: Throwable): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logDebug(msg: ⇒ String): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logError(msg: ⇒ String, throwable: Throwable): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logError(msg: ⇒ String): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logInfo(msg: ⇒ String, throwable: Throwable): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logInfo(msg: ⇒ String): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logName: String
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logTrace(msg: ⇒ String, throwable: Throwable): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logTrace(msg: ⇒ String): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logWarning(msg: ⇒ String, throwable: Throwable): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        logWarning(msg: ⇒ String): Unit
      
      
      
- Attributes
 - protected
 - Definition Classes
 - Logging
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        ne(arg0: AnyRef): Boolean
      
      
      
- Definition Classes
 - AnyRef
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        notify(): Unit
      
      
      
- Definition Classes
 - AnyRef
 - Annotations
 - @native()
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        notifyAll(): Unit
      
      
      
- Definition Classes
 - AnyRef
 - Annotations
 - @native()
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        run(data: RDD[Vector]): KMeansModel
      
      
      
Train a K-means model on the given set of points; data should be cached for high performance, because this is an iterative algorithm.
Train a K-means model on the given set of points; data should be cached for high performance, because this is an iterative algorithm.
- Annotations
 - @Since( "0.8.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setDistanceMeasure(distanceMeasure: String): KMeans.this.type
      
      
      
Set the distance measure used by the algorithm.
Set the distance measure used by the algorithm.
- Annotations
 - @Since( "2.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setEpsilon(epsilon: Double): KMeans.this.type
      
      
      
Set the distance threshold within which we consider centers to have converged.
Set the distance threshold within which we consider centers to have converged. If all centers move less than this Euclidean distance, we stop iterating one run.
- Annotations
 - @Since( "0.8.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setInitialModel(model: KMeansModel): KMeans.this.type
      
      
      
Set the initial starting point, bypassing the random initialization or k-means||. The condition model.k == this.k must be met; failure results in an IllegalArgumentException.
Set the initial starting point, bypassing the random initialization or k-means||. The condition model.k == this.k must be met; failure results in an IllegalArgumentException.
- Annotations
 - @Since( "1.4.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setInitializationMode(initializationMode: String): KMeans.this.type
      
      
      
Set the initialization algorithm.
Set the initialization algorithm. This can be either "random" to choose random points as initial cluster centers, or "k-means||" to use a parallel variant of k-means++ (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
- Annotations
 - @Since( "0.8.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setInitializationSteps(initializationSteps: Int): KMeans.this.type
      
      
      
Set the number of steps for the k-means|| initialization mode.
Set the number of steps for the k-means|| initialization mode. This is an advanced setting -- the default of 2 is almost always enough. Default: 2.
- Annotations
 - @Since( "0.8.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setK(k: Int): KMeans.this.type
      
      
      
Set the number of clusters to create (k).
Set the number of clusters to create (k).
- Annotations
 - @Since( "0.8.0" )
 - Note
 It is possible for fewer than k clusters to be returned, for example, if there are fewer than k distinct points to cluster. Default: 2.
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setMaxIterations(maxIterations: Int): KMeans.this.type
      
      
      
Set maximum number of iterations allowed.
Set maximum number of iterations allowed. Default: 20.
- Annotations
 - @Since( "0.8.0" )
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        setSeed(seed: Long): KMeans.this.type
      
      
      
Set the random seed for cluster initialization.
Set the random seed for cluster initialization.
- Annotations
 - @Since( "1.4.0" )
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        synchronized[T0](arg0: ⇒ T0): T0
      
      
      
- Definition Classes
 - AnyRef
 
 - 
      
      
      
        
      
    
      
        
        def
      
      
        toString(): String
      
      
      
- Definition Classes
 - AnyRef → Any
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(): Unit
      
      
      
- Definition Classes
 - AnyRef
 - Annotations
 - @throws( ... )
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long, arg1: Int): Unit
      
      
      
- Definition Classes
 - AnyRef
 - Annotations
 - @throws( ... )
 
 - 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long): Unit
      
      
      
- Definition Classes
 - AnyRef
 - Annotations
 - @throws( ... ) @native()