import numpy as np                      # array handling and most numerical operations used below
import matplotlib.pyplot as plt         # plotting
import scipy.linalg as sp               # dense linear-algebra routines
from sklearn.decomposition import NMF   # non-negative matrix factorization
# Example ratings-style matrix; the zeros are treated as missing entries later on.
X = np.array([[4.8, 4.0, 0.0, 4.9, 2.1],
              [4.7, 4.1, 3.5, 4.0, 4.4],
              [0.0, 4.8, 4.0, 0.0, 4.7],
              [4.6, 0.0, 0.0, 0.0, 0.0],
              [0.0, 4.9, 0.0, 0.0, 0.0],
              [0.0, 4.7, 4.6, 4.4, 4.9],
              [2.1, 1.5, 2.5, 0.0, 0.0]])
X
nmf = NMF(n_components=2)
W = np.around(nmf.fit_transform(X), 3)   # coefficients of each row in the new basis
H = np.around(nmf.components_, 3)        # the two non-negative basis components
W  # new basis representation
H
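# Sanity check (a minimal sketch added for illustration): NMF factorizes X into
# non-negative W and H with X ≈ W @ H, so the reconstruction error below shows
# how much of the ratings a rank-2 factorization captures.
X_approx = np.around(W @ H, 3)
X_approx
np.linalg.norm(X - W @ H)  # Frobenius norm of the reconstruction residual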
from sklearn.metrics.pairwise import cosine_similarity
Sim = np.around(cosine_similarity(W), 3)   # row-to-row cosine similarity in the latent space
Sim  # similarity
np.around(cosine_similarity(X), 3)         # same similarity computed on the raw matrix, for comparison
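# Illustrative follow-up (an added sketch, not part of the original walkthrough):
# use the latent-space similarity to find, for each row of X, its most similar
# other row.
sim_off = Sim.copy()
np.fill_diagonal(sim_off, -1.0)          # exclude self-similarity
most_similar = sim_off.argmax(axis=1)
for i, j in enumerate(most_similar):
    print("row %d is most similar to row %d (cosine = %.3f)" % (i, j, Sim[i, j]))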
import numpy as np
from matrix_completion import svt_solve, calc_unobserved_rmse
m, n = X.shape
# Observation mask: 1 where an entry is observed, 0 where it is missing (a zero in X).
mask = (X != 0).astype(float)
# Singular-value thresholding fills in the missing entries with a low-rank estimate.
X_hat = svt_solve(X, mask)
print('Completed matrix:\n{}'.format(X_hat))
X
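# Quick check (an added sketch): compare the completed matrix with the known
# ratings on the observed entries only; the zero entries are exactly the ones
# SVT was asked to fill in, so they have no ground truth here.
observed_rmse = np.sqrt(np.mean((X[mask == 1] - X_hat[mask == 1]) ** 2))
print('RMSE on the observed entries: {:.3f}'.format(observed_rmse))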
# Author: Olivier Grisel <olivier.grisel@ensta.org>
# Lars Buitinck <L.J.Buitinck@uva.nl>
# License: BSD 3 clause
from __future__ import print_function
from time import time
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
from sklearn.datasets import fetch_20newsgroups
n_samples = 2000
n_features = 1000
n_topics = 10
n_top_words = 20
# Load the 20 newsgroups dataset and vectorize it. We use a few heuristics
# to filter out useless terms early on: the posts are stripped of headers,
# footers and quoted replies, and common English words, words occurring in
# only one document or in at least 95% of the documents are removed.
t0 = time()
print("Loading dataset and extracting TF-IDF features...")
dataset = fetch_20newsgroups(shuffle=True, random_state=1,
                             remove=('headers', 'footers', 'quotes'))
vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, max_features=n_features,
                             stop_words='english')
tfidf = vectorizer.fit_transform(dataset.data[:n_samples])
print("done in %0.3fs." % (time() - t0))
# Fit the NMF model
print("Fitting the NMF model with n_samples=%d and n_features=%d..."
% (n_samples, n_features))
nmf = NMF(n_components=n_topics, random_state=1).fit(tfidf)
print("done in %0.3fs." % (time() - t0))
feature_names = vectorizer.get_feature_names_out()  # get_feature_names() was removed in recent scikit-learn
for topic_idx, topic in enumerate(nmf.components_):
    print("Topic #%d:" % topic_idx)
    print(" ".join([feature_names[i]
                    for i in topic.argsort()[:-n_top_words - 1:-1]]))
    print()
nmf.fit_transform(tfidf).shape   # (n_samples, n_topics): per-document topic weights
nmf.fit_transform(tfidf)[0]      # topic weights for the first document (nmf.transform would avoid refitting)
nmf.components_.shape            # (n_topics, n_features): per-topic term weights
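# Illustrative sketch (not part of the original example): project a new, made-up
# piece of text onto the learned topics with the fitted vectorizer and NMF model,
# then report its dominant topic.
new_doc = ["the graphics card renders the image on the screen"]
new_tfidf = vectorizer.transform(new_doc)   # reuse the fitted TF-IDF vocabulary
doc_topics = nmf.transform(new_tfidf)       # weights over the n_topics learned topics
print("Dominant topic for the new document: %d" % doc_topics.argmax(axis=1)[0])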
# Authors: Vlad Niculae, Alexandre Gramfort
# License: BSD 3 clause
import logging
from time import time
from numpy.random import RandomState
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.cluster import MiniBatchKMeans
from sklearn import decomposition
# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')
n_row, n_col = 2, 5
n_components = n_row * n_col
image_shape = (64, 64)
rng = RandomState(0)
# #############################################################################
# Load faces data
dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
faces = dataset.data
n_samples, n_features = faces.shape
# global centering
faces_centered = faces - faces.mean(axis=0)
# local centering
faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
print("Dataset consists of %d faces" % n_samples)
def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
    plt.figure(figsize=(2. * n_col, 2.26 * n_row))
    plt.suptitle(title, size=16)
    for i, comp in enumerate(images):
        plt.subplot(n_row, n_col, i + 1)
        vmax = max(comp.max(), -comp.min())
        plt.imshow(comp.reshape(image_shape), cmap=cmap,
                   interpolation='nearest',
                   vmin=-vmax, vmax=vmax)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)
# List of the estimators to compare, each paired with a flag saying whether the
# centered faces should be used when fitting it.
estimators = [
    ('Eigenfaces - PCA using randomized SVD',
     decomposition.PCA(n_components=n_components, svd_solver='randomized',
                       whiten=True),
     True),
    ('Non-negative components - NMF',
     decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3),
     False),
]
# #############################################################################
# Plot a sample of the input data
plot_gallery("First centered Olivetti faces", faces_centered[:n_components])
# #############################################################################
# Do the estimation and plot it
for name, estimator, center in estimators:
    print("Extracting the top %d %s..." % (n_components, name))
    t0 = time()
    data = faces
    if center:
        data = faces_centered
    estimator.fit(data)
    train_time = (time() - t0)
    print("done in %0.3fs" % train_time)
    if hasattr(estimator, 'cluster_centers_'):
        components_ = estimator.cluster_centers_
    else:
        components_ = estimator.components_
    # Plot an image representing the pixelwise variance provided by the
    # estimator, e.g. its noise_variance_ attribute. The Eigenfaces estimator,
    # via the PCA decomposition, also provides a scalar noise_variance_
    # (the mean of pixelwise variance) that cannot be displayed as an image,
    # so we skip it.
    if (hasattr(estimator, 'noise_variance_') and
            estimator.noise_variance_.ndim > 0):  # Skip the Eigenfaces case
        plot_gallery("Pixelwise variance",
                     estimator.noise_variance_.reshape(1, -1), n_col=1,
                     n_row=1)
    plot_gallery('%s - Train time %.1fs' % (name, train_time),
                 components_[:n_components])
plt.show()