forked from moreo/QuaPy
fixing dataset loading
This commit is contained in:
parent
b6820e8dba
commit
e81009e665
|
@ -1,4 +1,5 @@
|
||||||
from .base import *
|
from .base import *
|
||||||
from . import base
|
from .reader import *
|
||||||
from . import reader
|
|
||||||
from . import preprocessing
|
from . import preprocessing
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.sparse import issparse, dok_matrix
|
from scipy.sparse import issparse
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from quapy.functional import artificial_prevalence_sampling
|
from quapy.functional import artificial_prevalence_sampling
|
||||||
from scipy.sparse import vstack
|
from scipy.sparse import vstack
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
|
import numpy as np
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
|
||||||
from dataset.base import Dataset
|
from dataset.base import Dataset
|
||||||
from scipy.sparse import spmatrix
|
from scipy.sparse import spmatrix
|
||||||
import numpy as np
|
|
||||||
from utils.util import parallelize
|
from utils.util import parallelize
|
||||||
from .base import LabelledCollection
|
from .base import LabelledCollection
|
||||||
|
|
||||||
|
@ -17,8 +17,8 @@ def text2tfidf(dataset:Dataset, min_df=3, sublinear_tf=True, inplace=False, **kw
|
||||||
:return: a new Dataset in csr_matrix format (if inplace=False) or a reference to the current Dataset (inplace=True)
|
:return: a new Dataset in csr_matrix format (if inplace=False) or a reference to the current Dataset (inplace=True)
|
||||||
where the instances are stored in a csr_matrix of real-valued tfidf scores
|
where the instances are stored in a csr_matrix of real-valued tfidf scores
|
||||||
"""
|
"""
|
||||||
__check_type(dataset.training.instances, list, str)
|
__check_type(dataset.training.instances, np.ndarray, str)
|
||||||
__check_type(dataset.test.instances, list, str)
|
__check_type(dataset.test.instances, np.ndarray, str)
|
||||||
|
|
||||||
vectorizer = TfidfVectorizer(min_df=min_df, sublinear_tf=sublinear_tf, **kwargs)
|
vectorizer = TfidfVectorizer(min_df=min_df, sublinear_tf=sublinear_tf, **kwargs)
|
||||||
training_documents = vectorizer.fit_transform(dataset.training.instances)
|
training_documents = vectorizer.fit_transform(dataset.training.instances)
|
||||||
|
@ -45,8 +45,8 @@ def reduce_columns(dataset:Dataset, min_df=5, inplace=False):
|
||||||
:return: a new Dataset (if inplace=False) or a reference to the current Dataset (inplace=True)
|
:return: a new Dataset (if inplace=False) or a reference to the current Dataset (inplace=True)
|
||||||
where the dimensions corresponding to infrequent features have been removed
|
where the dimensions corresponding to infrequent features have been removed
|
||||||
"""
|
"""
|
||||||
__check_type(dataset.training, spmatrix)
|
__check_type(dataset.training.instances, spmatrix)
|
||||||
__check_type(dataset.test, spmatrix)
|
__check_type(dataset.test.instances, spmatrix)
|
||||||
assert dataset.training.instances.shape[1] == dataset.test.instances.shape[1], 'unaligned vector spaces'
|
assert dataset.training.instances.shape[1] == dataset.test.instances.shape[1], 'unaligned vector spaces'
|
||||||
|
|
||||||
def filter_by_occurrences(X, W):
|
def filter_by_occurrences(X, W):
|
||||||
|
@ -101,7 +101,7 @@ def __check_type(container, container_type=None, element_type=None):
|
||||||
assert isinstance(container, container_type), \
|
assert isinstance(container, container_type), \
|
||||||
f'unexpected type of container (expected {container_type}, found {type(container)})'
|
f'unexpected type of container (expected {container_type}, found {type(container)})'
|
||||||
if element_type:
|
if element_type:
|
||||||
assert isinstance(next(container), element_type), \
|
assert isinstance(container[0], element_type), \
|
||||||
f'unexpected type of element (expected {container_type}, found {type(container)})'
|
f'unexpected type of element (expected {container_type}, found {type(container)})'
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue