Nd(3pm) | User Contributed Perl Documentation | Nd(3pm) |
PDL::CCS::Nd - N-dimensional sparse pseudo-PDLs
use PDL; use PDL::CCS::Nd; ##--------------------------------------------------------------------- ## Example data $missing = 0; ##-- missing values $dense = random(@dims); ##-- densely encoded pdl $dense->where(random(@dims)<=0.95) .= $missing; ## ... made sparse $whichND = $dense->whichND; ##-- which values are present? $nzvals = $dense->indexND($whichND); ##-- ... and what are they? ##--------------------------------------------------------------------- ## Constructors etc. $ccs = PDL::CCS:Nd->newFromDense($dense,%args); ##-- construct from dense matrix $ccs = PDL::CCS:Nd->newFromWhich($whichND,$nzvals,%args); ##-- construct from index+value pairs $ccs = $dense->toccs(); ##-- ensure PDL::CCS::Nd-hood $ccs = $ccs->toccs(); ##-- ... analogous to PDL::topdl() $ccs = $dense->toccs($missing,$flags); ##-- ... with optional arguments $ccs2 = $ccs->copy(); ##-- copy constructor $ccs2 = $ccs->copyShallow(); ##-- shallow copy, mainly for internal use $ccs2 = $ccs->shadow(%args); ##-- flexible copy method, for internal use ##--------------------------------------------------------------------- ## Maintenance & Decoding $ccs = $ccs->recode(); ##-- remove missing values from stored VALS $ccs = $ccs->sortwhich(); ##-- internal use only $dense2 = $ccs->decode(); ##-- extract to a (new) dense matrix $dense2 = $ccs->todense(); ##-- ensure dense storage $dense2 = $dense2->todense(); ##-- ... analogous to PDL::topdl() ##--------------------------------------------------------------------- ## PDL API: Basic Properties ##--------------------------------------- ## Type conversion & Checking $ccs2 = $ccs->convert($type); $ccs2 = $ccs->byte; $ccs2 = $ccs->short; $ccs2 = $ccs->ushort; $ccs2 = $ccs->long; $ccs2 = $ccs->longlong; $ccs2 = $ccs->float; $ccs2 = $ccs->double; ##--------------------------------------- ## Dimensions @dims = $ccs->dims(); $ndims = $ccs->ndims(); $dim = $ccs->dim($dimi); $nelem = $ccs->nelem; $bool = $ccs->isnull; $bool = $ccs->isempty; ##--------------------------------------- ## Inplace & Dataflow $ccs = $ccs->inplace(); $bool = $ccs->is_inplace; $bool = $ccs->set_inplace($bool); $ccs = $ccs->sever; ##--------------------------------------- ## Bad Value Handling $bool = $ccs->bad_is_missing(); ##-- treat BAD values as missing? $bool = $ccs->bad_is_missing($bool); $ccs = $ccs->badmissing(); ##-- ... a la inplace() $bool = $ccs->nan_is_missing(); ##-- treat NaN values as missing? $bool = $ccs->nan_is_missing($bool); $ccs = $ccs->nanmissing(); ##-- ... a la inplace() $ccs2 = $ccs->setnantobad(); $ccs2 = $ccs->setbadtonan(); $ccs2 = $ccs->setbadtoval($val); $ccs2 = $ccs->setvaltobad($val); ##--------------------------------------------------------------------- ## PDL API: Dimension Shuffling $ccs2 = $ccs->dummy($vdimi,$size); $ccs2 = $ccs->reorder(@vdims); $ccs2 = $ccs->xchg($vdim1,$vdim2); $ccs2 = $ccs->mv($vdimFrom,$vdimTo); $ccs2 = $ccs->transpose(); ##--------------------------------------------------------------------- ## PDL API: Indexing $nzi = $ccs->indexNDi($ndi); ##-- guts for indexing methods $ndi = $ccs->n2oned($ndi); ##-- returns 1d pseudo-index for $ccs $ivals = $ccs->indexND($ndi); $ivals = $ccs->index2d($xi,$yi); $ivals = $ccs->index($flati); ##-- buggy: no pseudo-threading! $ccs2 = $ccs->dice_axis($vaxis,$vaxis_ix); $nzi = $ccs->xindex1d($xi); ##-- nz-indices along 0th dimension $nzi = $ccs->pxindex1d($dimi,$xi); ##-- ... or any dimension, using ptr() $nzi = $ccs->xindex2d($xi,$yi); ##-- ... or for Cartesian product on 2d matrix $ccs2 = $ccs->xsubset1d($xi); ##-- subset along 0th dimension $ccs2 = $ccs->pxsubset1d($dimi,$xi); ##-- ... or any dimension, using ptr() $ccs2 = $ccs->xsubset2d($xi,$yi); ##-- ... or for Cartesian product on 2d matrix $whichND = $ccs->whichND(); $vals = $ccs->whichVals(); ##-- like $ccs->indexND($ccs->whichND), but faster $which = $ccs->which() $value = $ccs->at(@index); $ccs = $ccs->set(@index,$value); ##--------------------------------------------------------------------- ## PDL API: Ufuncs $ccs2 = $ccs->prodover; $ccs2 = $ccs->dprodover; $ccs2 = $ccs->sumover; $ccs2 = $ccs->dsumover; $ccs2 = $ccs->andover; $ccs2 = $ccs->orover; $ccs2 = $ccs->bandover; $ccs2 = $ccs->borover; $ccs2 = $ccs->maximum; $ccs2 = $ccs->minimum; $ccs2 = $ccs->maximum_ind; ##-- -1 indicates "missing" value is maximal $ccs2 = $ccs->minimum_ind; ##-- -1 indicates "missing" value is minimal $ccs2 = $ccs->nbadover; $ccs2 = $ccs->ngoodover; $ccs2 = $ccs->nnz; $sclr = $ccs->prod; $sclr = $ccs->dprod; $sclr = $ccs->sum; $sclr = $ccs->dsum; $sclr = $ccs->nbad; $sclr = $ccs->ngood; $sclr = $ccs->min; $sclr = $ccs->max; $bool = $ccs->any; $bool = $ccs->all; ##--------------------------------------------------------------------- ## PDL API: Unary Operations (Overloaded) $ccs2 = $ccs->bitnot; $ccs2 = ~$ccs; $ccs2 = $ccs->not; $ccs2 = !$ccs; $ccs2 = $ccs->sqrt; $ccs2 = $ccs->abs; $ccs2 = $ccs->sin; $ccs2 = $ccs->cos; $ccs2 = $ccs->exp; $ccs2 = $ccs->log; $ccs2 = $ccs->log10; ##--------------------------------------------------------------------- ## PDL API: Binary Operations (missing is annihilator) ## + $b may be a perl scalar, a dense PDL, or a PDL::CCS::Nd object ## + $c is always returned as a PDL::CCS::Nd ojbect ##--------------------------------------- ## Arithmetic $c = $ccs->plus($b); $c = $ccs1 + $b; $c = $ccs->minus($b); $c = $ccs1 - $b; $c = $ccs->mult($b); $c = $ccs1 * $b; $c = $ccs->divide($b); $c = $ccs1 / $b; $c = $ccs->modulo($b); $c = $ccs1 % $b; $c = $ccs->power($b); $c = $ccs1 ** $b; ##--------------------------------------- ## Comparisons $c = $ccs->gt($b); $c = ($ccs > $b); $c = $ccs->ge($b); $c = ($ccs >= $b); $c = $ccs->lt($b); $c = ($ccs < $b); $c = $ccs->le($b); $c = ($ccs <= $b); $c = $ccs->eq($b); $c = ($ccs == $b); $c = $ccs->ne($b); $c = ($ccs != $b); $c = $ccs->spaceship($b); $c = ($ccs <=> $b); ##--------------------------------------- ## Bitwise Operations $c = $ccs->and2($b); $c = ($ccs & $b); $c = $ccs->or2($b); $c = ($ccs | $b); $c = $ccs->xor($b); $c = ($ccs ^ $b); $c = $ccs->shiftleft($b); $c = ($ccs << $b); $c = $ccs->shiftright($b); $c = ($ccs >> $b); ##--------------------------------------- ## Matrix Operations $c = $ccs->inner($b); $c = $ccs->matmult($b); $c = $ccs x $b; $c_dense = $ccs->matmult2d_sdd($b_dense, $zc); $c_dense = $ccs->matmult2d_zdd($b_dense); $vnorm = $ccs->vnorm($pdimi); $vcos = $ccs->vcos_zdd($b_dense); $vcos = $ccs->vcos_pzd($b_ccs); ##--------------------------------------- ## Other Operations $ccs->rassgn($b); $ccs .= $b; $str = $ccs->string(); $str = "$ccs"; ##--------------------------------------------------------------------- ## Indexing Utilities ##--------------------------------------------------------------------- ## Low-Level Object Access $num_v_per_p = $ccs->_ccs_nvperp; ##-- num virtual / num physical $pdims = $ccs->pdims; $vdims = $ccs->vdims; ##-- physical|virtual dim pdl $nelem = $ccs->nelem_p; $nelem = $ccs->nelem_v; ##-- physical|virtual nelem $nstored = $ccs->nstored_p; $nstored = $ccs->nstored_v; ##-- physical|virtual Nnz+1 $nmissing = $ccs->nmissing_p; $nmissing = $ccs->nmissing_v; ##-- physical|virtual nelem-Nnz $ccs = $ccs->make_physically_indexed(); ##-- ensure all dimensions are physically indexed $bool = $ccs->allmissing(); ##-- are all values missing? $missing_val = $ccs->missing; ##-- get missing value $missing_val = $ccs->missing($missing_val); ##-- set missing value $ccs = $ccs->_missing($missing_val); ##-- ... returning the object $whichND_phys = $ccs->_whichND(); ##-- get/set physical indices $whichND_phys = $ccs->_whichND($whichND_phys); $nzvals_phys = $ccs->_nzvals(); ##-- get/set physically indexed values $nzvals_phys = $ccs->_nzvals($vals_phys); $vals_phys = $ccs->_vals(); ##-- get/set physically indexed values $vals_phys = $ccs->_vals($vals_phys); $bool = $ccs->hasptr($pdimi); ##-- check for cached Harwell-Boeing pointer ($ptr,$ptrix) = $ccs->ptr($pdimi); ##-- ... get one, caching for later use ($ptr,$ptrix) = $ccs->getptr($pdimi); ##-- ... compute one, regardless of cache ($ptr,$ptrix) = $ccs->setptr($pdimi,$p,$pix); ##-- ... set a cached pointer $ccs->clearptr($pdimi); ##-- ... clear a cached pointer $ccs->clearptrs(); ##-- ... clear all cached pointers $flags = $ccs->flags(); ##-- get/set object-local flags $flags = $ccs->flags($flags); $density = $ccs->density; ##-- get object density $crate = $ccs->compressionRate; ##-- get compression rate
PDL::CCS::Nd provides an object-oriented implementation of sparse N-dimensional vectors & matrices using a set of low-level PDLs to encode non-missing values. Currently, only a portion of the PDL API is implemented.
The following package-global variables are defined:
$BINOP_BLOCKSIZE_MIN = 1; $BINOP_BLOCKSIZE_MAX = 0;
Minimum (maximum) block size for block-wise incremental computation of binary operations. Zero or undef indicates no minimum (maximum).
PDL::CCS::Nd object are implemented as perl ARRAY-references. For more intuitive access to object components, the following package-global variables can be used as array indices to access internal object structure:
$ccs->[$PDIMS]->at($pdim_i) == $dimSize_i
$ccs->[$VDIMS]->at($vdim_i) == / -$vdimSize_i if $vdim_i is a dummy dimension \ $pdim_i otherwise
The $VDIMS piddle is used for dimension-shuffling transformations such as xchg() and reorder(), as well as for dummy().
all( $ccs->[$WHICH] == $dense->whichND->qsortvec )
A "physically indexed dimension" is just a dimension corresponding tp a single column of the $WHICH pdl, whereas a dummy dimension does not correspond to any physically indexed dimension.
all( $ccs->[$VALS]->slice("0:-2") == $dense->indexND($ccs->[$WHICH]) )
The final element of the $VALS piddle is referred to as "$missing", and represents the value of all elements of the dense physical matrix whose indices are not explicitly listed in the $WHICH piddle:
all( $ccs->[$VALS]->slice("-1") == $dense->flat->index(which(!$dense)) )
$WHICH->dice_axis(1,$ptrix)
is guaranteed to be primarily sorted along the pointer dimension $d, and stably sorted along all other dimensions, e.g. should be identical to:
$WHICH->mv($d,0)->qsortvec->mv(0,$d)
The following object-local constants are defined as bitmask flags:
Signature ($class_or_obj; dense(N1,...,NNdims); missing(); int flags)
Class method. Create and return a new PDL::CCS::Nd object from a dense N-dimensional PDL $dense. If specified, $missing is used as the value for "missing" elements, and $flags are used to initialize the object-local flags.
$missing defaults to BAD if the bad flag of $dense is set, otherwise $missing defaults to zero.
Signature ($ccs; dense(N1,...,NNdims); missing(); int flags)
Object method. Populate a sparse matrix object from a dense piddle $dense. See newFromDense().
Signature ($class_or_obj; int whichND(Ndims,Nnz); nzvals(Nnz+1); %options)
Class method. Create and return a new PDL::CCS::Nd object from a set of indices $whichND of non-missing elements in a (hypothetical) dense piddle and a vector $nzvals of the corresponding values. Known %options:
sorted => $bool, ##-- if true, $whichND is assumed to be pre-sorted steal => $bool, ##-- if true, $whichND and $nzvals are used literally (formerly implied 'sorted') ## + in this case, $nzvals should really be: $nzvals->append($missing) pdims => $pdims, ##-- physical dimension list; default guessed from $whichND (alias: 'dims') vdims => $vdims, ##-- virtual dims (default: sequence($nPhysDims)); alias: 'xdims' missing => $missing, ##-- default: BAD if $nzvals->badflag, 0 otherwise flags => $flags ##-- flags
$PDIMS, @$PTRS, @{$PTRS->[*]}, $FLAGS
Referenced components:
$VDIMS, $WHICH, $VALS, $PTRS->[*][*]
to => $ccs2, ##-- default: new pdims => $pdims2, ##-- default: $pdims1->pdl (alias: 'dims') vdims => $vdims2, ##-- default: $vdims1->pdl (alias: 'xdims') ptrs => \@ptrs2, ##-- default: [] which => $which2, ##-- default: undef vals => $vals2, ##-- default: undef flags => $flags, ##-- default: $flags1
The following basic PDL API methods are implemented and/or wrapped for PDL::CCS::Nd objects:
Type-checking and conversion routines are passed on to the $VALS sub-piddle.
Note that nelem() returns the number of hypothetically addressable cells -- the number of cells in the corresponding dense matrix, rather than the number of non-missing elements actually stored.
See also the bad_is_missing() and nan_is_missing() methods, below.
The following dimension-shuffling methods are supported, and should be compatible to their PDL counterparts:
Signature: ($ccs; int ndi(NVdims,Nind); int [o]nzi(Nind))
Guts for indexing methods. Given an N-dimensional index piddle $ndi, return a 1d index vector into $VALS for the corresponding values. Missing values are returned in $nzi as $Nnz == $ccs->_nnz_p;
Uses PDL::VectorValues::vsearchvec() internally, so expect O(Ndims * log(Nnz)) complexity. Although the theoretical complexity is tough to beat, this method could be made much faster in the usual (read "sparse") case by an intelligent use of $PTRS if and when available.
Just returns the literal $WHICH piddle if possible: beware of dataflow! Indices are NOT guaranteed to be returned in any surface-logical order, although physically indexed dimensions should be sorted in physical-lexicographic order.
The following functions from PDL::Ufunc are implemented, and ought to handle missing values correctly (i.e. as their dense counterparts would):
prodover prod dprodover dprod sumover sum dsumover dsum andover orover bandover borover maximum maximum_ind ##-- goofy if "missing" value is maximal max minimum minimum_ind ##-- goofy if "missing" value is minimal min nbadover nbad ngoodover ngood nnz any all
Some Ufuncs are still unimplemented. see PDL::CCS::Ufunc for details.
The following unary operations are supported:
FUNCTION OVERLOADS bitnot ~ not ! sqrt abs sin cos exp log log10
Note that any pointwise unary operation can be performed directly on the $VALS piddle. You can wrap such an operation MY_UNARY_OP on piddles into a PDL::CCS::Nd method using the idiom:
package PDL::CCS::Nd; *MY_UNARY_OP = _unary_op('MY_UNARY_OP', PDL->can('MY_UNARY_OP'));
Note also that unary operations may change the "missing" value associated with the sparse matrix. This is easily seen to be the Right Way To Do It if you consider unary "not" over a very sparse (say 99% missing) binary-valued matrix: is is much easier and more efficient to alter only the 1% of physically stored (non-missing) values as well as the missing value than to generate a new matrix with 99% non-missing values, assuming $missing==0.
A number of basic binary operations on PDL::CCS::Nd operations are supported, which will produce correct results only under the assumption that "missing" values $missing are annihilators for the operation in question. For example, if we want to compute:
$c = OP($a,$b)
for a binary operation OP on PDL::CCS::Nd objects $a and $b, the current implementation will produce the correct result for $c only if for all values $av in $a and $bv in $b:
OP($av,$b->missing) == OP($a->missing,$b->missing) , and OP($a->missing,$bv) == OP($a->missing,$b->missing)
This is true in general for OP==\&mult and $missing==0, but not e.g. for OP==\&plus and $missing==0. It should always hold for $missing==BAD (except in the case of assignment, which is a funny kind of operation anyways).
Currently, the only way to ensure that all values are computed correctly in the general case is for $a and $b to contain exactly the same physically indexed values, which rather defeats the purposes of sparse storage, particularly if implicit pseudo-threading is involved (because then we would likely wind up instantiating -- or at least inspecting -- the entire dense matrix). Future implementations may relax these restrictions somewhat.
The following binary operations are implemented:
FUNCTION OVERLOADS plus + minus - mult * divide / modulo % power **
FUNCTION OVERLOADS gt > ge >= lt < le <= eq == ne != spaceship <=>
FUNCTION OVERLOADS and2 & or2 | xor ^ shiftleft << shiftright >>
FUNCTION OVERLOADS inner (none) matmult x
FUNCTION OVERLOADS rassgn .= string ""
All supported binary operation functions obey the PDL input calling conventions (i.e. they all accept a third argument $swap), and delegate computation to the underlying PDL functions. Note that the PDL::CCS::Nd methods currently do NOT support a third "output" argument. To wrap a new binary operation MY_BINOP into a PDL::CCS::Nd method, you can use the following idiom:
package PDL::CCS::Nd; *MY_BINOP = _ccsnd_binary_op_mia('MY_BINOP', PDL->can('MY_BINOP'));
The low-level alignment of physically indexed values for binary operations is performed by the function PDL::CCS::ccs_binop_align_block_mia(). Computation is performed block-wise at the perl level to avoid over- rsp. underflow of the space requirements for the output PDL.
The following methods provide low-level access to PDL::CCS::Nd object structure:
Signature: ($ccs; int whichND(Ndims,Nnz1); vals(Nnz1))
Set or insert values in $ccs for the indices in $whichND to $vals. $whichND need not be sorted. Implicitly makes $ccs physically indexed. Returns the (destructively altered) $ccs.
Signature: ($ccs; int whichND(Ndims,Nnz1); vals(Nnz1))
Like insertWhich(), but assumes that no values for any of the $whichND indices are already present in $ccs. This is faster (because indexNDi need not be called), but less safe.
Alias: make_physical().
$pdimi defaults to zero. If $pdimi is zero, then it should hold that:
all( $pi2nzi==sequence($ccs->nstored_p) )
Perl by Larry Wall.
PDL by Karl Glazebrook, Tuomas J. Lukka, Christian Soeller, and others.
Many.
Bryan Jurish <moocow@cpan.org>
Copyright (C) 2007-2022, Bryan Jurish. All rights reserved.
This package is free software, and entirely without warranty. You may redistribute it and/or modify it under the same terms as Perl itself.
perl(1), PDL(3perl), PDL::SVDLIBC(3perl), PDL::CCS::Nd(3perl),
SVDLIBC: http://tedlab.mit.edu/~dr/SVDLIBC/
SVDPACKC: http://www.netlib.org/svdpack/
2024-03-31 | perl v5.38.2 |