Upgraded contrib/cl-simd (A. Gavrilov)

This commit is contained in:
Juan Jose Garcia Ripoll 2010-12-22 15:33:20 +01:00
parent 47ffa3b00a
commit 327bf83f8a
8 changed files with 302 additions and 119 deletions

View file

@ -21,6 +21,7 @@ ECL and SBCL (x86-64 only).
* SSE pack types::
* SSE array type::
* Differences from C intrinsics::
* Comparisons and NaN handling::
* Simple extensions::
* Lisp array accessors::
* Example::
@ -123,6 +124,10 @@ for @code{cmpleps}, or @code{/>-ps} for @code{cmpngtps}. In some
places the set of comparison functions is extended to cover the
full possible range.
@item
Scalar comparison predicates are named like @code{..-ss?} for
@code{comiss}, and @code{..-ssu?} for @code{ucomiss} wrappers.
@item
Conversion functions are renamed to @code{convert-*-to-*} and
@code{truncate-*-to-*}.
@ -151,6 +156,54 @@ and made SETF-able:
(The @code{-ap*} version requires alignment.)
@node Comparisons and NaN handling
@subsection Comparisons and NaN handling
Floating-point arithmetic intrinsics have trivial IEEE semantics
when given QNaN and SNaN arguments. Comparisons have more complex
behavior, detailed in the following table:
@multitable { @code{/>=-ss, />=-ps} } { @code{/>=-sd, />=-pd} } { Not greater or equal } { Result for NaN } { QNaN traps }
@item Single-float @tab Double-float @tab Condition @tab Result for NaN @tab QNaN traps
@item @code{=-ss}, @code{=-ps} @tab @code{=-sd}, @code{=-pd} @tab Equal @tab False @tab No
@item @code{<-ss}, @code{<-ps} @tab @code{<-sd}, @code{<-pd} @tab Less @tab False @tab Yes
@item @code{<=-ss}, @code{<=-ps} @tab @code{<=-sd}, @code{<=-pd} @tab Less or equal @tab False @tab Yes
@item @code{>-ss}, @code{>-ps} @tab @code{>-sd}, @code{>-pd} @tab Greater @tab False @tab Yes
@item @code{>=-ss}, @code{>=-ps} @tab @code{>=-sd}, @code{>=-pd} @tab Greater or equal @tab False @tab Yes
@item @code{/=-ss}, @code{/=-ps} @tab @code{/=-sd}, @code{/=-pd} @tab Not equal @tab True @tab No
@item @code{/<-ss}, @code{/<-ps} @tab @code{/<-sd}, @code{/<-pd} @tab Not less @tab True @tab Yes
@item @code{/<=-ss}, @code{/<=-ps} @tab @code{/<=-sd}, @code{/<=-pd} @tab Not less or equal @tab True @tab Yes
@item @code{/>-ss}, @code{/>-ps} @tab @code{/>-sd}, @code{/>-pd} @tab Not greater @tab True @tab Yes
@item @code{/>=-ss}, @code{/>=-ps} @tab @code{/>=-sd}, @code{/>=-pd} @tab Not greater or equal @tab True @tab Yes
@item @code{cmpord-ss}, @code{cmpord-ps} @tab @code{cmpord-sd}, @code{cmpord-pd}
@tab Ordered, i.e. no NaN args @tab False @tab No
@item @code{cmpunord-ss}, @code{cmpunord-ps} @tab @code{cmpunord-sd}, @code{cmpunord-pd}
@tab Unordered, i.e. with NaN args @tab True @tab No
@end multitable
Likewise for scalar comparison predicates, i.e. functions that return the
result of the comparison as a Lisp boolean instead of a bitmask sse-pack:
@multitable { Single-float } { Double-float } { Not greater or equal } { Result for NaN } { QNaN traps }
@item Single-float @tab Double-float @tab Condition @tab Result for NaN @tab QNaN traps
@item @code{=-ss?} @tab @code{=-sd?} @tab Equal @tab True @tab Yes
@item @code{=-ssu?} @tab @code{=-sdu?} @tab Equal @tab True @tab No
@item @code{<-ss?} @tab @code{<-sd?} @tab Less @tab True @tab Yes
@item @code{<-ssu?} @tab @code{<-sdu?} @tab Less @tab True @tab No
@item @code{<=-ss?} @tab @code{<=-sd?} @tab Less or equal @tab True @tab Yes
@item @code{<=-ssu?} @tab @code{<=-sdu?} @tab Less or equal @tab True @tab No
@item @code{>-ss?} @tab @code{>-sd?} @tab Greater @tab False @tab Yes
@item @code{>-ssu?} @tab @code{>-sdu?} @tab Greater @tab False @tab No
@item @code{>=-ss?} @tab @code{>=-sd?} @tab Greater or equal @tab False @tab Yes
@item @code{>=-ssu?} @tab @code{>=-sdu?} @tab Greater or equal @tab False @tab No
@item @code{/=-ss?} @tab @code{/=-sd?} @tab Not equal @tab False @tab Yes
@item @code{/=-ssu?} @tab @code{/=-sdu?} @tab Not equal @tab False @tab No
@end multitable
Note that MSDN specifies different return values for the C counterparts of some
of these functions when called with NaN arguments, but that seems to disagree
with the actually generated code.
@node Simple extensions
@subsection Simple extensions
@ -213,6 +266,9 @@ this module implements a set of AREF-like memory accessors:
@item
@code{(ROW-MAJOR-)?AREF-[AS]?P[SDI]} for whole-pack read & write.
@item
@code{(ROW-MAJOR-)?AREF-S(S|D|I64)} for scalar read & write.
@end itemize
(Where A = aligned; S = aligned streamed write.)
@ -222,14 +278,14 @@ array or vector, without restriction on the precise element
type (although it should be declared at compile time to
ensure generation of the fastest code).
Additional index bound checking is done to ensure that 16
Additional index bound checking is done to ensure that enough
bytes of memory are accessible after the specified index.
As an exception, ROW-MAJOR-AREF-PREFETCH-* does not do any
range checks at all, because the prefetch instructions
are officially safe to use with bad addresses. The
AREF-PREFETCH-* and *-CLFLUSH functions do only ordinary
index checks without the 16-byte extension.
index checks without the usual 16-byte extension.
@node Example
@subsection Example

View file

@ -32,6 +32,7 @@
(int-sse-pack :int-sse-pack)
(float-sse-pack :float-sse-pack)
(double-sse-pack :double-sse-pack)
(boolean :bool)
(single-float :float)
(double-float :double)
(fixnum :fixnum)
@ -107,7 +108,7 @@
(c::def-inline ',name ',mode ',arg-types ',ret-type ,call-str ,@flags)))
(defmacro def-intrinsic (name arg-types ret-type c-name
&key (export t) ret-arg reorder-args immediate-args)
&key (export t) ret-arg reorder-args immediate-args defun-body)
"Defines and exports an SSE intrinsic function with matching open-coding rules."
(let* ((anums (make-arg-nums arg-types))
(asyms (mapcar #'make-arg-name anums))
@ -130,7 +131,7 @@
,@(if (null immediate-args)
`((defun ,name ,asyms
(declare (optimize (speed 0) (debug 0) (safety 1)))
(ffi:c-inline ,asyms ,aftypes ,rftype ,call-str :one-liner t))))
(ffi:c-inline ,asyms ,aftypes ,rftype ,(or defun-body call-str) :one-liner t))))
(def-inline ,name :always ,(mapcar #'inline-arg-type-of arg-types) ,rftype
,call-str :inline-or-warn t))))
@ -155,10 +156,15 @@
,ret-type ,c-name :immediate-args ,(if immediate-arg `((arg2 ,immediate-arg)))))
(defmacro def-sse-int-intrinsic (name int-type ret-type insn cost c-name
&key (arg-type ret-type) immediate-arg make-temporary)
&key (arg-type ret-type) immediate-arg make-temporary defun-body)
(declare (ignore insn cost make-temporary))
`(def-intrinsic ,name (,arg-type ,int-type ,@(if immediate-arg (list immediate-arg)))
,ret-type ,c-name :immediate-args ,(if immediate-arg `((arg2 ,immediate-arg)))))
,ret-type ,c-name :immediate-args ,(if immediate-arg `((arg2 ,immediate-arg)))
:defun-body ,defun-body))
(defmacro def-comparison-intrinsic (name arg-type insn cost c-name &key commutative tags)
(declare (ignore insn cost commutative tags))
`(def-intrinsic ,name (,arg-type ,arg-type) boolean ,c-name))
(defmacro %def-aref-intrinsic (tag val-type c-type reader writer &key (aux-args "") (bsize 16))
"Defines and exports macros and functios that implement vectorized array access."
@ -236,11 +242,10 @@
,(fmtw "(&(#0)->array.self.~A[#1])" (second spec))))
known-elt-types)))))))
(defmacro def-aref-intrinsic (tag val-type reader-fun writer-fun &key (check-bounds t))
(defmacro def-aref-intrinsic (tag val-type reader-fun writer-fun &key (ref-size 16))
`(%def-aref-intrinsic ,tag ,val-type ,(pointer-c-type-of val-type)
,(get reader-fun 'c-function-name) ,(get writer-fun 'c-function-name)
:bsize ,(ecase check-bounds
(t 16) ((nil) 0) (:no-gap 1))
:bsize ,ref-size
:aux-args ,(get reader-fun 'c-call-aux-args)))
(defmacro def-mem-intrinsic (name c-type ret-type c-name &key (public t)

View file

@ -157,7 +157,7 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(array-bounding-indices-bad-error ,array ,index (+ ,index ,access-size)))
,@code))))))
(defun sse-array-info-or-give-up (lvar)
(defun sse-array-info-or-give-up (lvar ref-size)
;; Look up the SSE element size and check if it is definitely a vector
(let ((type (lvar-type lvar)))
(unless (and (array-type-p type)
@ -170,12 +170,12 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(unless shift
(give-up-ir1-transform "not a known SSE-compatible array element type: ~S"
(type-specifier etype)))
(values (ash 1 shift) ; step
(1- (ash 16 (- shift))) ; gap
(values (ash 1 shift) ; step
(ash (1- ref-size) (- shift)) ; gap
(and (listp (array-type-dimensions type))
(if (null (cdr (array-type-dimensions type))) :yes :no))))))
(defmacro def-aref-intrinsic (postfix rtype reader writer &key (check-bounds t))
(defmacro def-aref-intrinsic (postfix rtype reader writer &key (ref-size 16))
(let* ((rm-aref (symbolicate "ROW-MAJOR-AREF-" postfix))
(rm-aset (if writer (symbolicate "ROW-MAJOR-ASET-" postfix)))
(aref (symbolicate "AREF-" postfix))
@ -186,10 +186,9 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(writer/ix-vop (if writer (symbolicate "%" writer "/IX")))
(rtype (or rtype '(values)))
(index-expression
(ecase check-bounds
(t ``(the index (%check-bound array (%sse-array-size array ,gap) index)))
((nil) ``(the index index))
(:no-gap ``(the index (%check-bound array (%sse-array-size array 0) index))))))
(if (= ref-size 0)
``(the signed-word index)
``(the signed-word (%check-bound array (%sse-array-size array ,gap) index)))))
`(progn
;; ROW-MAJOR-AREF
(export ',rm-aref)
@ -197,11 +196,11 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(defun ,rm-aref (array index)
(with-sse-data ((sap data array)
(offset index))
(,reader-vop sap offset)))
(,reader-vop sap offset 1 0)))
;;
(deftransform ,rm-aref ((array index) (simple-array t) * :important t)
,(format nil "open-code ~A" rm-aref)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size)
(declare (ignorable gap))
`(,',reader/ix-vop (array-data-expr array ,is-vector)
,,index-expression
@ -213,14 +212,14 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(declare (truly-dynamic-extent indices))
(with-sse-data ((sap data array)
(offset (%array-row-major-index array indices)))
(,reader-vop sap offset)))
(,reader-vop sap offset 1 0)))
;;
(defoptimizer (,aref derive-type) ((array &rest indices) node)
(assert-array-rank array (length indices))
(values-specifier-type ',rtype))
(deftransform ,aref ((array &rest indices) (simple-array &rest t) * :important t)
,(format nil "open-code ~A" aref)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size)
(declare (ignorable gap))
(let ((syms (make-gensym-list (length indices))))
`(lambda (array ,@syms)
@ -236,12 +235,12 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(defun ,rm-aset (array index new-value)
(with-sse-data ((sap data array)
(offset index))
(,writer-vop sap offset (the ,rtype new-value))
(,writer-vop sap offset 1 0 (the ,rtype new-value))
new-value))
;;
(deftransform ,rm-aset ((array index value) (simple-array t t) * :important t)
,(format nil "open-code ~A" rm-aset)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size)
(declare (ignorable gap))
`(progn
(,',writer/ix-vop (array-data-expr array ,is-vector)
@ -256,7 +255,7 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(let ((new-value (car (last stuff))))
(with-sse-data ((sap data array)
(offset (%array-row-major-index array (nbutlast stuff))))
(,writer-vop sap offset (the ,rtype new-value))
(,writer-vop sap offset 1 0 (the ,rtype new-value))
new-value)))
;;
(defoptimizer (,aset derive-type) ((array &rest stuff) node)
@ -266,12 +265,12 @@ Should be assumed to be SIMPLE-ARRAY, except that displacing with MAKE-SSE-ARRAY
(specifier-type ',rtype))
(deftransform ,aset ((array &rest stuff) (simple-array &rest t) * :important t)
,(format nil "open-code ~A" aset)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array)
(multiple-value-bind (step gap is-vector) (sse-array-info-or-give-up array ,ref-size)
(declare (ignorable gap))
(let ((syms (make-gensym-list (length stuff))))
`(lambda (array ,@syms)
(let ((index (the index ,(if (eq is-vector :yes) (first syms)
`(array-row-major-index array ,@(butlast syms))))))
(let ((index ,(if (eq is-vector :yes) (first syms)
`(array-row-major-index array ,@(butlast syms)))))
(,',writer/ix-vop (array-data-expr array ,is-vector)
,,index-expression
,step ,+vector-data-fixup+

View file

@ -59,27 +59,55 @@
;;; Index-offset splicing
(defun fold-index-addressing (fun-name index scale &key setter-p)
(multiple-value-bind (func index-args) (extract-fun-args index '(+ - * ash) 2)
(defun skip-casts (lvar)
(let ((inside (lvar-uses lvar)))
(if (and (cast-p inside)
(policy inside (= sb-c::type-check 0)))
(skip-casts (cast-value inside))
lvar)))
(defun delete-casts (lvar)
(loop for inside = (lvar-uses lvar)
while (cast-p inside)
do (delete-filter inside lvar (cast-value inside))))
(defun fold-index-addressing (fun-name index scale offset &key prefix-args postfix-args)
(multiple-value-bind (func index-args)
(extract-fun-args (skip-casts index) '(+ - * ash) 2)
(destructuring-bind (x constant) index-args
(declare (ignorable x))
(unless (constant-lvar-p constant)
(give-up-ir1-transform))
(let ((value (lvar-value constant))
(scale-value (lvar-value scale)))
(case func
(* (unless (typep (* value scale-value) '(signed-byte 32))
(give-up-ir1-transform "constant is too large for inlining")))
(ash (unless (and (>= value 0)
(typep (ash scale-value value) '(signed-byte 32)))
(give-up-ir1-transform "index shift is unsuitable for inlining"))))
(splice-fun-args index func 2)
(let* ((value-arg (when setter-p '(value)))
(is-scale (member func '(* ash)))
(new-scale (if is-scale `(,func scale const) 'scale))
(new-offset (if is-scale 'offset `(,func offset (* const scale)))))
`(lambda (thing index const scale offset ,@value-arg)
(,fun-name thing index ,new-scale ,new-offset ,@value-arg)))))))
(scale-value (lvar-value scale))
(offset-value (lvar-value offset)))
(unless (integerp value)
(give-up-ir1-transform))
(multiple-value-bind (new-scale new-offset)
(ecase func
(+ (values scale-value (+ offset-value (* value scale-value))))
(- (values scale-value (- offset-value (* value scale-value))))
(* (values (* scale-value value) offset-value))
(ash (unless (>= value 0)
(give-up-ir1-transform "negative index shift"))
(values (ash scale-value value) offset-value)))
(unless (and (typep new-scale '(signed-byte 32))
(typep new-offset 'signed-word))
(give-up-ir1-transform "constant is too large for inlining"))
(delete-casts index)
(splice-fun-args index func 2)
`(lambda (,@prefix-args thing index const scale offset ,@postfix-args)
(declare (ignore const scale offset))
(,fun-name ,@prefix-args thing (the signed-word index) ,new-scale ,new-offset ,@postfix-args)))))))
(deftransform fold-ref-index-addressing ((thing index scale offset) * * :defun-only t :node node)
(fold-index-addressing (lvar-fun-name (basic-combination-fun node)) index scale offset))
(deftransform fold-xmm-ref-index-addressing ((value thing index scale offset) * * :defun-only t :node node)
(fold-index-addressing (lvar-fun-name (basic-combination-fun node)) index scale offset :prefix-args '(value)))
(deftransform fold-set-index-addressing ((thing index scale offset value) * * :defun-only t :node node)
(fold-index-addressing (lvar-fun-name (basic-combination-fun node)) index scale offset :postfix-args '(value)))
;;; Index-offset addressing
@ -367,8 +395,8 @@ May emit additional instructions using the temporary register."
(iv :scs (unsigned-reg unsigned-stack immediate)))
(:arg-types sse-pack unsigned-num))
(defmacro def-sse-int-intrinsic (&whole whole name itype rtype insn cost c-name &key make-temporary immediate-arg)
(declare (ignore c-name))
(defmacro def-sse-int-intrinsic (&whole whole name itype rtype insn cost c-name &key make-temporary immediate-arg defun-body)
(declare (ignore c-name defun-body))
(let* ((imm (if immediate-arg '(imm)))
(immt (if immediate-arg (list immediate-arg)))
(unsigned? (subtypep itype 'unsigned-byte)))
@ -393,6 +421,40 @@ May emit additional instructions using the temporary register."
make-temporary)
(inst ,insn r ,(if make-temporary 'tmp '(ensure-reg-or-mem iv)) ,@imm))))))
;;; Comparison predicate intrinsics
(define-vop (sse-comparison-op)
(:args (x :scs (sse-reg))
(y :scs (sse-reg sse-pack-immediate)))
(:arg-types sse-pack sse-pack)
(:policy :fast-safe)
(:note "inline SSE binary comparison predicate")
(:vop-var vop)
(:save-p :compute-only))
(define-vop (sse-comparison-comm-op sse-comparison-op)
(:args (x :scs (sse-reg)
:load-if (not (and (sc-is x sse-pack-immediate)
(sc-is y sse-reg))))
(y :scs (sse-reg sse-pack-immediate))))
(defmacro def-comparison-intrinsic (&whole whole name arg-type insn cost c-name &key commutative tags)
(declare (ignore arg-type c-name))
(let* ()
`(progn
(export ',name)
(save-intrinsic-spec ,name ,whole)
(defknown ,name (sse-pack sse-pack) boolean (foldable flushable))
(define-vop (,name ,(if commutative 'sse-comparison-comm-op 'sse-comparison-op))
(:translate ,name)
(:conditional ,@tags)
(:generator ,cost
,(if commutative
`(if (sc-is x sse-reg)
(inst ,insn x y)
(inst ,insn y x))
`(inst ,insn x y)))))))
;;; Memory intrinsics
(define-vop (sse-load-base-op)
@ -401,40 +463,50 @@ May emit additional instructions using the temporary register."
(:note "inline SSE load operation"))
(define-vop (sse-load-op sse-load-base-op)
(:args (sap :scs (sap-reg))
(offset :scs (signed-reg)))
(:arg-types system-area-pointer signed-num))
(:args (sap :scs (sap-reg) :to :eval)
(index :scs (signed-reg immediate) :target tmp))
(:arg-types system-area-pointer signed-num
(:constant fixnum) (:constant signed-word))
(:temporary (:sc signed-reg :from (:argument 1)) tmp)
(:info scale offset))
(define-vop (sse-load-op/tag sse-load-base-op)
(:args (sap :scs (sap-reg) :to :eval)
(index :scs (any-reg signed-reg immediate) :target tmp))
(:arg-types system-area-pointer tagged-num
(:constant tagged-load-scale) (:constant signed-word))
(:temporary (:sc any-reg :from (:argument 1)) tmp)
(:info scale offset))
(define-vop (sse-xmm-load-op sse-load-base-op)
(:args (value :scs (sse-reg sse-pack-immediate) :target r)
(sap :scs (sap-reg))
(offset :scs (signed-reg)))
(:arg-types sse-pack system-area-pointer signed-num))
(sap :scs (sap-reg) :to :eval)
(index :scs (signed-reg immediate) :target tmp))
(:arg-types sse-pack system-area-pointer signed-num
(:constant fixnum) (:constant signed-word))
(:temporary (:sc signed-reg :from (:argument 2)) tmp)
(:info scale offset))
(define-vop (sse-load-imm-op sse-load-base-op)
(:args (sap :scs (sap-reg)))
(:arg-types system-area-pointer
(:constant (signed-byte 32)))
(:info offset))
(define-vop (sse-xmm-load-imm-op sse-load-base-op)
(define-vop (sse-xmm-load-op/tag sse-load-base-op)
(:args (value :scs (sse-reg sse-pack-immediate) :target r)
(sap :scs (sap-reg)))
(:arg-types sse-pack system-area-pointer
(:constant (signed-byte 32)))
(:info offset))
(sap :scs (sap-reg) :to :eval)
(index :scs (any-reg signed-reg immediate) :target tmp))
(:arg-types sse-pack system-area-pointer tagged-num
(:constant tagged-load-scale) (:constant signed-word))
(:temporary (:sc any-reg :from (:argument 2)) tmp)
(:info scale offset))
(define-vop (sse-load-ix-op sse-load-base-op)
(:args (sap :scs (descriptor-reg) :to :eval)
(index :scs (signed-reg immediate) :target tmp))
(:arg-types * signed-num (:constant fixnum) (:constant fixnum))
(:arg-types * signed-num (:constant fixnum) (:constant signed-word))
(:temporary (:sc signed-reg :from (:argument 1)) tmp)
(:info scale offset))
(define-vop (sse-load-ix-op/tag sse-load-base-op)
(:args (sap :scs (descriptor-reg) :to :eval)
(index :scs (any-reg signed-reg immediate) :target tmp))
(:arg-types * tagged-num (:constant tagged-load-scale) (:constant fixnum))
(:arg-types * tagged-num (:constant tagged-load-scale) (:constant signed-word))
(:temporary (:sc any-reg :from (:argument 1)) tmp)
(:info scale offset))
@ -442,10 +514,8 @@ May emit additional instructions using the temporary register."
&key register-arg tags postfix-fmt (size :qword))
(declare (ignore c-name postfix-fmt))
(let* ((vop (symbolicate "%" name))
(c-vop (symbolicate vop "-C"))
(ix-vop (symbolicate vop "/IX"))
(valtype (if register-arg '(sse-pack)))
(valarg (if register-arg '(value)))
(r-arg (if rtype '(r)))
(rtypes (if rtype
`(:result-types ,(type-name-to-primitive rtype))
@ -454,24 +524,26 @@ May emit additional instructions using the temporary register."
`(progn
(export ',name)
(save-intrinsic-spec ,name ,whole)
(defknown ,vop (,@valtype system-area-pointer fixnum) ,(or rtype '(values)) (flushable always-translatable))
(defknown ,vop (,@valtype system-area-pointer signed-word fixnum signed-word)
,(or rtype '(values)) (flushable always-translatable))
(define-vop (,vop ,(if register-arg 'sse-xmm-load-op 'sse-load-op))
(:translate ,vop)
,rtypes
(:generator 5
,(if register-arg `(ensure-load ,rtype r value))
(inst ,insn ,@tags ,@r-arg (make-ea ,size :base sap :index offset))))
(define-vop (,c-vop ,(if register-arg 'sse-xmm-load-imm-op 'sse-load-imm-op))
(inst ,insn ,@tags ,@r-arg (make-scaled-ea ,size sap index scale offset tmp))))
(define-vop (,(symbolicate vop "/TAG") ,(if register-arg 'sse-xmm-load-op/tag 'sse-load-op/tag))
(:translate ,vop)
,rtypes
(:generator 4
,(if register-arg `(ensure-load ,rtype r value))
(inst ,insn ,@tags ,@r-arg (make-ea ,size :base sap :disp offset))))
(def-splice-transform ,vop (,@valarg (sap+ sap offset1) offset2)
(,vop ,@valarg sap (+ offset1 offset2)))
(inst ,insn ,@tags ,@r-arg (make-scaled-ea ,size sap index scale offset tmp :fixnum-index t))))
(%deftransform ',vop '(function * *)
#',(if register-arg 'fold-xmm-ref-index-addressing 'fold-ref-index-addressing)
"fold semi-constant offset expressions")
,@(if (null register-arg)
`(;; Vector indexing version
(defknown ,ix-vop (simple-array fixnum fixnum fixnum) ,(or rtype '(values))
(defknown ,ix-vop (simple-array signed-word fixnum signed-word) ,(or rtype '(values))
(flushable always-translatable))
(define-vop (,ix-vop sse-load-ix-op)
(:translate ,ix-vop)
@ -483,33 +555,34 @@ May emit additional instructions using the temporary register."
,rtypes
(:generator 3
(inst ,insn ,@tags ,@r-arg (make-scaled-ea ,size sap index scale offset tmp :fixnum-index t))))
(deftransform ,ix-vop ((thing index scale offset))
"fold semi-constant index expressions"
(fold-index-addressing ',ix-vop index scale)))))))
(%deftransform ',ix-vop '(function * *) #'fold-ref-index-addressing
"fold semi-constant index expressions"))))))
(define-vop (sse-store-base-op)
(:policy :fast-safe)
(:note "inline SSE store operation"))
(define-vop (sse-store-op sse-store-base-op)
(:args (sap :scs (sap-reg))
(offset :scs (signed-reg))
(:args (sap :scs (sap-reg) :to :eval)
(index :scs (signed-reg immediate) :target tmp)
(value :scs (sse-reg)))
(:arg-types system-area-pointer signed-num sse-pack))
(:arg-types system-area-pointer signed-num (:constant fixnum) (:constant signed-word) sse-pack)
(:temporary (:sc signed-reg :from (:argument 1)) tmp)
(:info scale offset))
(define-vop (sse-store-imm-op sse-store-base-op)
(:args (sap :scs (sap-reg))
(define-vop (sse-store-op/tag sse-store-base-op)
(:args (sap :scs (sap-reg) :to :eval)
(index :scs (any-reg signed-reg immediate) :target tmp)
(value :scs (sse-reg)))
(:arg-types system-area-pointer
(:constant (signed-byte 32))
sse-pack)
(:info offset))
(:arg-types system-area-pointer tagged-num (:constant tagged-load-scale) (:constant signed-word) sse-pack)
(:temporary (:sc any-reg :from (:argument 1)) tmp)
(:info scale offset))
(define-vop (sse-store-ix-op sse-store-base-op)
(:args (sap :scs (descriptor-reg) :to :eval)
(index :scs (signed-reg immediate) :target tmp)
(value :scs (sse-reg)))
(:arg-types * signed-num (:constant fixnum) (:constant fixnum) sse-pack)
(:arg-types * signed-num (:constant fixnum) (:constant signed-word) sse-pack)
(:temporary (:sc signed-reg :from (:argument 1)) tmp)
(:info scale offset))
@ -517,31 +590,31 @@ May emit additional instructions using the temporary register."
(:args (sap :scs (descriptor-reg) :to :eval)
(index :scs (any-reg signed-reg immediate) :target tmp)
(value :scs (sse-reg)))
(:arg-types * tagged-num (:constant tagged-load-scale) (:constant fixnum) sse-pack)
(:arg-types * tagged-num (:constant tagged-load-scale) (:constant signed-word) sse-pack)
(:temporary (:sc any-reg :from (:argument 1)) tmp)
(:info scale offset))
(defmacro def-store-intrinsic (&whole whole name rtype insn c-name &key setf-name)
(declare (ignore rtype c-name))
(let* ((vop (symbolicate "%" name))
(c-vop (symbolicate vop "-C"))
(ix-vop (symbolicate vop "/IX")))
`(progn
,(unless setf-name `(export ',name))
(save-intrinsic-spec ,name ,whole)
(defknown ,vop (system-area-pointer fixnum sse-pack) (values) (unsafe always-translatable))
(defknown ,vop (system-area-pointer signed-word fixnum signed-word sse-pack) (values)
(unsafe always-translatable))
(define-vop (,vop sse-store-op)
(:translate ,vop)
(:generator 5
(inst ,insn (make-ea :qword :base sap :index offset) value)))
(define-vop (,c-vop sse-store-imm-op)
(inst ,insn (make-scaled-ea :qword sap index scale offset tmp) value)))
(define-vop (,(symbolicate vop "/TAG") sse-store-op/tag)
(:translate ,vop)
(:generator 4
(inst ,insn (make-ea :qword :base sap :disp offset) value)))
(def-splice-transform ,vop ((sap+ sap offset1) offset2 new-value)
(,vop sap (+ offset1 offset2) new-value))
(inst ,insn (make-scaled-ea :qword sap index scale offset tmp :fixnum-index t) value)))
(%deftransform ',vop '(function * *) #'fold-set-index-addressing
"fold semi-constant offset expressions")
;; Vector indexing version
(defknown ,ix-vop (simple-array fixnum fixnum fixnum sse-pack) (values)
(defknown ,ix-vop (simple-array signed-word fixnum signed-word sse-pack) (values)
(unsafe always-translatable))
(define-vop (,ix-vop sse-store-ix-op)
(:translate ,ix-vop)
@ -551,7 +624,6 @@ May emit additional instructions using the temporary register."
(:translate ,ix-vop)
(:generator 3
(inst ,insn (make-scaled-ea :qword sap index scale offset tmp :fixnum-index t) value)))
(deftransform ,ix-vop ((thing index scale offset value))
"fold semi-constant index expressions"
(fold-index-addressing ',ix-vop index scale :setter-p t)))))
(%deftransform ',ix-vop '(function * *) #'fold-set-index-addressing
"fold semi-constant index expressions"))))

View file

@ -45,6 +45,11 @@
(declare (type sse-pack x)
(type ,itype iv))
(truly-the ,rtype (%primitive ,name x iv)))))
(def-comparison-intrinsic (name arg-type insn cost c-name &key &allow-other-keys)
(declare (ignore insn cost c-name arg-type))
`(defun ,name (x y)
(declare (type sse-pack x y))
(truly-the boolean (,name x y))))
(def-load-intrinsic (name rtype insn c-name &key register-arg &allow-other-keys)
(declare (ignore insn c-name))
(let* ((vop (symbolicate "%" name))
@ -54,10 +59,10 @@
(defun ,name (,@valarg pointer &optional (offset 0))
(declare ,@(if register-arg '((type sse-pack value)))
(type system-area-pointer pointer)
(type fixnum offset))
(type signed-word offset))
,(if rtype
`(truly-the ,rtype (,vop ,@valarg pointer offset))
`(,vop ,@valarg pointer offset))))))
`(truly-the ,rtype (,vop ,@valarg pointer offset 1 0))
`(,vop ,@valarg pointer offset 1 0))))))
(def-store-intrinsic (name rtype insn c-name &key setf-name &allow-other-keys)
(declare (ignore insn c-name))
(let* ((vop (symbolicate "%" name)))
@ -66,8 +71,8 @@
(defun ,name (pointer value &optional (offset 0))
(declare (type system-area-pointer pointer)
(type sse-pack value)
(type fixnum offset))
(,vop pointer offset value)
(type signed-word offset))
(,vop pointer offset 1 0 value)
(truly-the ,rtype value))
,(if setf-name
`(defsetf ,setf-name (pointer &optional (offset 0)) (value)

View file

@ -10,15 +10,19 @@
;;; Prefetch: AREF-PREFETCH-*, ROW-MAJOR-AREF-PREFETCH-*
(def-aref-intrinsic #:PREFETCH-T0 nil cpu-prefetch-t0 nil :check-bounds nil)
(def-aref-intrinsic #:PREFETCH-T1 nil cpu-prefetch-t1 nil :check-bounds nil)
(def-aref-intrinsic #:PREFETCH-T2 nil cpu-prefetch-t2 nil :check-bounds nil)
(def-aref-intrinsic #:PREFETCH-NTA nil cpu-prefetch-nta nil :check-bounds nil)
(def-aref-intrinsic #:PREFETCH-T0 nil cpu-prefetch-t0 nil :ref-size 0)
(def-aref-intrinsic #:PREFETCH-T1 nil cpu-prefetch-t1 nil :ref-size 0)
(def-aref-intrinsic #:PREFETCH-T2 nil cpu-prefetch-t2 nil :ref-size 0)
(def-aref-intrinsic #:PREFETCH-NTA nil cpu-prefetch-nta nil :ref-size 0)
(def-aref-intrinsic #:CLFLUSH nil cpu-clflush nil :check-bounds :no-gap)
(def-aref-intrinsic #:CLFLUSH nil cpu-clflush nil :ref-size 1)
;;; Single-float
;; AREF-SS, ROW-MAJOR-AREF-SS
(def-aref-intrinsic #:SS float-sse-pack mem-ref-ss mem-set-ss :ref-size 4)
;; AREF-PS, ROW-MAJOR-AREF-PS
(def-aref-intrinsic #:PS float-sse-pack mem-ref-ps mem-set-ps)
@ -33,6 +37,10 @@
;;; Double-float
;; AREF-SD, ROW-MAJOR-AREF-SD
(def-aref-intrinsic #:SD double-sse-pack mem-ref-sd mem-set-sd :ref-size 8)
;; AREF-PD, ROW-MAJOR-AREF-PD
(def-aref-intrinsic #:PD double-sse-pack mem-ref-pd mem-set-pd)
@ -47,6 +55,10 @@
;;; Integer
;; AREF-SI64, ROW-MAJOR-AREF-SI64
(def-aref-intrinsic #:SI64 int-sse-pack mem-ref-si64 mem-set-si64 :ref-size 8)
;; AREF-PI, ROW-MAJOR-AREF-PI
(def-aref-intrinsic #:PI int-sse-pack mem-ref-pi mem-set-pi)

View file

@ -198,7 +198,18 @@
(def-binary-intrinsic cmpunord-ss float-sse-pack cmpss 3 "_mm_cmpunord_ss" :tags (:unord))
(def-binary-intrinsic cmpunord-ps float-sse-pack cmpps 3 "_mm_cmpunord_ps" :tags (:unord) :commutative t)
#| Skipped: _mm_u?comi.*_ss |#
(def-comparison-intrinsic =-ss? float-sse-pack comiss 3 "_mm_comieq_ss" :commutative t :tags (:e))
(def-comparison-intrinsic =-ssu? float-sse-pack ucomiss 3 "_mm_ucomieq_ss" :commutative t :tags (:e))
(def-comparison-intrinsic <-ss? float-sse-pack comiss 3 "_mm_comilt_ss" :tags (:b))
(def-comparison-intrinsic <-ssu? float-sse-pack ucomiss 3 "_mm_ucomilt_ss" :tags (:b))
(def-comparison-intrinsic <=-ss? float-sse-pack comiss 3 "_mm_comile_ss" :tags (:be))
(def-comparison-intrinsic <=-ssu? float-sse-pack ucomiss 3 "_mm_ucomile_ss" :tags (:be))
(def-comparison-intrinsic >-ss? float-sse-pack comiss 3 "_mm_comigt_ss" :tags (:a))
(def-comparison-intrinsic >-ssu? float-sse-pack ucomiss 3 "_mm_ucomigt_ss" :tags (:a))
(def-comparison-intrinsic >=-ss? float-sse-pack comiss 3 "_mm_comige_ss" :tags (:ae))
(def-comparison-intrinsic >=-ssu? float-sse-pack ucomiss 3 "_mm_ucomige_ss" :tags (:ae))
(def-comparison-intrinsic /=-ss? float-sse-pack comiss 3 "_mm_comineq_ss" :commutative t :tags (:ne))
(def-comparison-intrinsic /=-ssu? float-sse-pack ucomiss 3 "_mm_ucomineq_ss" :commutative t :tags (:ne))
;; Misc
@ -338,6 +349,19 @@
(def-binary-intrinsic cmpunord-sd double-sse-pack cmpsd 3 "_mm_cmpunord_sd" :tags (:unord))
(def-binary-intrinsic cmpunord-pd double-sse-pack cmppd 3 "_mm_cmpunord_pd" :tags (:unord) :commutative t)
(def-comparison-intrinsic =-sd? double-sse-pack comisd 3 "_mm_comieq_sd" :commutative t :tags (:e))
(def-comparison-intrinsic =-sdu? double-sse-pack ucomisd 3 "_mm_ucomieq_sd" :commutative t :tags (:e))
(def-comparison-intrinsic <-sd? double-sse-pack comisd 3 "_mm_comilt_sd" :tags (:b))
(def-comparison-intrinsic <-sdu? double-sse-pack ucomisd 3 "_mm_ucomilt_sd" :tags (:b))
(def-comparison-intrinsic <=-sd? double-sse-pack comisd 3 "_mm_comile_sd" :tags (:be))
(def-comparison-intrinsic <=-sdu? double-sse-pack ucomisd 3 "_mm_ucomile_sd" :tags (:be))
(def-comparison-intrinsic >-sd? double-sse-pack comisd 3 "_mm_comigt_sd" :tags (:a))
(def-comparison-intrinsic >-sdu? double-sse-pack ucomisd 3 "_mm_ucomigt_sd" :tags (:a))
(def-comparison-intrinsic >=-sd? double-sse-pack comisd 3 "_mm_comige_sd" :tags (:ae))
(def-comparison-intrinsic >=-sdu? double-sse-pack ucomisd 3 "_mm_ucomige_sd" :tags (:ae))
(def-comparison-intrinsic /=-sd? double-sse-pack comisd 3 "_mm_comineq_sd" :commutative t :tags (:ne))
(def-comparison-intrinsic /=-sdu? double-sse-pack ucomisd 3 "_mm_ucomineq_sd" :commutative t :tags (:ne))
;; Misc
(def-binary-intrinsic unpackhi-pd double-sse-pack unpckhpd 1 "_mm_unpackhi_pd")
@ -541,23 +565,31 @@
(def-unary-intrinsic slli-pi int-sse-pack pslldq 1 "_mm_slli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8))
(def-sse-int-intrinsic slli-pi16 fixnum int-sse-pack psllw 3 "_mm_slli_epi16" :make-temporary t)
(def-sse-int-intrinsic slli-pi32 fixnum int-sse-pack pslld 3 "_mm_slli_epi32" :make-temporary t)
(def-sse-int-intrinsic slli-pi64 fixnum int-sse-pack psllq 3 "_mm_slli_epi64" :make-temporary t)
(def-sse-int-intrinsic slli-pi16 fixnum int-sse-pack psllw 3 "_mm_slli_epi16" :make-temporary t
:defun-body "_mm_sll_epi16(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic slli-pi32 fixnum int-sse-pack pslld 3 "_mm_slli_epi32" :make-temporary t
:defun-body "_mm_sll_epi32(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic slli-pi64 fixnum int-sse-pack psllq 3 "_mm_slli_epi64" :make-temporary t
:defun-body "_mm_sll_epi64(#0,_mm_cvtsi32_si128(#1))")
(def-binary-intrinsic sll-pi16 int-sse-pack psllw 1 "_mm_sll_epi16")
(def-binary-intrinsic sll-pi32 int-sse-pack pslld 1 "_mm_sll_epi32")
(def-binary-intrinsic sll-pi64 int-sse-pack psllq 1 "_mm_sll_epi64")
(def-sse-int-intrinsic srai-pi16 fixnum int-sse-pack psraw 3 "_mm_srai_epi16" :make-temporary t)
(def-sse-int-intrinsic srai-pi32 fixnum int-sse-pack psrad 3 "_mm_srai_epi32" :make-temporary t)
(def-sse-int-intrinsic srai-pi16 fixnum int-sse-pack psraw 3 "_mm_srai_epi16" :make-temporary t
:defun-body "_mm_sra_epi16(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic srai-pi32 fixnum int-sse-pack psrad 3 "_mm_srai_epi32" :make-temporary t
:defun-body "_mm_sra_epi32(#0,_mm_cvtsi32_si128(#1))")
(def-binary-intrinsic sra-pi16 int-sse-pack psraw 1 "_mm_sra_epi16")
(def-binary-intrinsic sra-pi32 int-sse-pack psrad 1 "_mm_sra_epi32")
(def-unary-intrinsic srli-pi int-sse-pack psrldq 1 "_mm_srli_si128" :partial :one-arg :immediate-arg (unsigned-byte 8))
(def-sse-int-intrinsic srli-pi16 fixnum int-sse-pack psrlw 3 "_mm_srli_epi16" :make-temporary t)
(def-sse-int-intrinsic srli-pi32 fixnum int-sse-pack psrld 3 "_mm_srli_epi32" :make-temporary t)
(def-sse-int-intrinsic srli-pi64 fixnum int-sse-pack psrlq 3 "_mm_srli_epi64" :make-temporary t)
(def-sse-int-intrinsic srli-pi16 fixnum int-sse-pack psrlw 3 "_mm_srli_epi16" :make-temporary t
:defun-body "_mm_srl_epi16(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic srli-pi32 fixnum int-sse-pack psrld 3 "_mm_srli_epi32" :make-temporary t
:defun-body "_mm_srl_epi32(#0,_mm_cvtsi32_si128(#1))")
(def-sse-int-intrinsic srli-pi64 fixnum int-sse-pack psrlq 3 "_mm_srli_epi64" :make-temporary t
:defun-body "_mm_srl_epi64(#0,_mm_cvtsi32_si128(#1))")
(def-binary-intrinsic srl-pi16 int-sse-pack psrlw 1 "_mm_srl_epi16")
(def-binary-intrinsic srl-pi32 int-sse-pack psrld 1 "_mm_srl_epi32")
(def-binary-intrinsic srl-pi64 int-sse-pack psrlq 1 "_mm_srl_epi64")

View file

@ -30,8 +30,10 @@
#:GIVE-UP-IR1-TRANSFORM #:ABORT-IR1-TRANSFORM
#:INSERT-ARRAY-BOUNDS-CHECKS #:VECTOR-LENGTH
#:ASSERT-ARRAY-RANK #:ASSERT-LVAR-TYPE
#:CONSTANT-LVAR-P #:LVAR-VALUE #:LVAR-TYPE
#:LEXENV-POLICY #:NODE-LEXENV
#:CONSTANT-LVAR-P #:LVAR-VALUE #:LVAR-TYPE #:LVAR-USES
#:LVAR-FUN-NAME #:BASIC-COMBINATION-FUN
#:LEXENV-POLICY #:NODE-LEXENV #:POLICY
#:CAST-P #:CAST-VALUE #:DELETE-FILTER
#:FIND-SAETP #:FIND-SAETP-BY-CTYPE)
#+sbcl
(:import-from #:SB-IMPL