/* Copyright 2010 Fredrik Wikstrom. All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
**
** 1. Redistributions of source code must retain the above copyright
**    notice, this list of conditions and the following disclaimer.
**
** 2. Redistributions in binary form must reproduce the above copyright
**    notice, this list of conditions and the following disclaimer in the
**    documentation and/or other materials provided with the distribution.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
** POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef ENDIAN_H
#define ENDIAN_H

#ifndef __amigaos4__
#include "os4types.h"
#endif

/*
** Host byte-order detection.
**
** WORDS_BIGENDIAN is defined to 1 on big-endian hosts and left undefined on
** little-endian ones.  The compiler's own __BYTE_ORDER__ is trusted when it
** is available, because bi-endian architectures (MIPS, PowerPC, ARM) cannot
** be classified by architecture macro alone.  The architecture lists are
** only a fallback for compilers that do not provide __BYTE_ORDER__.  When
** nothing matches, a WORDS_BIGENDIAN setting supplied by the build system
** is left untouched.
*/
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && defined(__ORDER_LITTLE_ENDIAN__)
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#  undef WORDS_BIGENDIAN
#  define WORDS_BIGENDIAN 1
# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#  undef WORDS_BIGENDIAN
# endif
#elif (defined(__arm__) && !defined(__ARMEB__)) || defined(__i386__) || defined(__i860__) || defined(__ns32000__) || defined(__vax__) || defined(__amd64__) || defined(__x86_64__) || defined(__MIPSEL__) || defined(__alpha__)
#undef WORDS_BIGENDIAN
#elif defined(__sparc__) || defined(__PPC__) || defined(__mips__) || defined(__ppc__) || defined(__M68K__) || defined(__BIG_ENDIAN__)
/* #undef first so a differing earlier definition is not a redefinition error */
#undef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 1
#endif

/*
** Generic (portable) byte-swap helpers.
**
**   swapNN(x)     - value of x with its NN-bit byte order reversed
**   rswapNN(p)    - read an NN-bit value through p and return it swapped
**   wswapNN(p,x)  - store x through p with its bytes swapped
**
** These are plain macros: the argument x is expanded several times, so do
** not pass expressions with side effects.  Compiler-specific sections later
** in this header may #undef and replace them.
*/
#define swap16(x) \
	((((uint16)(x) & 0xff00) >> 8) | \
	 (((uint16)(x) & 0x00ff) << 8))

#define swap32(x) \
	((((uint32)(x) & 0xff000000) >> 24) | \
	 (((uint32)(x) & 0x00ff0000) >> 8) | \
	 (((uint32)(x) & 0x0000ff00) << 8) | \
	 (((uint32)(x) & 0x000000ff) << 24))

/* The low 32 bits end up (swapped) in the high half and vice versa. */
#define swap64(x) \
	(((uint64)swap32(x) << 32) | (uint64)swap32((uint64)(x) >> 32))

/* 16-bit memory accessors */
#define rswap16(p) swap16(*(uint16 *)(p))
#define wswap16(p,x) (*(uint16 *)(p) = swap16(x))

/* 32-bit memory accessors */
#define rswap32(p) swap32(*(uint32 *)(p))
#define wswap32(p,x) (*(uint32 *)(p) = swap32(x))

/* 64-bit memory accessors */
#define rswap64(p) swap64(*(uint64 *)(p))
#define wswap64(p,x) (*(uint64 *)(p) = swap64(x))

/*
** Host-order <-> fixed-order helpers.
**
**   h2beNN(x) / h2leNN(x)   - convert a host-order value to big/little endian
**   rbeNN(p)  / rleNN(p)    - read a big/little-endian value through p
**   wbeNN(p,x)/ wleNN(p,x)  - store x through p as big/little endian
**
** The order matching the host is a plain access; the other one goes
** through the swap helpers.
*/
#ifndef WORDS_BIGENDIAN

/* Little-endian host: little-endian data is native. */
#define h2le16(x)  (x)
#define h2le32(x)  (x)
#define h2le64(x)  (x)
#define rle16(p)   (*(uint16 *)(p))
#define rle32(p)   (*(uint32 *)(p))
#define rle64(p)   (*(uint64 *)(p))
#define wle16(p,x) (*(uint16 *)(p) = (x))
#define wle32(p,x) (*(uint32 *)(p) = (x))
#define wle64(p,x) (*(uint64 *)(p) = (x))

/* Little-endian host: big-endian data must be byte-swapped. */
#define h2be16(x)  swap16(x)
#define h2be32(x)  swap32(x)
#define h2be64(x)  swap64(x)
#define rbe16(p)   rswap16(p)
#define rbe32(p)   rswap32(p)
#define rbe64(p)   rswap64(p)
#define wbe16(p,x) wswap16(p,x)
#define wbe32(p,x) wswap32(p,x)
#define wbe64(p,x) wswap64(p,x)

#else /* WORDS_BIGENDIAN */

/* Big-endian host: big-endian data is native. */
#define h2be16(x)  (x)
#define h2be32(x)  (x)
#define h2be64(x)  (x)
#define rbe16(p)   (*(uint16 *)(p))
#define rbe32(p)   (*(uint32 *)(p))
#define rbe64(p)   (*(uint64 *)(p))
#define wbe16(p,x) (*(uint16 *)(p) = (x))
#define wbe32(p,x) (*(uint32 *)(p) = (x))
#define wbe64(p,x) (*(uint64 *)(p) = (x))

/* Big-endian host: little-endian data must be byte-swapped. */
#define h2le16(x)  swap16(x)
#define h2le32(x)  swap32(x)
#define h2le64(x)  swap64(x)
#define rle16(p)   rswap16(p)
#define rle32(p)   rswap32(p)
#define rle64(p)   rswap64(p)
#define wle16(p,x) wswap16(p,x)
#define wle32(p,x) wswap32(p,x)
#define wle64(p,x) wswap64(p,x)

#endif /* WORDS_BIGENDIAN */

#if defined(__VBCC__) && defined(__PPC__)

/*
** vbcc/PowerPC: replace the generic macros with vbcc inline-assembly
** functions.
**
** All values are declared unsigned.  The assembly below leaves zero-extended
** results in r3 (r3/r4 for 64 bits), and the generic macros as well as the
** other compiler-specific sections of this header yield unsigned values; a
** signed declaration would let a swapped value with its top bit set be
** sign-extended when the caller widens it.
*/

#undef swap16
#undef swap32
#undef swap64

/*
** Byte-swap a 16-bit value (in: r4, out: r3).
** rlwinm moves the low byte up into bits 16..23 and clears the rest,
** rlwimi then inserts the former high byte into bits 24..31.
*/
uint16 swap16(__reg("r4") uint16) =
	"\trlwinm\t3,4,8,16,23\n"
	"\trlwimi\t3,4,24,24,31";

/*
** Byte-swap a 32-bit value (in: r4, out: r3).
** Rotate right by 8, then patch bytes 1 and 3 from the value rotated
** left by 8.
*/
uint32 swap32(__reg("r4") uint32) =
	"\trlwinm\t3,4,24,0,31\n"
	"\trlwimi\t3,4,8,8,15\n"
	"\trlwimi\t3,4,8,24,31";

/*
** Byte-swap a 64-bit value (in: r5/r6, out: r3/r4).
** Each 32-bit half is byte-swapped as in swap32 and the halves change
** places: r4 is built from r5 and r3 from r6.
*/
uint64 swap64(__reg("r5/r6") uint64) =
	"\trlwinm\t4,5,24,0,31\n"
	"\trlwinm\t3,6,24,0,31\n"
	"\trlwimi\t4,5,8,8,15\n"
	"\trlwimi\t3,6,8,8,15\n"
	"\trlwimi\t4,5,8,24,31\n"
	"\trlwimi\t3,6,8,24,31";

#undef rswap16
#undef rswap32
#undef rswap64

/* Byte-reversed 16-bit load from the address in r3. */
uint16 rswap16(__reg("r3") void *) =
	"\tlhbrx\t3,0,3";

/* Byte-reversed 32-bit load from the address in r3. */
uint32 rswap32(__reg("r3") void *) =
	"\tlwbrx\t3,0,3";

/*
** Byte-reversed 64-bit load: the word at p becomes r4 and the word at
** p + 4 becomes r3.  r3 is loaded last because it also holds the pointer.
*/
uint64 rswap64(__reg("r3") void *) =
	"\taddi\t5,3,4\n" // r5 = r3 + 4
	"\tlwbrx\t4,0,3\n"
	"\tlwbrx\t3,0,5";

#undef wswap16
#undef wswap32
#undef wswap64

/* Byte-reversed 16-bit store of r4 to the address in r3. */
void wswap16(__reg("r3") void *, __reg("r4") uint16) =
	"\tsthbrx\t4,0,3";

/* Byte-reversed 32-bit store of r4 to the address in r3. */
void wswap32(__reg("r3") void *, __reg("r4") uint32) =
	"\tstwbrx\t4,0,3";

/*
** Byte-reversed 64-bit store: r6 goes (byte-reversed) to p and r5 to
** p + 4.
*/
void wswap64(__reg("r3") void *, __reg("r5/r6") uint64) =
	"\taddi\t4,3,4\n" // r4 = r3 + 4
	"\tstwbrx\t6,0,3\n"
	"\tstwbrx\t5,0,4";

#endif /* defined(__VBCC__) && defined(__PPC__) */

#if defined(__GNUC__) && defined(__PPC__)

/*
** GCC/PowerPC (32-bit): replace the generic macros with inline-assembly
** functions.
**
** For 64-bit operands "%0" names the first register of the pair (the high
** word on 32-bit big-endian PowerPC) and "%L0" the second (the low word).
*/

#undef swap16
#undef swap32
#undef swap64

/*
** Byte-swap a 16-bit value; the result is zero-extended to 32 bits.
** The output is early-clobber because it is written before the second
** instruction reads the input again.
*/
static inline uint32 swap16(uint16 x) {
	uint32 res;
	asm("rlwinm %0,%1,8,16,23;"
		"rlwimi %0,%1,24,24,31;"
		: "=&r" (res)
		: "r" (x));
	return res;
}

/*
** Byte-swap a 32-bit value: rotate right by 8, then patch bytes 1 and 3
** from the value rotated left by 8.
*/
static inline uint32 swap32(uint32 x) {
	uint32 res;
	asm("rlwinm %0,%1,24,0,31;"
		"rlwimi %0,%1,8,8,15;"
		"rlwimi %0,%1,8,24,31;"
		: "=&r" (res)
		: "r" (x));
	return res;
}

/*
** Byte-swap a 64-bit value: each half is swapped as in swap32 and the
** halves change places (low word of the result from the high word of x).
*/
static inline uint64 swap64(uint64 x) {
	uint64 res;
	asm("rlwinm %L0,%1,24,0,31;"
		"rlwinm %0,%L1,24,0,31;"
		"rlwimi %L0,%1,8,8,15;"
		"rlwimi %0,%L1,8,8,15;"
		"rlwimi %L0,%1,8,24,31;"
		"rlwimi %0,%L1,8,24,31;"
		: "=&r" (res)
		: "r" (x));
	return res;
}

#undef rswap16
#undef rswap32
#undef rswap64

/*
** The load/store helpers below access memory that is not listed as an asm
** operand.  They are therefore volatile with a "memory" clobber so that GCC
** neither merges/hoists the loads nor reorders other accesses to the same
** memory around them.
*/

/* Read a 16-bit value from p with its bytes reversed (zero-extended). */
static inline uint32 rswap16(void *p) {
	uint32 res;
	asm volatile("lhbrx %0,0,%1;"
		: "=r" (res)
		: "r" (p)
		: "memory");
	return res;
}

/* Read a 32-bit value from p with its bytes reversed. */
static inline uint32 rswap32(void *p) {
	uint32 res;
	asm volatile("lwbrx %0,0,%1;"
		: "=r" (res)
		: "r" (p)
		: "memory");
	return res;
}

/*
** Read a 64-bit value from p with its bytes reversed: the word at p
** becomes the low half, the word at p + 4 the high half.
*/
static inline uint64 rswap64(void *p) {
	uint64 res;
	const uint32 y = 4;
	/* y is used as the RA (base) operand, where r0 would read as a literal
	** zero; the "b" constraint keeps it out of r0. */
	asm volatile("lwbrx %L0,0,%1;"
		"lwbrx %0,%2,%1;"
		: "=&r" (res)
		: "r" (p), "b" (y)
		: "memory");
	return res;
}

#undef wswap16
#undef wswap32
#undef wswap64

/* Store the 16-bit value x to p with its bytes reversed. */
static inline void wswap16(void *p, uint16 x) {
	asm volatile("sthbrx %1,0,%0;"
		:
		: "r" (p), "r" (x)
		: "memory");
}

/* Store the 32-bit value x to p with its bytes reversed. */
static inline void wswap32(void *p, uint32 x) {
	asm volatile("stwbrx %1,0,%0;"
		:
		: "r" (p), "r" (x)
		: "memory");
}

/*
** Store the 64-bit value x to p with its bytes reversed: the low half
** goes to p, the high half to p + 4.
*/
static inline void wswap64(void *p, uint64 x) {
	const uint32 y = 4;
	/* "b" for y: it is the RA (base) operand and must not be r0. */
	asm volatile("stwbrx %L1,0,%0;"
		"stwbrx %1,%2,%0;"
		:
		: "r" (p), "r" (x), "b" (y)
		: "memory");
}
#endif /* defined(__GNUC__) && defined(__PPC__) */

#if defined(__GNUC__) && (defined(__i386__) || defined(__i486__))

/*
** GCC/x86 (32-bit): replace the generic 16- and 32-bit swap macros with
** inline-assembly functions.  swap64 stays the generic macro and is built
** on top of swap32.
**
** USE_BSWAP selects the single "bswap" instruction for swap32 (default);
** define it to 0 before including this header to use the rotate sequence
** instead.
*/
#ifndef USE_BSWAP
#define USE_BSWAP 1
#endif

#undef swap16
#undef swap32

/* Exchange the two bytes of x by rotating the 16-bit register by 8. */
static inline uint16 swap16(uint16 x) {
	asm("rorw $8,%w0;"
		: "+r" (x)
		:
		: "cc");
	return x;
}

/* Reverse the four bytes of x, operating on it in place. */
static inline uint32 swap32(uint32 x) {
#if USE_BSWAP
	asm("bswap %0;"
		: "+r" (x));
#else
	/* swap low word bytes, exchange the words, swap the new low word */
	asm("rorw $8,%w0;"
		"rorl $16,%0;"
		"rorw $8,%w0;"
		: "+r" (x)
		:
		: "cc");
#endif
	return x;
}

#endif /* defined(__GNUC__) && (defined(__i386__) || defined(__i486__)) */

#if defined(__VBCC__) && defined(__M68K__)

/*
** vbcc/M68K: replace the generic swap macros with vbcc inline-assembly
** functions.  Only swap16/32/64, rswap64 and wswap64 are overridden here;
** rswap16/32 and wswap16/32 remain the generic macros, which expand to
** calls of the swap16/swap32 defined below.
**
** In every sequence "rol.w #8,dN" exchanges the two bytes of the low word
** of dN and "swap dN" exchanges the two 16-bit halves of dN, so
** rol.w / swap / rol.w reverses all four bytes of a register.
**
** NOTE(review): the 64-bit routines assume vbcc keeps the high 32 bits of
** a uint64 in d0 and the low 32 bits in d1, both for the "d0/d1" argument
** and for the return value - confirm against the vbcc M68K ABI.
*/

#undef swap16
#undef swap32
#undef swap64

/* Byte-swap the 16-bit value in d0 in place. */
static inline uint16 swap16(__reg("d0") uint16) =
	"\trol.w\t#8,d0";

/* Byte-swap the 32-bit value in d0 in place. */
static inline uint32 swap32(__reg("d0") uint32) =
	"\trol.w\t#8,d0\n"
	"\tswap\td0\n"
	"\trol.w\t#8,d0";

/*
** Byte-swap the 64-bit value in d0/d1: reverse the bytes of each register,
** then exchange d0 and d1 with the three-eor sequence (no scratch register
** needed).
*/
static inline uint64 swap64(__reg("d0/d1") uint64) =
	"\trol.w\t#8,d0\n"
	"\trol.w\t#8,d1\n"
	"\tswap\td0\n"
	"\tswap\td1\n"
	"\trol.w\t#8,d0\n"
	"\trol.w\t#8,d1\n"
	"\teor.l\td0,d1\n"
	"\teor.l\td1,d0\n"
	"\teor.l\td0,d1";

#undef rswap64

/*
** Read a 64-bit value through a0 with its bytes reversed: the longword at
** 4(a0) is loaded into d0 and the one at (a0) into d1, which exchanges the
** halves; each register is then byte-reversed.
*/
static inline uint64 rswap64(__reg("a0") void *) =
	"\tmove.l\t4(a0),d0\n"
	"\tmove.l\t(a0),d1\n"
	"\trol.w\t#8,d0\n"
	"\trol.w\t#8,d1\n"
	"\tswap\td0\n"
	"\tswap\td1\n"
	"\trol.w\t#8,d0\n"
	"\trol.w\t#8,d1";

#undef wswap64

/*
** Store the 64-bit value in d0/d1 through a0 with its bytes reversed: each
** register is byte-reversed, then d0 is written to 4(a0) and d1 to (a0).
**
** NOTE(review): declared to return uint64 although the PowerPC variants of
** wswap64 in this header return void.  d0/d1 are left holding the
** byte-reversed halves without being exchanged, so the return value is
** probably not meant to be used - confirm with callers.
*/
static inline uint64 wswap64(__reg("a0") void *, __reg("d0/d1") uint64) =
	"\trol.w\t#8,d0\n"
	"\trol.w\t#8,d1\n"
	"\tswap\td0\n"
	"\tswap\td1\n"
	"\trol.w\t#8,d0\n"
	"\trol.w\t#8,d1\n"
	"\tmove.l\td0,4(a0)\n"
	"\tmove.l\td1,(a0)";

#endif /* defined(__VBCC__) && defined(__M68K__) */

#endif /* ENDIAN_H */
