I figured it out.

I didn't remember cearn's example code correctly.
TheLazy1's function GRF_Decompress
Code: Select all
/*
 * GRF_Decompress -- version as originally posted.
 *
 * Steps two 32-bit words past Source, reads the next word as a header and
 * unpacks the data behind it into Dest.
 *
 *   header >> 8    byte count used for the cache flushes and the DMA copy
 *   header & 0xF0  selects the path: 0x00 plain DMA copy, 0x10 LZ77Vram,
 *                  0x20 HUFF, 0x30 RLEVram (the last three via decompress())
 *
 * Returns 1 on success, 0 when either pointer is NULL or the selector is
 * not one of the four values above.
 */
int GRF_Decompress( u32* Source, void* Dest ) {
    u32* chunk;
    u32 hdr;
    u32 length;

    if ( !Source || !Dest ) {
        return 0;
    }

    chunk = Source + 2;     /* header word sits two words into the input */
    hdr = *chunk;
    length = hdr >> 8;

    /*
     * NOTE(review): this range begins at the header word itself, while the
     * uncompressed payload copied below begins one word later (chunk + 1).
     */
    DC_FlushRange( chunk, length );

    switch ( hdr & 0xF0 ) {
    case 0x00:
        /* Payload follows the header directly; copy it as-is. */
        dmaCopy( chunk + 1, Dest, length );
        break;

    case 0x10:
        decompress( chunk, Dest, LZ77Vram );
        break;

    case 0x20:
        decompress( chunk, Dest, HUFF );
        break;

    case 0x30:
        decompress( chunk, Dest, RLEVram );
        break;

    default:
        return 0;
    }

    /*
     * The destination is flushed here, after the copy/decompress. The
     * original author had tried DC_InvalidateRange at this point and
     * reported that switching to a flush made the problem go away.
     */
    DC_FlushRange( Dest, length );
    return 1;
}
changed version
Code: Select all
/*
 * GRF_Decompress -- changed version.
 *
 * Steps two 32-bit words past Source, reads the next word as a header and
 * unpacks the data behind it into Dest.
 *
 *   header >> 8    byte count used for the cache flushes and the DMA copy
 *   header & 0xF0  selects the path: 0x00 plain DMA copy, 0x10 LZ77Vram,
 *                  0x20 HUFF, 0x30 RLEVram (the last three via decompress())
 *
 * Cache maintenance happens only on the DMA path, and entirely before the
 * transfer: both the payload and the destination range are flushed, then
 * the copy runs. Nothing is flushed or invalidated afterwards.
 *
 * Returns 1 on success, 0 when either pointer is NULL or the selector is
 * not one of the four values above.
 */
int GRF_Decompress( u32* Source, void* Dest ) {
    u32* chunk;
    u32 hdr;
    u32 length;

    if ( !Source || !Dest ) {
        return 0;
    }

    chunk = Source + 2;     /* header word sits two words into the input */
    hdr = *chunk;
    length = hdr >> 8;

    switch ( hdr & 0xF0 ) {
    case 0x00: {
        /* Uncompressed: the payload starts right after the header word. */
        u32* payload = chunk + 1;

        DC_FlushRange( payload, length );
        DC_FlushRange( Dest, length );
        dmaCopy( payload, Dest, length );
        break;
    }

    case 0x10:
        decompress( chunk, Dest, LZ77Vram );
        break;

    case 0x20:
        decompress( chunk, Dest, HUFF );
        break;

    case 0x30:
        decompress( chunk, Dest, RLEVram );
        break;

    default:
        return 0;
    }

    return 1;
}
cearn's dmaCopySafish also works in place of the flush-and-copy code in case 0x00:
Code: Select all
#define CACHE_LINE_SIZE 32

/*
 * dmaCopySafish -- dmaCopy() wrapped in cache maintenance.
 *
 * Sequence:
 *   1. flush the whole source range;
 *   2. if dst does not start on a CACHE_LINE_SIZE boundary, flush at dst;
 *   3. if dst + size does not land on a CACHE_LINE_SIZE boundary, flush at
 *      dst + size;
 *   4. run the DMA copy;
 *   5. invalidate the whole destination range.
 *
 * Steps 2 and 3 presumably exist so that unrelated data sharing a partially
 * covered cache line is written back before step 5 discards that line.
 *
 * Both buffers are assumed to live in cached memory; callers must add their
 * own check if that might not hold.
 */
void dmaCopySafish(const void *src, void *dst, u32 size)
{
    const u32 first = (u32)dst;
    const u32 past_end = first + size;

    DC_FlushRange(src, size);

    if (first % CACHE_LINE_SIZE != 0) {
        /* Destination head is not line-aligned. */
        DC_FlushRange((void*)first, 1);
    }

    if (past_end % CACHE_LINE_SIZE != 0) {
        /* Destination tail is not line-aligned. */
        DC_FlushRange((void*)past_end, 1);
    }

    dmaCopy(src, dst, size);

    DC_InvalidateRange(dst, size);
}
Main-memory-to-main-memory DMA isn't supposed to be faster than an assembly memcpy unless it is used asynchronously, so it probably doesn't matter.
Is it useful to store graphics data uncompressed on flash anyway? Flash is supposed to be slow. So having the data smaller in the slowest place might be a good idea.
Memory corruption sure is annoying to debug.

I had no idea what was going on.