Skip to content

Commit adcd28c

Browse files
committed
Checkpoint commit of of GPU/CUDA/PTX/Sqlite3/TraceStore progress.
1 parent 0f36234 commit adcd28c

File tree

10 files changed

+656
-29
lines changed

10 files changed

+656
-29
lines changed

Cu/Cu.cuh

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*++
2+
3+
Copyright (c) 2017 Trent Nelson <[email protected]>
4+
5+
Module Name:
6+
7+
Cu.cuh
8+
9+
Abstract:
10+
11+
This module is the main header file for the Cu component.
12+
13+
--*/
14+
15+
#ifdef __cplusplus
16+
extern "C" {
17+
#endif
18+
19+
//
20+
// Define NT-style typedefs.
21+
//
22+
23+
typedef char CHAR;
24+
typedef short SHORT;
25+
typedef long LONG;
26+
typedef wchar_t WCHAR; // wc, 16-bit UNICODE character
27+
28+
typedef WCHAR *PWCHAR, *LPWCH, *PWCH;
29+
30+
typedef CHAR *PCHAR, *LPCH, *PCH;
31+
32+
typedef float FLOAT;
33+
typedef double DOUBLE;
34+
typedef FLOAT *PFLOAT;
35+
typedef DOUBLE *PDOUBLE;
36+
37+
typedef unsigned char UCHAR;
38+
typedef unsigned short USHORT;
39+
typedef unsigned long ULONG;
40+
41+
typedef UCHAR *PUCHAR;
42+
typedef USHORT *PUSHORT;
43+
typedef ULONG *PULONG;
44+
45+
typedef CHAR *PCHAR;
46+
typedef SHORT *PSHORT;
47+
typedef LONG *PLONG;
48+
49+
typedef long long LONGLONG;
50+
typedef long long LONG64;
51+
typedef unsigned long long ULONGLONG;
52+
typedef unsigned long long ULONG64;
53+
54+
typedef LONG64 *PLONG64;
55+
typedef ULONG64 *PULONG64;
56+
57+
#define VOID void
58+
59+
//
60+
// Define CUDA macros and typedefs in NT style.
61+
//
62+
63+
#define HOST __host__
64+
#define GLOBAL __global__
65+
#define DEVICE __device__
66+
#define GridDim gridDim
67+
#define BlockDim blockDim
68+
#define BlockIndex blockIdx
69+
#define ThreadIndex threadIdx
70+
71+
72+
#define FOR_EACH_1D(Index, Total) \
73+
for (Index = BlockIndex.x * BlockDim.x + ThreadIndex.x; \
74+
Index < Total; \
75+
Index += BlockDim.x * GridDim.x)
76+
77+
#ifdef __cplusplus
78+
}
79+
#endif
80+
81+
// vim:set ts=8 sw=4 sts=4 tw=80 expandtab syntax=cuda :

Cu/TraceStoreKernels.cu

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,41 @@ Abstract:
1616
extern "C" {
1717
#endif
1818

19-
__global__
20-
void saxpy(int n, float a, float *x, float *y)
19+
#include "Cu.cuh"
20+
#include <no_sal2.h>
21+
22+
GLOBAL
23+
VOID
24+
SinglePrecisionAlphaXPlusY(
25+
_In_ LONG Total,
26+
_In_ FLOAT Alpha,
27+
_In_ PFLOAT X,
28+
_Out_ PFLOAT Y
29+
)
2130
{
22-
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
23-
i < n;
24-
i += blockDim.x * gridDim.x) {
31+
LONG Index;
2532

26-
y[i] = a * x[i] + y[i];
33+
FOR_EACH_1D(Index, Total) {
34+
Y[Index] = Alpha * X[Index] + Y[Index];
35+
}
36+
}
37+
38+
GLOBAL
39+
VOID
40+
DeltaTimestamp(
41+
_In_ ULONG64 Total,
42+
_In_ PULONG64 Timestamp,
43+
_Out_ PULONG64 Delta
44+
)
45+
{
46+
ULONG64 Index;
47+
48+
if (ThreadIndex.x % 32 == 0) {
49+
return;
50+
}
51+
52+
FOR_EACH_1D(Index, Total) {
53+
Delta[Index] = Timestamp[Index] - Timestamp[Index-1];
2754
}
2855
}
2956

Rtl/Cu.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ typedef
483483
_Check_return_
484484
CU_RESULT
485485
(CU_DEVICE_GET)(
486-
_Out_ PCU_DEVICE Device,
486+
_Outptr_result_maybenull_ PCU_DEVICE Device,
487487
_In_opt_ LONG Ordinal
488488
);
489489
typedef CU_DEVICE_GET *PCU_DEVICE_GET;
@@ -624,7 +624,7 @@ CU_RESULT
624624
_In_z_ PCHAR Image,
625625
_In_ LONG NumberOfOptions,
626626
_In_reads_(NumberOfOptions) PCU_JIT_OPTION Options,
627-
_Out_writes_(NumberOfOptions) PPCU_JIT_OPTION OptionValuesPointer
627+
_Out_writes_(NumberOfOptions) PPVOID OptionValuesPointer
628628
);
629629
typedef CU_MODULE_LOAD_DATA_EX *PCU_MODULE_LOAD_DATA_EX;
630630

Rtl/Rtl.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3001,6 +3001,48 @@ InitializeLargePages(PRTL Rtl)
30013001
return TRUE;
30023002
}
30033003

3004+
PVOID
3005+
TryMapViewOfFileNuma2(
3006+
PRTL Rtl,
3007+
HANDLE FileMappingHandle,
3008+
HANDLE ProcessHandle,
3009+
ULONG64 Offset,
3010+
PVOID BaseAddress,
3011+
SIZE_T ViewSize,
3012+
ULONG AllocationType,
3013+
ULONG PageProtection,
3014+
ULONG PreferredNode
3015+
)
3016+
{
3017+
LARGE_INTEGER FileOffset;
3018+
3019+
if (!Rtl->MapViewOfFileNuma2) {
3020+
goto Fallback;
3021+
}
3022+
3023+
AllocationType = FilterLargePageFlags(Rtl, AllocationType);
3024+
3025+
return Rtl->MapViewOfFileNuma2(FileMappingHandle,
3026+
ProcessHandle,
3027+
Offset,
3028+
BaseAddress,
3029+
ViewSize,
3030+
AllocationType,
3031+
PageProtection,
3032+
PreferredNode);
3033+
3034+
Fallback:
3035+
3036+
FileOffset.QuadPart = Offset;
3037+
return Rtl->MapViewOfFileExNuma(FileMappingHandle,
3038+
PageProtection,
3039+
FileOffset.HighPart,
3040+
FileOffset.LowPart,
3041+
ViewSize,
3042+
BaseAddress,
3043+
PreferredNode);
3044+
}
3045+
30043046
RTL_API PROBE_FOR_READ ProbeForRead;
30053047

30063048
_Use_decl_annotations_
@@ -3143,6 +3185,8 @@ InitializeRtl(
31433185
)
31443186
);
31453187

3188+
Rtl->TryMapViewOfFileNuma2 = TryMapViewOfFileNuma2;
3189+
31463190
Rtl->OutputDebugStringA = OutputDebugStringA;
31473191
Rtl->OutputDebugStringW = OutputDebugStringW;
31483192

Rtl/Rtl.h

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,22 @@ extern "C" {
233233
#define RtlPointerToOffset(B,P) ((ULONG_PTR)(((PCHAR)(P)) - ((PCHAR)(B))))
234234
#endif
235235

236+
#ifndef FlagOn
237+
#define FlagOn(_F,_SF) ((_F) & (_SF))
238+
#endif
239+
240+
#ifndef BooleanFlagOn
241+
#define BooleanFlagOn(F,SF) ((BOOLEAN)(((F) & (SF)) != 0))
242+
#endif
243+
244+
#ifndef SetFlag
245+
#define SetFlag(_F,_SF) ((_F) |= (_SF))
246+
#endif
247+
248+
#ifndef ClearFlag
249+
#define ClearFlag(_F,_SF) ((_F) &= ~(_SF))
250+
#endif
251+
236252
//
237253
// Helper macros.
238254
//
@@ -2334,6 +2350,23 @@ PVOID
23342350
);
23352351
typedef MAP_VIEW_OF_FILE_NUMA2 *PMAP_VIEW_OF_FILE_NUMA2;
23362352

2353+
typedef
2354+
_Ret_maybenull_
2355+
__out_data_source(FILE)
2356+
PVOID
2357+
(WINAPI TRY_MAP_VIEW_OF_FILE_NUMA2)(
2358+
_In_ struct _RTL *Rtl,
2359+
_In_ HANDLE FileMappingHandle,
2360+
_In_ HANDLE ProcessHandle,
2361+
_In_ ULONG64 Offset,
2362+
_In_opt_ PVOID BaseAddress,
2363+
_In_ SIZE_T ViewSize,
2364+
_In_ ULONG AllocationType,
2365+
_In_ ULONG PageProtection,
2366+
_In_ ULONG PreferredNode
2367+
);
2368+
typedef TRY_MAP_VIEW_OF_FILE_NUMA2 *PTRY_MAP_VIEW_OF_FILE_NUMA2;
2369+
23372370
typedef
23382371
BOOL
23392372
(WINAPI FLUSH_VIEW_OF_FILE)(
@@ -5846,6 +5879,10 @@ typedef struct _Struct_size_bytes_(SizeOfStruct) _RTL {
58465879
PVIRTUAL_ALLOC TryLargePageVirtualAlloc;
58475880
PVIRTUAL_ALLOC_EX TryLargePageVirtualAllocEx;
58485881

5882+
PMAP_VIEW_OF_FILE_EX_NUMA MapViewOfFileExNuma;
5883+
PMAP_VIEW_OF_FILE_NUMA2 MapViewOfFileNuma2;
5884+
PTRY_MAP_VIEW_OF_FILE_NUMA2 TryMapViewOfFileNuma2;
5885+
58495886
PATEXIT atexit;
58505887
PATEXITEX AtExitEx;
58515888

@@ -5877,9 +5914,6 @@ typedef struct _Struct_size_bytes_(SizeOfStruct) _RTL {
58775914
PCRYPT_BINARY_TO_STRING_A CryptBinaryToStringA;
58785915
PCRYPT_BINARY_TO_STRING_W CryptBinaryToStringW;
58795916

5880-
PMAP_VIEW_OF_FILE_EX_NUMA MapViewOfFileExNuma;
5881-
PMAP_VIEW_OF_FILE_NUMA2 MapViewOfFileNuma2;
5882-
58835917
POUTPUT_DEBUG_STRING_A OutputDebugStringA;
58845918
POUTPUT_DEBUG_STRING_W OutputDebugStringW;
58855919

@@ -5956,6 +5990,20 @@ typedef struct _Struct_size_bytes_(SizeOfStruct) _RTL {
59565990

59575991
} RTL, *PRTL, **PPRTL;
59585992

5993+
FORCEINLINE
5994+
ULONG
5995+
FilterLargePageFlags(
5996+
_In_ PRTL Rtl,
5997+
_In_ ULONG Flags
5998+
)
5999+
{
6000+
if (!Rtl->Flags.IsLargePageEnabled) {
6001+
return Flags & ~(MEM_LARGE_PAGES | SEC_LARGE_PAGES);
6002+
} else {
6003+
return Flags;
6004+
}
6005+
}
6006+
59596007
FORCEINLINE
59606008
ULONG
59616009
TrailingZeros(
@@ -8482,6 +8530,56 @@ Return Value:
84828530
); \
84838531
} while (0)
84848532

8533+
/*++
8534+
8535+
VOID
8536+
READ_REG_DWORD(
8537+
_In_ HKEY Key,
8538+
_In_ LITERAL Name,
8539+
_In_ PDWORD DwordPointer
8540+
);
8541+
8542+
Routine Description:
8543+
8544+
This is a helper macro for reading REG_DWORD values from the registry.
8545+
8546+
Arguments:
8547+
8548+
Key - Supplies an HKEY handle that represents an open registry key with
8549+
appropriate read access.
8550+
8551+
Name - Name of the registry key to read. This is converted into a literal
8552+
wide character string by the macro (e.g. MaxNoneRefCount will become
8553+
L"MaxNoneRefCount").
8554+
8555+
DwordPointer - Supplies a pointer to a DWORD that will receive the
8556+
registry key value.
8557+
8558+
Return Value:
8559+
8560+
None.
8561+
8562+
N.B. If an error occurs, 0 will be written to DwordPointer.
8563+
8564+
--*/
8565+
8566+
#define READ_REG_DWORD(Key, Name, DwordPointer) do { \
8567+
ULONG DwordLength = sizeof(*DwordPointer); \
8568+
Result = RegGetValueW( \
8569+
Key, \
8570+
NULL, \
8571+
L#Name, \
8572+
RRF_RT_REG_DWORD, \
8573+
NULL, \
8574+
(PVOID)DwordPointer, \
8575+
&DwordLength \
8576+
); \
8577+
if (Result != ERROR_SUCCESS) { \
8578+
*DwordPointer = 0; \
8579+
} \
8580+
} while (0)
8581+
8582+
84858583
/*++
84868584
84878585
VOID

TraceStore/TraceStore.vcxproj

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,6 @@
125125
<Project>{4868dda6-86d8-4fdf-ab71-8f9ea51a1142}</Project>
126126
</ProjectReference>
127127
</ItemGroup>
128-
<ItemGroup>
129-
<None Include="TraceStoreKernels.cu" />
130-
</ItemGroup>
131128
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
132129
<ImportGroup Label="ExtensionTargets">
133130
</ImportGroup>

TraceStore/TraceStore.vcxproj.filters

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,6 @@
144144
<Filter>Source Files</Filter>
145145
</ClCompile>
146146
</ItemGroup>
147-
<ItemGroup>
148-
<None Include="TraceStoreKernels.cu">
149-
<Filter>Source Files</Filter>
150-
</None>
151-
</ItemGroup>
152147
<!--
153148
<ItemGroup>
154149
<MASM Include="..\Rtl\__chkstk.s" Condition="'$(Platform)' == 'x64'">

0 commit comments

Comments
 (0)