China launches classified TJS-16 spacecraft; companion object emerges alongside earlier TJS-15 satellite

-rough draft

https://spacenews.com/china-launches-classified-tjs-16-spacecraft-companion-object-emerges-alongside-earlier-tjs-15-satellite/

The additional object is intended for signal amplification.

Feliks Tomasz Konczakowski

https://en.wikipedia.org/wiki/List_of_largest_optical_refracting_telescopes

cm surface resolution

Very Large Array https://public.nrao.edu/telescopes/vla/

Number of antennas	28 (27 active and 1 spare)
Dish size	25 meters
Weight	230 tons
Latitude	34°04'43.497" North
Longitude	107°37'03.819" West
Dish Surface	Aluminum panels accurate to within 0.5 mm
BUS (backup structure)	Steel
Frequencies	From 1.0 GHz to 50 GHz
Resolution	0.2 arcseconds to 0.04 arcseconds
Reconfigurable Array	North arm is 11 miles long, two other arms are 13 miles long
A configuration size	22.62 miles across
B configuration size	7.08 miles across
C configuration size	2.11 miles across
D configuration size	0.64 miles across

https://github.com/luigifcruz/blade/blob/aef4bf5230ab64af7b3e22de60e71f4bb52d6aae/benchmarks/pipelines/ata/mode-b/mode_b.cc#L131-L147

#include <memory>
#include <cassert>

#include "blade/base.hh"
#include "blade/logger.hh"
#include "blade/runner.hh"
#include "blade/plan.hh"
#include "blade/pipelines/ata/mode_b.hh"

extern "C" {
#include "mode_b.h"
}

// Bring the Blade core and the ATA pipeline names into scope for this file.
using namespace Blade;
using namespace Blade::Pipelines::ATA;

// Pipeline type driven by this benchmark; the output element type comes from
// the BLADE_ATA_MODE_B_OUTPUT_ELEMENT_T macro.
using TestPipeline = ModeB<BLADE_ATA_MODE_B_OUTPUT_ELEMENT_T>;

// Single runner shared by every blade_ata_b_* entry point. It is null until
// blade_ata_b_initialize() assigns it and null again after
// blade_ata_b_terminate() resets it.
static std::unique_ptr<Runner<TestPipeline>> runner;
// Placeholder tensors passed to Plan::TransferIn on every enqueue. Both are
// constructed with {1} (presumably a one-element shape — TODO confirm) and
// their element 0 is filled in by blade_ata_b_initialize().
static Tensor<Device::CPU, F64> dummyJulianDate({1});
static Tensor<Device::CPU, F64> dummyDut1({1});

// Creates the file-scope Blade runner for the ATA Mode B benchmark pipeline.
//
// numberOfWorkers: number of pipeline workers the runner is created with.
//
// Returns true whenever the function returns normally. If a runner already
// exists, a fatal message is logged and Result::ASSERTION_ERROR is raised
// through BL_CHECK_THROW.
bool blade_ata_b_initialize(U64 numberOfWorkers) {
    // Refuse to initialize twice.
    if (runner) {
        BL_FATAL("Can't initialize because Blade Runner is already initialized.");
        BL_CHECK_THROW(Result::ASSERTION_ERROR);
    }

    // seconds / 86400 + 2440587.5 is the Unix-time to Julian Date conversion;
    // the timestamp itself is a fixed placeholder for the benchmark.
    dummyJulianDate[0] = (1649366473.0 / 86400) + 2440587.5;
    dummyDut1[0] = 0.0;

    // The second argument is the pipeline configuration, written as a
    // designated initializer; member order must follow the Config declaration.
    runner = Runner<TestPipeline>::New(numberOfWorkers, {
        // Input dimensions, in the order antenna, channel, time, polarization.
        .inputShape = ArrayShape({
            BLADE_ATA_MODE_B_NANT,
            BLADE_ATA_MODE_B_NCHAN,
            BLADE_ATA_MODE_B_NTIME,
            BLADE_ATA_MODE_B_NPOL,
        }),

        .preBeamformerChannelizerRate = BLADE_ATA_MODE_B_CHANNELIZER_RATE,

        // 6500.125 MHz expressed in Hz.
        .phasorObservationFrequencyHz = 6500.125*1e6,
        .phasorChannelBandwidthHz = 0.5e6,
        .phasorTotalBandwidthHz = 1.024e9,
        .phasorFrequencyStartIndex = 352,
        .phasorReferenceAntennaIndex = 0,
        // Longitude/latitude are given in degrees and converted to radians;
        // ALT is presumably meters — TODO confirm.
        .phasorArrayReferencePosition = {
            .LON = BL_DEG_TO_RAD(-121.470733), 
            .LAT = BL_DEG_TO_RAD(40.815987),
            .ALT = 1020.86,
        },
        // Presumably radians — TODO confirm against the phasor module.
        .phasorBoresightCoordinate = {
            .RA = 0.64169,
            .DEC = 1.079896295,
        },
        // Twenty three-component antenna positions; the trailing comments look
        // like antenna names. Coordinate frame and units are not visible here
        // (presumably ECEF meters — TODO confirm).
        .phasorAntennaPositions = {
            {-2524041.5388905862, -4123587.965024342, 4147646.4222955606},    // 1c 
            {-2524068.187873109, -4123558.735413135, 4147656.21282186},       // 1e 
            {-2524087.2078100787, -4123532.397416349, 4147670.9866770394},    // 1g 
            {-2524103.384010733, -4123511.111598937, 4147682.4133068994},     // 1h 
            {-2524056.730228759, -4123515.287949227, 4147706.4850287656},     // 1k 
            {-2523986.279601761, -4123497.427940991, 4147766.732988923},      // 2a 
            {-2523970.301363642, -4123515.238502669, 4147758.790023165},      // 2b 
            {-2523983.5419911123, -4123528.1422073604, 4147737.872218138},    // 2c 
            {-2523941.5221860334, -4123568.125040547, 4147723.8292249846},    // 2e 
            {-2524074.096220788, -4123468.5182652213, 4147742.0422435375},    // 2h 
            {-2524058.6409591637, -4123466.5112451194, 4147753.4513993543},   // 2j 
            {-2524026.989692545, -4123480.9405167866, 4147758.2356800516},    // 2l 
            {-2524048.5254066754, -4123468.3463909747, 4147757.835369889},    // 2k 
            {-2524000.5641107005, -4123498.2984570004, 4147756.815976133},    // 2m 
            {-2523945.086670364, -4123480.3638816103, 4147808.127865142},     // 3d 
            {-2523950.6822576034, -4123444.7023326857, 4147839.7474427638},   // 3l 
            {-2523880.869769226, -4123514.3375464156, 4147813.413426994},     // 4e 
            {-2523930.3747946257, -4123454.3080821196, 4147842.6449955846},   // 4g 
            {-2523898.1150373477, -4123456.314794732, 4147860.3045849088},    // 4j 
            {-2523824.598229116, -4123527.93080514, 4147833.98936114}         // 5b
        },
        // Calibration tensor sized antenna x (channel * channelizer rate) x 1 x
        // polarization. No values are written to it in this function.
        .phasorAntennaCalibrations = ArrayTensor<Device::CPU, CF64>({
            BLADE_ATA_MODE_B_NANT,
            BLADE_ATA_MODE_B_NCHAN * BLADE_ATA_MODE_B_CHANNELIZER_RATE,
            1,
            BLADE_ATA_MODE_B_NPOL,
        }),
        // Eight beams: the first is offset, the other seven repeat the
        // boresight coordinate given above.
        .phasorBeamCoordinates = {
            {0.63722, 1.07552424},
            {0.64169, 1.079896295},
            {0.64169, 1.079896295},
            {0.64169, 1.079896295},
            {0.64169, 1.079896295},
            {0.64169, 1.079896295},
            {0.64169, 1.079896295},
            {0.64169, 1.079896295},
        },

        .beamformerIncoherentBeam = BLADE_ATA_MODE_B_ENABLE_INCOHERENT_BEAM,

        .detectorEnable = BLADE_ATA_MODE_B_DETECTOR_ENABLED,
        .detectorIntegrationSize = BLADE_ATA_MODE_B_DETECTOR_INTEGRATION,
        .detectorNumberOfOutputPolarizations = BLADE_ATA_MODE_B_DETECTOR_POLS,
    });

    // Terminate if profiling.
    // NOTE(review): the function still returns true after tearing the runner
    // down here, so the other entry points will then see a null runner —
    // confirm this is intended for profiling runs.
    if (Memory::Profiler::IsCapturing()) {
        blade_ata_b_terminate();
    }

    return true;
}

void blade_ata_b_terminate() {
    if (!runner) {
        BL_FATAL("Can't terminate because Blade Runner isn't initialized.");
        BL_CHECK_THROW(Result::ASSERTION_ERROR);
    }
    runner.reset();
}

// Reports size() of the default worker's input buffer.
// The runner must exist (checked with assert only).
U64 blade_ata_b_get_input_size() {
    assert(runner != nullptr);

    auto& worker = runner->getWorker();
    return worker.getInputBuffer().size();
}

// Reports size() of the default worker's output buffer.
// The runner must exist (checked with assert only).
U64 blade_ata_b_get_output_size() {
    assert(runner != nullptr);

    auto& worker = runner->getWorker();
    return worker.getOutputBuffer().size();
}

bool blade_ata_b_enqueue(void* input_ptr, void* output_ptr, U64 id) {
    assert(runner);

    return runner->enqueue([&](auto& worker) {
        // Convert C pointers to Blade::Vector.
        auto input = ArrayTensor<Device::CPU, CI8>(input_ptr, worker.getInputBuffer().shape());
        auto output = ArrayTensor<Device::CPU, BLADE_ATA_MODE_B_OUTPUT_ELEMENT_T>(output_ptr, 
                worker.getOutputBuffer().shape());

        // Transfer input data from CPU memory to the worker.
        Plan::TransferIn(worker, dummyJulianDate, dummyDut1, input);

        // Compute block.
        Plan::Compute(worker);

        // Transfer output data from the worker to the CPU memory.
        Plan::TransferOut(output, worker.getOutputBuffer(), worker);

        return id;
    });
}

// Forwards to Runner::dequeue and returns its result; `id` is passed through
// unchanged. The runner must exist (checked with assert only).
bool blade_ata_b_dequeue(U64* id) {
    assert(runner != nullptr);

    const bool dequeued = runner->dequeue(id);
    return dequeued;
}

https://github.com/luigifcruz/blade/blob/aef4bf5230ab64af7b3e22de60e71f4bb52d6aae/include/blade/runner.hh

#ifndef BLADE_RUNNER_HH
#define BLADE_RUNNER_HH

#include <deque>
#include <functional>
#include <memory>
#include <vector>

#include "blade/logger.hh"
#include "blade/module.hh"
#include "blade/pipeline.hh"
#include "blade/macros.hh"

namespace Blade {

template<class Pipeline>
class BLADE_API Runner {
 public:
    static std::unique_ptr<Runner<Pipeline>> New(const U64& numberOfWorkers,
                                                 const typename Pipeline::Config& config,
                                                 const BOOL& printET = true) {
        return std::make_unique<Runner<Pipeline>>(numberOfWorkers, config, printET);
    }

    explicit Runner(const U64& numberOfWorkers,
                    const typename Pipeline::Config& config,
                    const BOOL& printET = true) {
        if (printET) {
            BL_LOG_PRINT_ET();
        }

        BL_INFO("Instantiating new runner.");

        if (numberOfWorkers == 0) {
            BL_FATAL("Number of worker has to be larger than zero.");
            BL_CHECK_THROW(Result::ASSERTION_ERROR);
        }

        for (U64 i = 0; i < numberOfWorkers; i++) {
            BL_DEBUG("Initializing new worker.");
            workers.push_back(std::make_unique<Pipeline>(config));
        }
    }

    constexpr Pipeline& getWorker(const U64& index = 0) const {
        return *workers[index];
    }

    constexpr const U64& getHead() const {
        return head;
    }
    
    constexpr const bool slotAvailable() const {
        return jobs.size() != workers.size();
    }

    constexpr const bool empty() const {
        return jobs.size() == 0;
    }
    
    constexpr Pipeline& getNextWorker() {
        return *workers[head];
    }

    const Result applyToAllWorkers(const std::function<const Result(Pipeline&)>& modifier,
                                   const bool block = false) {
        for (auto& worker : workers) {
             BL_CHECK(modifier(*worker));
        }

        if (block) {
            for (auto& worker : workers) {
                 BL_CHECK(worker->synchronize());
            }
        }

        return Result::SUCCESS;
    }

    bool enqueue(const std::function<const U64(Pipeline&)>& jobFunc) {
        // Return if there are no workers available.
        if (jobs.size() == workers.size()) {
            return false;
        }

        try {
            jobs.push_back({
                .id = jobFunc(*workers[head]),
                .worker = workers[head],
            });
        } catch (const Result& err) {
            // Print user friendly error and issue fatal error.
            if (err == Result::PLAN_ERROR_ACCUMULATION_COMPLETE) {
                BL_FATAL("Can't accumulate block because buffer is full.");
                BL_CHECK_THROW(err);
            }

            if (err == Result::PLAN_ERROR_DESTINATION_NOT_SYNCHRONIZED) {
                BL_FATAL("Can't transfer data because destination is not synchronized.");
                BL_CHECK_THROW(err);
            }

            if (err == Result::PLAN_ERROR_NO_ACCUMULATOR) {
                BL_FATAL("This mode doesn't support accumulation.");
                BL_CHECK_THROW(err);
            }

            if (err == Result::PLAN_ERROR_NO_SLOT) {
                BL_FATAL("No slot available after compute. Data has nowhere to go.")
                BL_CHECK_THROW(err);
            }

            // Ignore if throw was a skip operation.
            if (err == Result::PLAN_SKIP_ACCUMULATION_INCOMPLETE || 
                err == Result::PLAN_SKIP_COMPUTE_INCOMPLETE ||
                err == Result::PLAN_SKIP_USER_INITIATED ||
                err == Result::PLAN_SKIP_NO_DEQUEUE || 
                err == Result::PLAN_SKIP_NO_SLOT) {
                return false;
            }

            // Ignore if throw originates from exhaustion.
            if (err == Result::EXHAUSTED) {
                return false;
            }

            BL_FATAL("Unknown error.");

            // Fatal error otherwise.
            BL_CHECK_THROW(err);
        }

        // Bump job queue head index.
        head = (head + 1) % workers.size();

        return true;
    }

    bool dequeue(U64* id) {
        // Return if there are no jobs.
        if (jobs.size() == 0) {
            return false;
        }

        const auto& job = jobs.front();

        // Synchronize front if all workers have jobs.
        if (jobs.size() == workers.size()) {
            job.worker->synchronize();
        }

        // Return if front isn't synchronized.
        if (!job.worker->isSynchronized()) {
            return false;
        }

        if (id != nullptr) {
            *id = job.id;
        }

        jobs.pop_front();

        return true;
    }

 private:
    struct Job {
        U64 id;
        std::unique_ptr<Pipeline>& worker;
    };

    U64 head = 0;
    std::deque<Job> jobs;
    std::vector<std::unique_ptr<Pipeline>> workers;
};

}  // namespace Blade

#endif

https://github.com/luigifcruz/blade/blob/aef4bf5230ab64af7b3e22de60e71f4bb52d6aae/src/pipeline.cc

#define BL_LOG_DOMAIN "PIPELINE"

#include "blade/pipeline.hh"

namespace Blade {

// Builds an idle pipeline and creates the non-blocking CUDA stream it runs on.
//
// numberOfAccumulationSteps / numberOfComputeSteps: stored as given and
// reported in the log; all step counters start at zero.
//
// A failure to create the stream is logged and raised through
// BL_CUDA_CHECK_THROW.
Pipeline::Pipeline(const U64& numberOfAccumulationSteps,
                   const U64& numberOfComputeSteps)
     : state(State::IDLE),
       numberOfAccumulationSteps(numberOfAccumulationSteps),
       numberOfComputeSteps(numberOfComputeSteps),
       accumulationStepCounter(0),
       computeStepCounter(0),
       currentComputeCount(0) {
    BL_INFO("Pipeline with {} accumulation and {} compute steps.", 
            numberOfAccumulationSteps, numberOfComputeSteps);

    BL_CUDA_CHECK_THROW(cudaStreamCreateWithFlags(&this->stream,
            cudaStreamNonBlocking), [&]{
        BL_FATAL("Failed to create CUDA stream: {}", err);
    });
}

// Waits for outstanding work, then releases the CUDA objects owned by this
// pipeline. Return codes are deliberately not checked: a destructor has no
// way to report them.
Pipeline::~Pipeline() {
    this->synchronize();
    if (this->state == State::GRAPH) {
        // State::GRAPH is only entered after compute() has both captured
        // `graph` and instantiated `instance`, so both need to be released.
        cudaGraphExecDestroy(this->instance);
        cudaGraphDestroy(this->graph);
    }
    cudaStreamDestroy(this->stream);
}

// Advances the accumulation step counter by one and returns the new value.
const U64 Pipeline::incrementAccumulatorStep() {
    accumulationStepCounter += 1;
    return accumulationStepCounter;
}

// Resets the accumulation step counter to zero and returns the value it held
// before the reset.
const U64 Pipeline::resetAccumulatorSteps() {
    // Take a copy: a reference would alias the counter and read back as zero
    // after the reset below.
    const auto previous = accumulationStepCounter;
    accumulationStepCounter = 0;
    return previous;
}

// Advances the compute step counter by one and returns the new value.
const U64 Pipeline::incrementComputeStep() {
    computeStepCounter += 1;
    return computeStepCounter;
}

// Resets the compute step counter to zero and returns the value it held
// before the reset.
const U64 Pipeline::resetComputeSteps() {
    // Take a copy: a reference would alias the counter and read back as zero
    // after the reset below.
    const auto previous = computeStepCounter;
    computeStepCounter = 0;
    return previous;
}

// Blocks until all work queued on this pipeline's stream has completed.
// Returns Result::SUCCESS; a CUDA failure is logged and handled by
// BL_CUDA_CHECK.
const Result Pipeline::synchronize() {
    BL_CUDA_CHECK(cudaStreamSynchronize(stream), [&]{
        BL_FATAL("Failed to synchronize stream: {}", err);
    });

    return Result::SUCCESS;
}

bool Pipeline::isSynchronized() {
    return cudaStreamQuery(this->stream) == cudaSuccess;
}

const Result Pipeline::compute() {
    for (auto& module : this->modules) {
        BL_CHECK(module->preprocess(this->stream, this->currentComputeCount));
    }

    switch (state) {
        case State::GRAPH:
            BL_CUDA_CHECK(cudaGraphLaunch(this->instance, this->stream), [&]{
                BL_FATAL("Failed launch CUDA graph: {}", err);
            });
            break;
        case State::CACHED:
            BL_DEBUG("Creating CUDA Graph.");
            BL_CUDA_CHECK(cudaStreamBeginCapture(this->stream,
                cudaStreamCaptureModeGlobal), [&]{
                BL_FATAL("Failed to begin the capture of CUDA Graph: {}", err);
            });

            for (auto& module : this->modules) {
                BL_CHECK(module->process(this->stream));
            }

            BL_CUDA_CHECK(cudaStreamEndCapture(this->stream, &this->graph), [&]{
                BL_FATAL("Failed to end the capture of CUDA Graph: {}", err);
            });

            BL_CUDA_CHECK(cudaGraphInstantiate(&this->instance, this->graph,
                    NULL, NULL, 0), [&]{
                BL_FATAL("Failed to instantiate CUDA Graph: {}", err);
            });

            BL_CUDA_CHECK(cudaGraphLaunch(this->instance, this->stream), [&]{
                BL_FATAL("Failed launch CUDA graph: {}", err);
            });

            this->state = State::GRAPH;
            break;
        case State::IDLE:
            BL_DEBUG("Caching kernels ahead of CUDA Graph instantiation.");
            for (auto& module : this->modules) {
                BL_CHECK(module->process(this->stream));
            }
            this->state = State::CACHED;
            break;
        default:
            BL_FATAL("Internal error.");
            return Result::ERROR;
    }

    BL_CUDA_CHECK_KERNEL([&]{
        BL_FATAL("Failed to process: {}", err);
        return Result::CUDA_ERROR;
    });

    this->currentComputeCount += 1;

    return Result::SUCCESS;
}

}  // namespace Blade

https://github.com/luigifcruz/blade/blob/aef4bf5230ab64af7b3e22de60e71f4bb52d6aae/src/modules/beamformer/beamformer.cu

#include "cuComplex.h"
#include <stdint.h>

// Squared magnitude of `a`, returned in the real part with a zero imaginary
// part.
__device__ cuFloatComplex detect(const cuFloatComplex a) {
    const float power = (a.x * a.x) + (a.y * a.y);
    return make_cuFloatComplex(power, 0.0f);
}

template<uint64_t NBEAMS, uint64_t NANTS, uint64_t NCHANS,
         uint64_t NTIME, uint64_t NPOLS, uint64_t TBLOCK,
         bool EnableIncoherentBeam, bool EnableIncoherentBeamSqrt>
__global__ void ATA(const cuFloatComplex* input,
                    const cuFloatComplex* phasor,
                          cuFloatComplex* out) {
    int bi = threadIdx.x;
    int ti = bi + (blockIdx.y * TBLOCK);
    int ch = blockIdx.x;

    // Load the phasors to shared memory.
    __shared__ cuFloatComplex phr_cache[NBEAMS][NANTS][NPOLS];

    int iy = (ch * NPOLS) + (bi * NPOLS * NCHANS * NANTS);
    const int dy = NPOLS * NCHANS;

    if (bi < NBEAMS) {
        for (int a = 0; a < NANTS; a++, iy += dy) {
            phr_cache[bi][a][0] = phasor[iy+0];
            phr_cache[bi][a][1] = phasor[iy+1];
        }
    }

    __syncthreads();

    // Load the antenna values to registers.
    cuFloatComplex ant_cache[NANTS][NPOLS];

    int ix = (ch * NTIME * NPOLS) + (ti * NPOLS);
    const int dx = NTIME * NCHANS * NPOLS;

    for (int a = 0; a < NANTS; a++, ix += dx) {
        ant_cache[a][0] = input[ix+0];
        ant_cache[a][1] = input[ix+1];
    }

    // Multiply and accumulate.
    int iz = (ch * NTIME) + ti;
    const int dz = NTIME * NCHANS;

    for (int b = 0; b < NBEAMS; b++, iz += dz) {
        cuFloatComplex acc[NPOLS] = {{0.0, 0.0}};

        for (int a = 0; a < NANTS; a++) {
            acc[0] = cuCaddf(acc[0], cuCmulf(ant_cache[a][0], phr_cache[b][a][0]));
            acc[1] = cuCaddf(acc[1], cuCmulf(ant_cache[a][1], phr_cache[b][a][1]));
        }

        reinterpret_cast<float4*>(out)[iz] = *reinterpret_cast<float4*>(acc);
    }

    if (EnableIncoherentBeam) {
        cuFloatComplex acc[NPOLS] = {{0.0, 0.0}};

        for (int a = 0; a < NANTS; a++) {
            acc[0] = cuCaddf(acc[0], detect(cuCmulf(ant_cache[a][0], phr_cache[0][a][0])));
            acc[1] = cuCaddf(acc[1], detect(cuCmulf(ant_cache[a][1], phr_cache[0][a][1])));
        }

        if (EnableIncoherentBeamSqrt) {
            acc[0] = make_cuFloatComplex(sqrt(acc[0].x), acc[0].y);
            acc[1] = make_cuFloatComplex(sqrt(acc[1].x), acc[1].y);
        }

        reinterpret_cast<float4*>(out)[iz] = *reinterpret_cast<float4*>(acc);
    }
}

template<uint64_t NBEAMS, uint64_t NANTS, uint64_t NCHANS,
         uint64_t NTIME, uint64_t NPOLS, uint64_t TBLOCK,
         bool EnableIncoherentBeam, bool EnableIncoherentBeamSqrt>
__global__ void MeerKAT(const cuFloatComplex* input,
                        const cuFloatComplex* phasor,
                              cuFloatComplex* out) {
    int bi = threadIdx.x;
    int ti = bi + (blockIdx.y * TBLOCK);
    int ch = blockIdx.x;

    // Load the antenna values to registers.
    cuFloatComplex ant_cache[NANTS][NPOLS];

    int ix = (ch * NTIME * NPOLS) + (ti * NPOLS);
    const int dx = NTIME * NCHANS * NPOLS;

    for (int a = 0; a < NANTS; a++, ix += dx) {
        ant_cache[a][0] = input[ix+0];
        ant_cache[a][1] = input[ix+1];
    }

    // Multiply and accumulate.
    int iy = 0;
    int iz = (ch * NTIME) + ti;
    const int dz = NTIME * NCHANS;

    for (int b = 0; b < NBEAMS; b++, iz += dz) {
        cuFloatComplex acc[NPOLS] = {{0.0, 0.0}};

        for (int a = 0, x = ix; a < NANTS; a++, iy += 1, x += dx) {
            acc[0] = cuCaddf(acc[0], cuCmulf(ant_cache[a][0], phasor[iy]));
            acc[1] = cuCaddf(acc[1], cuCmulf(ant_cache[a][1], phasor[iy]));
        }

        reinterpret_cast<float4*>(out)[iz] = *reinterpret_cast<float4*>(acc);
    }
}

20250330

TJS-16 spacecraft and companion object for signal amplification -rough draft

Very Large Array https://public.nrao.edu/telescopes/vla/

No comments:

Post a Comment