Commit 7142dd73 authored by jinhai's avatar jinhai
Browse files

Merge branch 'fix_thermal_bug' into 'branch-0.5.0'

MS-601 Docker logs errors caused by a failure to read the CPU temperature

See merge request megasearch/milvus!666

Former-commit-id: 5dfb1e644bd65d932cd42cb9cede082ff3ab9da9
parents a594ab9a 2cb6a5d1
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-577 - Unittest Query randomly hung
- MS-587 - Count get wrong result after adding vectors and index built immediately
- MS-599 - search wrong result when table created with metric_type: IP
- MS-601 - Docker logs errors caused by a failure to read the CPU temperature

## Improvement
- MS-552 - Add and change the easylogging library
+1 −1
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ PrometheusMetrics::Init() {
            return s.code();
        }

        const std::string uri = std::string("/tmp/metrics");
        const std::string uri = std::string("/metrics");
        const std::size_t num_threads = 2;

        // Init Exposer
+39 −14
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
// under the License.

#include "metrics/SystemInfo.h"
#include "utils/Log.h"

#include <nvml.h>
#include <sys/types.h>
@@ -24,6 +25,9 @@
#include <iostream>
#include <string>
#include <utility>
#include<stdlib.h>
#include<dirent.h>
#include<stdio.h>

namespace milvus {
namespace server {
@@ -60,12 +64,12 @@ SystemInfo::Init() {
    nvmlReturn_t nvmlresult;
    nvmlresult = nvmlInit();
    if (NVML_SUCCESS != nvmlresult) {
        printf("System information initilization failed");
        SERVER_LOG_ERROR <<  "System information initilization failed";
        return;
    }
    nvmlresult = nvmlDeviceGetCount(&num_device_);
    if (NVML_SUCCESS != nvmlresult) {
        printf("Unable to get devidce number");
        SERVER_LOG_ERROR << "Unable to get devidce number";
        return;
    }

@@ -151,7 +155,7 @@ SystemInfo::getTotalCpuTime(std::vector<uint64_t>& work_time_array) {
    std::vector<uint64_t> total_time_array;
    FILE* file = fopen("/proc/stat", "r");
    if (file == NULL) {
        perror("Could not open stat file");
        SERVER_LOG_ERROR << "Could not open stat file";
        return total_time_array;
    }

@@ -162,7 +166,7 @@ SystemInfo::getTotalCpuTime(std::vector<uint64_t>& work_time_array) {
        char buffer[1024];
        char* ret = fgets(buffer, sizeof(buffer) - 1, file);
        if (ret == NULL) {
            perror("Could not read stat file");
            SERVER_LOG_ERROR << "Could not read stat file";
            fclose(file);
            return total_time_array;
        }
@@ -237,19 +241,40 @@ SystemInfo::GPUTemperature() {
/**
 * Read CPU temperatures from the Linux hwmon sysfs tree.
 *
 * Every entry of /sys/class/hwmon/ that is a symbolic link whose target
 * contains "coretemp" is treated as a CPU sensor; its "temp1_input" file is
 * parsed as a float and divided by 1000 before being stored (the kernel hwmon
 * interface reports temperatures in millidegrees Celsius).
 *
 * Failures are logged and never fatal: if the hwmon directory cannot be
 * opened an empty vector is returned, and a sensor whose file cannot be
 * opened or parsed is skipped.
 *
 * @return one value per readable coretemp sensor, in directory-iteration
 *         order; empty when none could be read.
 */
std::vector<float>
SystemInfo::CPUTemperature() {
    std::vector<float> result;
    const std::string path = "/sys/class/hwmon/";

    DIR* dir = opendir(path.c_str());
    if (dir == nullptr) {
        SERVER_LOG_ERROR << "Could not open hwmon directory";
        return result;
    }

    struct dirent* ptr = nullptr;
    while ((ptr = readdir(dir)) != nullptr) {
        std::string filename(path);
        filename.append(ptr->d_name);

        // readlink() does not NUL-terminate its output, so reserve one byte
        // and terminate the buffer ourselves before treating it as a string.
        char buf[256];
        const ssize_t len = readlink(filename.c_str(), buf, sizeof(buf) - 1);
        if (len == -1) {
            // Not a symbolic link (or unreadable): nothing to inspect.
            continue;
        }
        buf[len] = '\0';

        const std::string target(buf);
        if (target.find("coretemp") == std::string::npos) {
            continue;
        }

        const std::string object = filename + "/temp1_input";
        FILE* file = fopen(object.c_str(), "r");
        if (file == nullptr) {
            // A missing sensor file must not take the whole server down;
            // report it and move on to the next hwmon entry.
            SERVER_LOG_ERROR << "Could not open temperature file";
            continue;
        }

        float temp = 0.0f;
        if (fscanf(file, "%f", &temp) == 1) {
            result.push_back(temp / 1000);
        } else {
            SERVER_LOG_ERROR << "Could not read temperature file";
        }
        fclose(file);
    }

    closedir(dir);
    return result;
}

std::vector<uint64_t>
SystemInfo::GPUMemoryUsed() {