diff --git a/doc/process/weekly/week-04/group/weekly-summary-04.md b/doc/process/weekly/week-05/group/weekly-summary-04.md similarity index 100% rename from doc/process/weekly/week-04/group/weekly-summary-04.md rename to doc/process/weekly/week-05/group/weekly-summary-04.md diff --git a/doc/process/weekly/week-05/group/weekly-summary-05.md b/doc/process/weekly/week-06/group/weekly-summary-05.md similarity index 100% rename from doc/process/weekly/week-05/group/weekly-summary-05.md rename to doc/process/weekly/week-06/group/weekly-summary-05.md diff --git a/doc/process/weekly/week-06/group/weekly-summary-06.md b/doc/process/weekly/week-07/group/weekly-summary-06.md similarity index 100% rename from doc/process/weekly/week-06/group/weekly-summary-06.md rename to doc/process/weekly/week-07/group/weekly-summary-06.md diff --git a/doc/process/weekly/week-07/group/weekly-summary-07.md b/doc/process/weekly/week-08/group/weekly-summary-07.md similarity index 100% rename from doc/process/weekly/week-07/group/weekly-summary-07.md rename to doc/process/weekly/week-08/group/weekly-summary-07.md diff --git a/doc/process/weekly/week-08/group/weekly-summary-08.md b/doc/process/weekly/week-09/group/weekly-summary-08.md similarity index 100% rename from doc/process/weekly/week-08/group/weekly-summary-08.md rename to doc/process/weekly/week-09/group/weekly-summary-08.md diff --git a/doc/process/weekly/week-09/group/weekly-summary-09.md b/doc/process/weekly/week-10/group/weekly-summary-09.md similarity index 100% rename from doc/process/weekly/week-09/group/weekly-summary-09.md rename to doc/process/weekly/week-10/group/weekly-summary-09.md diff --git a/doc/project/02-设计文档/README.md b/doc/project/02-设计文档/README.md deleted file mode 100644 index 24505c5..0000000 --- a/doc/project/02-设计文档/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# 02-设计文档 - -占位:说明该目录存放系统与详细设计文档。 \ No newline at end of file diff --git a/doc/project/03-计划文档/README.md b/doc/project/03-计划文档/README.md deleted file mode 100644 index 346ea0a..0000000 --- a/doc/project/03-计划文档/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# 03-计划文档 - -占位:说明该目录存放项目阶段/迭代计划相关文档。 \ No newline at end of file diff --git a/doc/project/04-用户手册/README.md b/doc/project/04-用户手册/README.md deleted file mode 100644 index 4f474b0..0000000 --- a/doc/project/04-用户手册/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# 04-用户手册 - -占位:说明该目录存放最终用户手册及相关资料。 \ No newline at end of file diff --git a/doc/project/05-测试报告/README.md b/doc/project/05-测试报告/README.md deleted file mode 100644 index a50e5b8..0000000 --- a/doc/project/05-测试报告/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# 05-测试报告 - -占位:说明该目录存放测试结果与分析报告。 \ No newline at end of file diff --git a/src/backend/app/algorithms/evaluates/clip/__init__.py b/src/backend/app/algorithms/evaluates/clip/__init__.py new file mode 100644 index 0000000..dcc5619 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/clip/__init__.py @@ -0,0 +1 @@ +from .clip import * diff --git a/src/backend/app/algorithms/evaluates/clip/bpe_simple_vocab_16e6.txt.gz b/src/backend/app/algorithms/evaluates/clip/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 0000000..7b5088a Binary files /dev/null and b/src/backend/app/algorithms/evaluates/clip/bpe_simple_vocab_16e6.txt.gz differ diff --git a/src/backend/app/algorithms/evaluates/clip/clip.py b/src/backend/app/algorithms/evaluates/clip/clip.py new file mode 100644 index 0000000..f7a5da5 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/clip/clip.py @@ -0,0 +1,245 @@ +import hashlib +import os +import urllib +import warnings 
+from typing import Any, Union, List +from pkg_resources import packaging + +import torch +from PIL import Image +from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize +from tqdm import tqdm + +from .model import build_model +from .simple_tokenizer import SimpleTokenizer as _Tokenizer + +try: + from torchvision.transforms import InterpolationMode + BICUBIC = InterpolationMode.BICUBIC +except ImportError: + BICUBIC = Image.BICUBIC + + +if packaging.version.parse(torch.__version__) < packaging.version.parse("1.7.1"): + warnings.warn("PyTorch version 1.7.1 or higher is recommended") + + +__all__ = ["available_models", "load", "tokenize"] +_tokenizer = _Tokenizer() + +_MODELS = { + "RN50": "https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt", + "RN101": "https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt", + "RN50x4": "https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt", + "RN50x16": "https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt", + "RN50x64": "https://openaipublic.azureedge.net/clip/models/be1cfb55d75a9666199fb2206c106743da0f6468c9d327f3e0d0a543a9919d9c/RN50x64.pt", + "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt", + "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt", + "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt", +} + + +def _download(url: str, root: str): + os.makedirs(root, exist_ok=True) + filename = os.path.basename(url) + + expected_sha256 = url.split("/")[-2] + download_target = os.path.join(root, filename) + + if os.path.exists(download_target) and not os.path.isfile(download_target): + raise RuntimeError(f"{download_target} exists and is not a regular file") + + if os.path.isfile(download_target): + if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256: + return download_target + else: + warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") + + with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: + with tqdm(total=int(source.info().get("Content-Length")), ncols=80, unit='iB', unit_scale=True, unit_divisor=1024) as loop: + while True: + buffer = source.read(8192) + if not buffer: + break + + output.write(buffer) + loop.update(len(buffer)) + + if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256: + raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match") + + return download_target + + +def _convert_image_to_rgb(image): + return image.convert("RGB") + + +def _transform(n_px): + return Compose([ + Resize(n_px, interpolation=BICUBIC), + CenterCrop(n_px), + _convert_image_to_rgb, + ToTensor(), + Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), + ]) + + +def available_models() -> List[str]: + """Returns the names of available CLIP models""" + 
return list(_MODELS.keys()) + + +def load(name: str, device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu", jit: bool = False, download_root: str = None): + """Load a CLIP model + + Parameters + ---------- + name : str + A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict + + device : Union[str, torch.device] + The device to put the loaded model + + jit : bool + Whether to load the optimized JIT model or more hackable non-JIT model (default). + + download_root: str + path to download the model files; by default, it uses "~/.cache/clip" + + Returns + ------- + model : torch.nn.Module + The CLIP model + + preprocess : Callable[[PIL.Image], torch.Tensor] + A torchvision transform that converts a PIL image into a tensor that the returned model can take as its input + """ + if name in _MODELS: + model_path = _download(_MODELS[name], download_root or os.path.expanduser("~/.cache/clip")) + elif os.path.isfile(name): + model_path = name + else: + raise RuntimeError(f"Model {name} not found; available models = {available_models()}") + + with open(model_path, 'rb') as opened_file: + try: + # loading JIT archive + model = torch.jit.load(opened_file, map_location=device if jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if jit: + warnings.warn(f"File {model_path} is not a JIT archive. Loading as a state dict instead") + jit = False + state_dict = torch.load(opened_file, map_location="cpu") + + if not jit: + model = build_model(state_dict or model.state_dict()).to(device) + if str(device) == "cpu": + model.float() + return model, _transform(model.visual.input_resolution) + + # patch the device names + device_holder = torch.jit.trace(lambda: torch.ones([]).to(torch.device(device)), example_inputs=[]) + device_node = [n for n in device_holder.graph.findAllNodes("prim::Constant") if "Device" in repr(n)][-1] + + def _node_get(node: torch._C.Node, key: str): + """Gets attributes of a node which is polymorphic over return type. 
+ + From https://github.com/pytorch/pytorch/pull/82628 + """ + sel = node.kindOf(key) + return getattr(node, sel)(key) + + def patch_device(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(_node_get(node, "value")).startswith("cuda"): + node.copyAttributes(device_node) + + model.apply(patch_device) + patch_device(model.encode_image) + patch_device(model.encode_text) + + # patch dtype to float32 on CPU + if str(device) == "cpu": + float_holder = torch.jit.trace(lambda: torch.ones([]).float(), example_inputs=[]) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [1, 2]: # dtype can be the second or third argument to aten::to() + if _node_get(inputs[i].node(), "value") == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_image) + patch_float(model.encode_text) + + model.float() + + return model, _transform(model.input_resolution.item()) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77, truncate: bool = False) -> Union[torch.IntTensor, torch.LongTensor]: + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + truncate: bool + Whether to truncate the text in case its encoding is longer than the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length]. + We return LongTensor when torch version is <1.8.0, since older index_select requires indices to be long. 
+ """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + if packaging.version.parse(torch.__version__) < packaging.version.parse("1.8.0"): + result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) + else: + result = torch.zeros(len(all_tokens), context_length, dtype=torch.int) + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + if truncate: + tokens = tokens[:context_length] + tokens[-1] = eot_token + else: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = torch.tensor(tokens) + + return result diff --git a/src/backend/app/algorithms/evaluates/clip/model.py b/src/backend/app/algorithms/evaluates/clip/model.py new file mode 100644 index 0000000..232b779 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/clip/model.py @@ -0,0 +1,436 @@ +from collections import OrderedDict +from typing import Tuple, Union + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.relu1 = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.relu2 = nn.ReLU(inplace=True) + + self.avgpool = nn.AvgPool2d(stride) if stride > 1 else nn.Identity() + + self.conv3 = nn.Conv2d(planes, planes * self.expansion, 1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu3 = nn.ReLU(inplace=True) + + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + # downsampling layer is prepended with an avgpool, and the subsequent convolution has stride 1 + self.downsample = nn.Sequential(OrderedDict([ + ("-1", nn.AvgPool2d(stride)), + ("0", nn.Conv2d(inplanes, planes * self.expansion, 1, stride=1, bias=False)), + ("1", nn.BatchNorm2d(planes * self.expansion)) + ])) + + def forward(self, x: torch.Tensor): + identity = x + + out = self.relu1(self.bn1(self.conv1(x))) + out = self.relu2(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu3(out) + return out + + +class AttentionPool2d(nn.Module): + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + self.positional_embedding = nn.Parameter(torch.randn(spacial_dim ** 2 + 1, embed_dim) / embed_dim ** 0.5) + self.k_proj = nn.Linear(embed_dim, embed_dim) + self.q_proj = nn.Linear(embed_dim, embed_dim) + self.v_proj = nn.Linear(embed_dim, embed_dim) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim) + self.num_heads = num_heads + + def forward(self, x): + x = x.flatten(start_dim=2).permute(2, 0, 1) # NCHW -> (HW)NC + x = torch.cat([x.mean(dim=0, keepdim=True), x], dim=0) # (HW+1)NC + x = x + self.positional_embedding[:, None, :].to(x.dtype) # (HW+1)NC + x, _ = F.multi_head_attention_forward( + query=x[:1], key=x, value=x, + embed_dim_to_check=x.shape[-1], + 
num_heads=self.num_heads, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + in_proj_weight=None, + in_proj_bias=torch.cat([self.q_proj.bias, self.k_proj.bias, self.v_proj.bias]), + bias_k=None, + bias_v=None, + add_zero_attn=False, + dropout_p=0, + out_proj_weight=self.c_proj.weight, + out_proj_bias=self.c_proj.bias, + use_separate_proj_weight=True, + training=self.training, + need_weights=False + ) + return x.squeeze(0) + + +class ModifiedResNet(nn.Module): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2d(3, width // 2, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(width // 2) + self.relu1 = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(width // 2, width // 2, kernel_size=3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(width // 2) + self.relu2 = nn.ReLU(inplace=True) + self.conv3 = nn.Conv2d(width // 2, width, kernel_size=3, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(width) + self.relu3 = nn.ReLU(inplace=True) + self.avgpool = nn.AvgPool2d(2) + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + def stem(x): + x = self.relu1(self.bn1(self.conv1(x))) + x = self.relu2(self.bn2(self.conv2(x))) + x = self.relu3(self.bn3(self.conv3(x))) + x = self.avgpool(x) + return x + + x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class LayerNorm(nn.LayerNorm): + """Subclass torch's LayerNorm to handle fp16.""" + + def forward(self, x: torch.Tensor): + orig_type = x.dtype + ret = super().forward(x.type(torch.float32)) + return ret.type(orig_type) + + +class QuickGELU(nn.Module): + def forward(self, x: torch.Tensor): + return x * torch.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Module): + def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor = None): + super().__init__() + + self.attn = nn.MultiheadAttention(d_model, n_head) + self.ln_1 = LayerNorm(d_model) + self.mlp = nn.Sequential(OrderedDict([ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)) + ])) + self.ln_2 = LayerNorm(d_model) + self.attn_mask = 
attn_mask + + def attention(self, x: torch.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0] + + def forward(self, x: torch.Tensor): + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class Transformer(nn.Module): + def __init__(self, width: int, layers: int, heads: int, attn_mask: torch.Tensor = None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x: torch.Tensor): + return self.resblocks(x) + + +class VisionTransformer(nn.Module): + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + self.conv1 = nn.Conv2d(in_channels=3, out_channels=width, kernel_size=patch_size, stride=patch_size, bias=False) + + scale = width ** -0.5 + self.class_embedding = nn.Parameter(scale * torch.randn(width)) + self.positional_embedding = nn.Parameter(scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width)) + self.ln_pre = LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = LayerNorm(width) + self.proj = nn.Parameter(scale * torch.randn(width, output_dim)) + + def forward(self, x: torch.Tensor): + x = self.conv1(x) # shape = [*, width, grid, grid] + x = x.reshape(x.shape[0], x.shape[1], -1) # shape = [*, width, grid ** 2] + x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] + x = torch.cat([self.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1) # shape = [*, grid ** 2 + 1, width] + x = x + self.positional_embedding.to(x.dtype) + x = self.ln_pre(x) + + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + + x = self.ln_post(x[:, 0, :]) + + if self.proj is not None: + x = x @ self.proj + + return x + + +class CLIP(nn.Module): + def __init__(self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int + ): + super().__init__() + + self.context_length = context_length + + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet( + layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width + ) + else: + vision_heads = vision_width // 64 + self.visual = VisionTransformer( + input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim + ) + + self.transformer = Transformer( + width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask() + ) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = nn.Parameter(torch.empty(self.context_length, transformer_width)) + self.ln_final = LayerNorm(transformer_width) + + self.text_projection = nn.Parameter(torch.empty(transformer_width, embed_dim)) + 
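+        # descriptive note: logit_scale is CLIP's learnable softmax temperature, stored as a log;
+        # its exp() starts at 1/0.07, the initialisation used in the CLIP paper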
self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + + self.initialize_parameters() + + def initialize_parameters(self): + nn.init.normal_(self.token_embedding.weight, std=0.02) + nn.init.normal_(self.positional_embedding, std=0.01) + + if isinstance(self.visual, ModifiedResNet): + if self.visual.attnpool is not None: + std = self.visual.attnpool.c_proj.in_features ** -0.5 + nn.init.normal_(self.visual.attnpool.q_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.k_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.v_proj.weight, std=std) + nn.init.normal_(self.visual.attnpool.c_proj.weight, std=std) + + for resnet_block in [self.visual.layer1, self.visual.layer2, self.visual.layer3, self.visual.layer4]: + for name, param in resnet_block.named_parameters(): + if name.endswith("bn3.weight"): + nn.init.zeros_(param) + + proj_std = (self.transformer.width ** -0.5) * ((2 * self.transformer.layers) ** -0.5) + attn_std = self.transformer.width ** -0.5 + fc_std = (2 * self.transformer.width) ** -0.5 + for block in self.transformer.resblocks: + nn.init.normal_(block.attn.in_proj_weight, std=attn_std) + nn.init.normal_(block.attn.out_proj.weight, std=proj_std) + nn.init.normal_(block.mlp.c_fc.weight, std=fc_std) + nn.init.normal_(block.mlp.c_proj.weight, std=proj_std) + + if self.text_projection is not None: + nn.init.normal_(self.text_projection, std=self.transformer.width ** -0.5) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # pytorch uses additive attention mask; fill with -inf + mask = torch.empty(self.context_length, self.context_length) + mask.fill_(float("-inf")) + mask.triu_(1) # zero out the lower diagonal + return mask + + @property + def dtype(self): + return self.visual.conv1.weight.dtype + + def encode_image(self, image): + return self.visual(image.type(self.dtype)) + + def encode_text(self, text): + x = self.token_embedding(text).type(self.dtype) # [batch_size, n_ctx, d_model] + + x = x + self.positional_embedding.type(self.dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.transformer(x) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x).type(self.dtype) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.text_projection + + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=1, keepdim=True) + text_features = text_features / text_features.norm(dim=1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = logit_scale * image_features @ text_features.t() + logits_per_text = logits_per_image.t() + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text + + +def convert_weights(model: nn.Module): + """Convert applicable model parameters to fp16""" + + def _convert_weights_to_fp16(l): + if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Linear)): + l.weight.data = l.weight.data.half() + if l.bias is not None: + l.bias.data = l.bias.data.half() + + if isinstance(l, nn.MultiheadAttention): + for attr in [*[f"{s}_proj_weight" for s in ["in", "q", "k", "v"]], "in_proj_bias", "bias_k", "bias_v"]: + tensor = getattr(l, attr) + if tensor is not None: + tensor.data = 
tensor.data.half() + + for name in ["text_projection", "proj"]: + if hasattr(l, name): + attr = getattr(l, name) + if attr is not None: + attr.data = attr.data.half() + + model.apply(_convert_weights_to_fp16) + + +def build_model(state_dict: dict): + vit = "visual.proj" in state_dict + + if vit: + vision_width = state_dict["visual.conv1.weight"].shape[0] + vision_layers = len([k for k in state_dict.keys() if k.startswith("visual.") and k.endswith(".attn.in_proj_weight")]) + vision_patch_size = state_dict["visual.conv1.weight"].shape[-1] + grid_size = round((state_dict["visual.positional_embedding"].shape[0] - 1) ** 0.5) + image_resolution = vision_patch_size * grid_size + else: + counts: list = [len(set(k.split(".")[2] for k in state_dict if k.startswith(f"visual.layer{b}"))) for b in [1, 2, 3, 4]] + vision_layers = tuple(counts) + vision_width = state_dict["visual.layer1.0.conv1.weight"].shape[0] + output_width = round((state_dict["visual.attnpool.positional_embedding"].shape[0] - 1) ** 0.5) + vision_patch_size = None + assert output_width ** 2 + 1 == state_dict["visual.attnpool.positional_embedding"].shape[0] + image_resolution = output_width * 32 + + embed_dim = state_dict["text_projection"].shape[1] + context_length = state_dict["positional_embedding"].shape[0] + vocab_size = state_dict["token_embedding.weight"].shape[0] + transformer_width = state_dict["ln_final.weight"].shape[0] + transformer_heads = transformer_width // 64 + transformer_layers = len(set(k.split(".")[2] for k in state_dict if k.startswith("transformer.resblocks"))) + + model = CLIP( + embed_dim, + image_resolution, vision_layers, vision_width, vision_patch_size, + context_length, vocab_size, transformer_width, transformer_heads, transformer_layers + ) + + for key in ["input_resolution", "context_length", "vocab_size"]: + if key in state_dict: + del state_dict[key] + + convert_weights(model) + model.load_state_dict(state_dict) + return model.eval() diff --git a/src/backend/app/algorithms/evaluates/clip/simple_tokenizer.py b/src/backend/app/algorithms/evaluates/clip/simple_tokenizer.py new file mode 100644 index 0000000..0a66286 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/clip/simple_tokenizer.py @@ -0,0 +1,132 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8+n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). 
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152-256-2+1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v+'' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + ( token[-1] + '',) + pairs = get_pairs(word) + + if not pairs: + return token+'' + + while True: + bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word)-1 and word[i+1] == second: + new_word.append(first+second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git a/src/backend/app/algorithms/evaluates/eva_gen_heatmap.py b/src/backend/app/algorithms/evaluates/eva_gen_heatmap.py new file mode 100644 index 0000000..fb27936 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/eva_gen_heatmap.py @@ -0,0 +1,520 @@ +"""Stable Diffusion 注意力热力图差异可视化工具 (可靠版 - 语义阶段聚合)。 + +本模块使用一种健壮的方法,通过在 Stable Diffusion 扩散模型(U-Net)的 +**早期时间步 (语义阶段)** 捕获并累加交叉注意力权重。这种方法能确保捕获到的 +注意力图信号集中且可靠,用于对比分析干净输入和扰动输入生成的图像对模型 +注意力机制的影响差异。 + +典型用法: + python eva_gen_heatmap.py \\ + --model_path /path/to/sd_model \\ + --image_path_a /path/to/clean_image.png \\ + --image_path_b /path/to/noisy_image.png \\ + --prompt_text "a photo of sks person" \\ + --target_word "sks" \\ + --output_dir output/heatmap_reports +""" + +# 通用参数解析与文件路径管理 +import argparse +import os +from pathlib import Path +from typing import Dict, Any, List, Tuple + +# 数值计算与深度学习依赖 +import 
torch +import torch.nn.functional as F +import numpy as np +import itertools +import warnings + +# 可视化依赖 +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +from matplotlib.colors import TwoSlopeNorm +from mpl_toolkits.axes_grid1 import make_axes_locatable + +# Diffusers 与 Transformers 依赖 +from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler +from diffusers.models.attention_processor import Attention +from transformers import CLIPTokenizer + +# 图像处理与数据读取 +from PIL import Image +from torchvision import transforms + +# 抑制非必要的警告输出 +warnings.filterwarnings("ignore", category=UserWarning) +warnings.filterwarnings("ignore", category=FutureWarning) + + +# ============== 核心模块:注意力捕获与聚合 ============== + +class AttentionMapProcessor: + """自定义注意力处理器,用于捕获 U-Net 交叉注意力层的权重。 + + 通过替换原始的 `Attention` 模块处理器,该类在模型前向传播过程中, + 将所有交叉注意力层的注意力权重(`attention_probs`)捕获并存储。 + + Attributes: + attention_maps (Dict[str, List[torch.Tensor]]): 存储捕获到的注意力图, + 键为层名称,值为该层在不同时间步捕获到的注意力图列表。 + pipeline (StableDiffusionPipeline): 正在处理的 Stable Diffusion 管线。 + original_processors (Dict[str, Any]): 存储原始的注意力处理器,用于恢复。 + current_layer_name (Optional[str]): 当前正在处理的注意力层的名称。 + """ + + def __init__(self, pipeline: StableDiffusionPipeline): + """初始化注意力处理器。 + + Args: + pipeline: Stable Diffusion 模型管线实例。 + """ + self.attention_maps: Dict[str, List[torch.Tensor]] = {} + self.pipeline = pipeline + self.original_processors = {} + self.current_layer_name = None + self._set_processors() + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor = None, + attention_mask: torch.Tensor = None + ) -> torch.Tensor: + """重载 __call__ 方法,执行注意力计算并捕获权重。 + + 此方法替代了原始的 `Attention.processor`,在计算交叉注意力时进行捕获。 + + Args: + attn: 当前的 `Attention` 模块实例。 + hidden_states: U-Net 隐状态 (query)。 + encoder_hidden_states: 文本编码器输出 (key/value),即交叉注意力输入。 + attention_mask: 注意力掩码。 + + Returns: + 计算后的输出隐状态。 + """ + # 如果不是交叉注意力(即 encoder_hidden_states 为 None),则调用原始处理器 + if encoder_hidden_states is None: + return attn.processor( + attn, hidden_states, encoder_hidden_states, attention_mask + ) + + # 1. 计算 Q, K, V + query = attn.to_q(hidden_states) + key = attn.to_k(encoder_hidden_states) + value = attn.to_v(encoder_hidden_states) + + # 2. 准备矩阵乘法 + query = attn.head_to_batch_dim(query) + key = attn.head_to_batch_dim(key) + + # 3. 计算 Attention Scores (Q @ K^T) + attention_scores = torch.baddbmm( + torch.empty( + query.shape[0], query.shape[1], key.shape[1], + dtype=query.dtype, device=query.device + ), + query, + key.transpose(1, 2), + beta=0, + alpha=attn.scale, + ) + + # 4. 计算 Attention Probabilities + attention_probs = attention_scores.softmax(dim=-1) + layer_name = self.current_layer_name + + # 5. 存储捕获的注意力图 + if layer_name not in self.attention_maps: + self.attention_maps[layer_name] = [] + + # 存储当前时间步的注意力权重 + self.attention_maps[layer_name].append(attention_probs.detach().cpu()) + + # 6. 计算输出 (Attention @ V) + value = attn.head_to_batch_dim(value) + hidden_states = torch.bmm(attention_probs, value) + hidden_states = attn.batch_to_head_dim(hidden_states) + + # 7. 
输出层 + hidden_states = attn.to_out[0](hidden_states) + hidden_states = attn.to_out[1](hidden_states) + + return hidden_states + + def _set_processors(self): + """注册自定义处理器,捕获 U-Net 中所有交叉注意力层的权重。 + + 遍历 U-Net 的所有子模块,找到所有交叉注意力层(`Attention` 且名称包含 `attn2`), + 并将其处理器替换为当前的实例。 + """ + for name, module in self.pipeline.unet.named_modules(): + if isinstance(module, Attention) and 'attn2' in name: + # 存储原始处理器以便后续恢复 + self.original_processors[name] = module.processor + + # 定义一个新的闭包函数,用于在调用前设置当前层的名称 + def set_layer_name(current_name): + def new_call(*args, **kwargs): + self.current_layer_name = current_name + return self.__call__(*args, **kwargs) + return new_call + + module.processor = set_layer_name(name) + + def remove(self): + """恢复 U-Net 的原始注意力处理器,清理钩子。""" + for name, original_processor in self.original_processors.items(): + module = self.pipeline.unet.get_submodule(name) + module.processor = original_processor + self.attention_maps = {} + + +def aggregate_word_attention( + attention_maps: Dict[str, List[torch.Tensor]], + tokenizer: CLIPTokenizer, + target_word: str, + input_ids: torch.Tensor +) -> np.ndarray: + """聚合所有层和语义时间步中目标词汇的注意力图,并进行归一化。 + + 聚合步骤: + 1. 识别目标词汇对应的 Token 索引。 + 2. 对每个层:将所有捕获时间步的注意力图求平均。 + 3. 提取目标 Token 对应的注意力子图,并对 Token 维度求和,对 Attention Heads 求平均。 + 4. 将不同分辨率的注意力图上采样到统一尺寸(64x64)。 + 5. 对所有层的结果进行累加(求和)。 + 6. 最终归一化到 [0, 1]。 + + Args: + attention_maps: 包含各层和时间步捕获的注意力图的字典。 + tokenizer: CLIP 分词器实例。 + target_word: 需要聚焦的关键词。 + input_ids: Prompt 对应的 Token ID 张量。 + + Returns: + 最终聚合并上采样到 64x64 尺寸的注意力热力图 (NumPy 数组)。 + + Raises: + ValueError: 如果无法在 Prompt 中找到目标词汇。 + RuntimeError: 如果未捕获到任何注意力数据。 + """ + + # 1. 识别目标词汇的 Token 索引 + prompt_tokens = tokenizer.convert_ids_to_tokens( + input_ids.squeeze().cpu().tolist() + ) + target_lower = target_word.lower() + target_indices = [] + + for i, token in enumerate(prompt_tokens): + cleaned_token = token.replace('Ġ', '').replace('_', '').lower() + # 查找目标词汇或以目标词汇开头的 token 索引,并排除特殊 token + if (input_ids.squeeze()[i] not in tokenizer.all_special_ids and + (target_lower in cleaned_token or + cleaned_token.startswith(target_lower))): + target_indices.append(i) + + if not target_indices: + print(f"[WARN] 目标词汇 '{target_word}' 未识别。请检查 Prompt 或 Target Word。") + raise ValueError("无法识别目标词汇的 token 索引。") + + # 2. 聚合逻辑 + all_attention_data = [] + # U-Net 输出的最大分辨率(64x64),总像素点数 + TARGET_SPATIAL_SIZE = 4096 + TARGET_MAP_SIZE = 64 + + for layer_name, step_maps in attention_maps.items(): + if not step_maps: + continue + + # 对该层捕获的所有时间步求平均,形状: (batch, heads, spatial_res, target_tokens_len) + avg_map_over_time = torch.stack(step_maps).mean(dim=0) + + # 移除批次维度 (假设 batch size = 1),形状: (heads, spatial_res, target_tokens_len) + attention_map = avg_map_over_time.squeeze(0) + + # 提取目标 token 的注意力图。形状: (heads, spatial_res, target_indices_len) + target_token_maps = attention_map[:, :, target_indices] + + # 对目标 token 求和 (dim=-1),对注意力头求平均 (dim=0),形状: (spatial_res,) + aggregated_map_flat = target_token_maps.sum(dim=-1).mean(dim=0).float() + + # 3. 
跨分辨率上采样 + if aggregated_map_flat.shape[0] != TARGET_SPATIAL_SIZE: + # 当前图的尺寸:16x16 (256) 或 32x32 (1024) + map_size = int(np.sqrt(aggregated_map_flat.shape[0])) + map_2d = aggregated_map_flat.reshape(map_size, map_size) + map_to_interp = map_2d.unsqueeze(0).unsqueeze(0) # [1, 1, H, W] + + # 使用双线性插值上采样到 64x64 + resized_map_2d = F.interpolate( + map_to_interp, + size=(TARGET_MAP_SIZE, TARGET_MAP_SIZE), + mode='bilinear', + align_corners=False + ) + resized_map_flat = resized_map_2d.squeeze().flatten() + all_attention_data.append(resized_map_flat) + else: + # 如果已经是 64x64,直接使用 + all_attention_data.append(aggregated_map_flat) + + if not all_attention_data: + raise RuntimeError("未捕获到注意力数据。可能模型或参数设置有误。") + + # 4. 对所有层的结果进行累加 (求和) + final_map_flat = torch.stack(all_attention_data).sum(dim=0).cpu().numpy() + + # 5. 最终归一化到 [0, 1] + final_map_flat = final_map_flat / (final_map_flat.max() + 1e-6) + + map_size = int(np.sqrt(final_map_flat.shape[0])) + final_map_np = final_map_flat.reshape(map_size, map_size) # 64x64 + + return final_map_np + + +def get_attention_map_from_image( + pipeline: StableDiffusionPipeline, + image_path: str, + prompt_text: str, + target_word: str +) -> Tuple[Image.Image, np.ndarray]: + """执行多时间步前向传播,捕获指定图片和 Prompt 的注意力图。 + + 通过只运行扩散过程中的语义阶段(早期时间步)来确保捕获到的注意力权重 + 具有高信号质量。 + + Args: + pipeline: Stable Diffusion 模型管线实例。 + image_path: 待处理的输入图片路径。 + prompt_text: 用于生成图片的 Prompt 文本。 + target_word: 需要聚焦和可视化的关键词。 + + Returns: + 包含 (原始图片, 最终上采样后的注意力图) 的元组。 + """ + print(f"\n-> 正在处理图片: {Path(image_path).name}") + image = Image.open(image_path).convert("RGB").resize((512, 512)) + image_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([0.5], [0.5]), + ]) + image_tensor = ( + image_transform(image) + .unsqueeze(0) + .to(pipeline.device) + .to(pipeline.unet.dtype) + ) + + # 1. 编码到 Latent 空间 + with torch.no_grad(): + latent = ( + pipeline.vae.encode(image_tensor).latent_dist.sample() * + pipeline.vae.config.scaling_factor + ) + + # 2. 编码 Prompt + text_input = pipeline.tokenizer( + prompt_text, + padding="max_length", + max_length=pipeline.tokenizer.model_max_length, + truncation=True, + return_tensors="pt" + ) + input_ids = text_input.input_ids + + with torch.no_grad(): + # 获取文本嵌入 + prompt_embeds = pipeline.text_encoder( + input_ids.to(pipeline.device) + )[0] + + # 3. 定义语义时间步 + scheduler = pipeline.scheduler + # 设置扩散步数 (例如 50 步) + scheduler.set_timesteps(50, device=pipeline.device) + + # 只选择语义最丰富的早期 10 步进行捕获 + semantic_steps = scheduler.timesteps[:10] + print(f"-> 正在对语义阶段的 {len(semantic_steps)} 个时间步进行注意力捕获...") + + + processor = AttentionMapProcessor(pipeline) + + try: + # 4. 运行多步 UNet Forward Pass + with torch.no_grad(): + # 在选定的语义时间步上运行 U-Net 预测 + for t in semantic_steps: + pipeline.unet(latent, t, prompt_embeds, return_dict=False) + + # 5. 聚合捕获到的数据 + raw_map_np = aggregate_word_attention( + processor.attention_maps, + pipeline.tokenizer, + target_word, + input_ids + ) + except Exception as e: + print(f"[ERROR] 注意力聚合失败: {e}") + # 确保清理钩子 + raw_map_np = np.zeros(image.size) + finally: + processor.remove() + + # 6. 
注意力图上采样到图片尺寸 (512x512) + # PIL 进行上采样 + heat_map_pil = Image.fromarray((raw_map_np * 255).astype(np.uint8)) + heat_map_np_resized = ( + np.array(heat_map_pil.resize( + image.size, + resample=Image.Resampling.LANCZOS # 使用高质量的 Lanczos 滤波器 + )) / 255.0 + ) + + return image, heat_map_np_resized + + +def main(): + """主函数,负责解析参数,加载模型,计算差异并生成可视化报告。""" + parser = argparse.ArgumentParser(description="SD 图片注意力差异可视化报告生成") + parser.add_argument("--model_path", type=str, required=True, + help="Stable Diffusion 模型本地路径。") + parser.add_argument("--image_path_a", type=str, required=True, + help="干净输入图片 (X) 路径。") + parser.add_argument("--image_path_b", type=str, required=True, + help="扰动输入图片 (X') 路径。") + parser.add_argument("--prompt_text", type=str, default="a photo of sks person", + help="用于生成图片的 Prompt 文本。") + parser.add_argument("--target_word", type=str, default="sks", + help="需要在注意力图中聚焦和可视化的关键词。") + parser.add_argument("--output_dir", type=str, default="output", + help="报告 PNG 文件的输出目录。") + args = parser.parse_args() + + print(f"--- 正在生成 Stable Diffusion 注意力差异报告 ---") + + # ---------------- 准备模型 ---------------- + device = 'cuda' if torch.cuda.is_available() else 'cpu' + dtype = torch.float16 if device == 'cuda' else torch.float32 + + try: + # 加载 Stable Diffusion 管线 + pipe = StableDiffusionPipeline.from_pretrained( + args.model_path, + torch_dtype=dtype, + local_files_only=True, + safety_checker=None, + # 从子文件夹加载调度器配置 + scheduler=DPMSolverMultistepScheduler.from_pretrained(args.model_path, subfolder="scheduler") + ).to(device) + except Exception as e: + print(f"[ERROR] 模型加载失败,请检查路径和环境依赖: {e}") + return + + # ---------------- 获取数据 ---------------- + # 获取干净图片 A 的注意力图 M_A + img_A, map_A = get_attention_map_from_image(pipe, args.image_path_a, args.prompt_text, args.target_word) + # 获取扰动图片 B 的注意力图 M_B + img_B, map_B = get_attention_map_from_image(pipe, args.image_path_b, args.prompt_text, args.target_word) + + if map_A.shape != map_B.shape: + print("错误:注意力图尺寸不匹配。中止处理。") + return + + # 计算差异图: Delta = M_A - M_B + diff_map = map_A - map_B + # 计算 L2 范数(差异距离) + l2_diff = np.linalg.norm(diff_map) + print(f"\n计算完毕,注意力图的 L2 范数差异值: {l2_diff:.4f}") + + # ---------------- 绘制专业报告 ---------------- + + # 设置 Matplotlib 字体样式 + plt.rcParams.update({ + 'font.family': 'serif', + 'font.serif': ['DejaVu Serif', 'Times New Roman', 'serif'], + 'mathtext.fontset': 'cm' + }) + + fig = plt.figure(figsize=(12, 16), dpi=120) + + # 3行 x 4列 网格布局,用于图片和图例的精确控制 + gs = gridspec.GridSpec(3, 4, figure=fig, + height_ratios=[1, 1, 1.3], + hspace=0.3, wspace=0.1) + + # --- 第一行:原始图片 --- + ax_img_a = fig.add_subplot(gs[0, 0:2]) + ax_img_b = fig.add_subplot(gs[0, 2:4]) + + # 干净图片 + ax_img_a.imshow(img_A) + ax_img_a.set_title(f"Clean Image ($X$)\nFilename: {Path(args.image_path_a).name}", fontsize=14, pad=10) + ax_img_a.axis('off') + + # 扰动图片 + ax_img_b.imshow(img_B) + ax_img_b.set_title(f"Noisy Image ($X'$)\nFilename: {Path(args.image_path_b).name}", fontsize=14, pad=10) + ax_img_b.axis('off') + + # --- 第二行:注意力热力图 (Jet配色) --- + ax_map_a = fig.add_subplot(gs[1, 0:2]) + ax_map_b = fig.add_subplot(gs[1, 2:4]) + + # 注意力图 A + im_map_a = ax_map_a.imshow(map_A, cmap='jet', vmin=0, vmax=1) + ax_map_a.set_title(f"Attention Heatmap ($M_X$)\nTarget: \"{args.target_word}\"", fontsize=14, pad=10) + ax_map_a.axis('off') + + # 注意力图 B + im_map_b = ax_map_b.imshow(map_B, cmap='jet', vmin=0, vmax=1) + ax_map_b.set_title(f"Attention Heatmap ($M_{{X'}}$)\nTarget: \"{args.target_word}\"", fontsize=14, pad=10) + ax_map_b.axis('off') + + # 为注意力图 B 绘制颜色指示条 + divider = 
make_axes_locatable(ax_map_b) + cax_map = divider.append_axes("right", size="5%", pad=0.05) + cbar1 = fig.colorbar(im_map_b, cax=cax_map) + cbar1.set_label('Attention Intensity', fontsize=10) + + # --- 第三行:差异对比 (完美居中) --- + # 差异图在网格的中间两列 + ax_diff = fig.add_subplot(gs[2, 1:3]) + + vmax_diff = np.max(np.abs(diff_map)) + # 使用 TwoSlopeNorm 确保 0 值位于色条中央 + norm_diff = TwoSlopeNorm(vmin=-vmax_diff, vcenter=0., vmax=vmax_diff) + + # 使用 Coolwarm 配色,蓝色表示负差异 (M_X' > M_X),红色表示正差异 (M_X > M_X') + im_diff = ax_diff.imshow(diff_map, cmap='coolwarm', norm=norm_diff) + + title_text = ( + r"Difference Map: $\Delta = M_X - M_{X'}$" + + f"\n$L_2$ Norm Distance: $\mathbf{{{l2_diff:.4f}}}$" + ) + ax_diff.set_title(title_text, fontsize=16, pad=12) + ax_diff.axis('off') + + # 差异图颜色指示条 (居中对齐) + cbar2 = fig.colorbar(im_diff, ax=ax_diff, fraction=0.046, pad=0.04) + cbar2.set_label(r'Scale: Red ($+$) $\leftrightarrow$ Blue ($-$)', fontsize=12) + + # ---------------- 整体修饰与保存 ---------------- + fig.suptitle(f"Museguard: SD Attention Analysis Report", fontsize=20, fontweight='bold', y=0.95) + + output_filename = "heatmap_dif.png" + output_path = Path(args.output_dir) / output_filename + output_path.parent.mkdir(parents=True, exist_ok=True) + + plt.savefig(output_path, bbox_inches='tight', facecolor='white') + print(f"\n专业分析报告已保存至:\n{output_path.resolve()}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/backend/app/algorithms/evaluates/eva_gen_nums.py b/src/backend/app/algorithms/evaluates/eva_gen_nums.py new file mode 100644 index 0000000..31041bd --- /dev/null +++ b/src/backend/app/algorithms/evaluates/eva_gen_nums.py @@ -0,0 +1,513 @@ +"""图像生成质量多维度评估工具 (专业重构版)。 + +本脚本用于对比评估两组图像(Clean vs Perturbed)的生成质量。 +支持生成包含指标对比表和深度差异分析的 PNG 报告。 + +Style Guide: Google Python Style Guide +""" + +import os +import time +import subprocess +import tempfile +import warnings +from argparse import ArgumentParser +from pathlib import Path +from typing import Dict, Optional, Tuple, Any + +import torch +import clip +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +from PIL import Image +from torchvision import transforms +from facenet_pytorch import MTCNN, InceptionResnetV1 +from piq import ssim, psnr +import torch_fidelity as fid + +# 抑制非必要的警告输出 +warnings.filterwarnings("ignore", category=UserWarning) +warnings.filterwarnings("ignore", category=FutureWarning) + +# ----------------------------------------------------------------------------- +# 全局配置与样式 +# ----------------------------------------------------------------------------- + +# Matplotlib LaTeX 风格配置 +plt.rcParams.update({ + 'font.family': 'serif', + 'font.serif': ['DejaVu Serif', 'Times New Roman', 'serif'], + 'mathtext.fontset': 'cm', + 'axes.unicode_minus': False +}) + +# 指标元数据配置:定义指标目标方向和分析阈值 +METRIC_ANALYSIS_META = { + 'FID': {'higher_is_better': False, 'th': [2.0, 10.0, 30.0]}, + 'SSIM': {'higher_is_better': True, 'th': [0.01, 0.05, 0.15]}, + 'PSNR': {'higher_is_better': True, 'th': [0.5, 2.0, 5.0]}, + 'FDS': {'higher_is_better': True, 'th': [0.02, 0.05, 0.1]}, + 'CLIP_IQS': {'higher_is_better': True, 'th': [0.01, 0.03, 0.08]}, + 'BRISQUE': {'higher_is_better': False, 'th': [2.0, 5.0, 10.0]}, +} +# 用于综合分析的降级权重 +ANALYSIS_WEIGHTS = {'Severe': 3, 'Significant': 2, 'Slight': 1, 'Negligible': 0} + + +# ----------------------------------------------------------------------------- +# 模型加载 (惰性加载或全局预加载) +# ----------------------------------------------------------------------------- + 
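+# NOTE: CLIP ViT-B/32 is loaded once at import time on CUDA; if loading fails the
+# globals stay None and CLIP_IQS is later reported as NaN instead of aborting the run.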
+try: + CLIP_MODEL, CLIP_PREPROCESS = clip.load('ViT-B/32', 'cuda') + CLIP_MODEL.eval() +except Exception as e: + print(f"[Warning] CLIP 模型加载失败: {e}") + CLIP_MODEL, CLIP_PREPROCESS = None, None + +def _get_clip_text_features(text: str) -> torch.Tensor: + """辅助函数:获取文本的 CLIP 特征。""" + if CLIP_MODEL is None: + return None + tokens = clip.tokenize(text).to('cuda') + with torch.no_grad(): + features = CLIP_MODEL.encode_text(tokens) + features /= features.norm(dim=-1, keepdim=True) + return features + +# ----------------------------------------------------------------------------- +# 核心计算逻辑 +# ----------------------------------------------------------------------------- + +def calculate_metrics( + ref_dir: str, + gen_dir: str, + image_size: int = 512 +) -> Dict[str, float]: + """计算图像集之间的多项质量评估指标。 + + 包括 FDS, SSIM, PSNR, CLIP_IQS, FID。 + + Args: + ref_dir: 参考图片目录路径。 + gen_dir: 生成图片目录路径。 + image_size: 图像处理尺寸。 + + Returns: + 包含各项指标名称和数值的字典。若目录无效返回空字典。 + """ + metrics = {} + + # 1. 数据加载 + def load_images(directory): + imgs = [] + if os.path.exists(directory): + for f in os.listdir(directory): + if f.lower().endswith(('.png', '.jpg', '.jpeg')): + try: + path = os.path.join(directory, f) + imgs.append(Image.open(path).convert("RGB")) + except Exception: + pass + return imgs + + ref_imgs = load_images(ref_dir) + gen_imgs = load_images(gen_dir) + + if not ref_imgs or not gen_imgs: + print(f"[Error] 图片加载失败或目录为空: \nRef: {ref_dir}\nGen: {gen_dir}") + return {} + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + with torch.no_grad(): + # --- FDS (Face Detection Similarity) --- + print(">>> 计算 FDS...") + mtcnn = MTCNN(image_size=image_size, margin=0, device=device) + resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device) + + def get_face_embeds(img_list): + embeds = [] + for img in img_list: + face = mtcnn(img) + if face is not None: + embeds.append(resnet(face.unsqueeze(0).to(device))) + return torch.stack(embeds) if embeds else None + + ref_embeds = get_face_embeds(ref_imgs) + gen_embeds = get_face_embeds(gen_imgs) + + if ref_embeds is not None and gen_embeds is not None: + # 计算生成集每张脸与参考集所有脸的余弦相似度均值 + sims = [] + for g_emb in gen_embeds: + sim = torch.cosine_similarity(g_emb, ref_embeds).mean() + sims.append(sim) + metrics['FDS'] = torch.tensor(sims).mean().item() + else: + metrics['FDS'] = 0.0 + + # 清理显存 + del mtcnn, resnet + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # --- SSIM & PSNR --- + print(">>> 计算 SSIM & PSNR...") + tfm = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor() + ]) + + # 将参考集堆叠为 [N, C, H, W] + ref_tensor = torch.stack([tfm(img) for img in ref_imgs]).to(device) + + ssim_accum, psnr_accum = 0.0, 0.0 + for img in gen_imgs: + gen_tensor = tfm(img).unsqueeze(0).to(device) # [1, C, H, W] + + # 扩展维度以匹配参考集 + gen_expanded = gen_tensor.expand_as(ref_tensor) + + # 计算单张生成图相对于整个参考集的平均结构相似度 + val_ssim = ssim(gen_expanded, ref_tensor, data_range=1.0) + val_psnr = psnr(gen_expanded, ref_tensor, data_range=1.0) + + ssim_accum += val_ssim.item() + psnr_accum += val_psnr.item() + + metrics['SSIM'] = ssim_accum / len(gen_imgs) + metrics['PSNR'] = psnr_accum / len(gen_imgs) + + # --- CLIP IQS --- + print(">>> 计算 CLIP IQS...") + if CLIP_MODEL: + iqs_accum = 0.0 + txt_feat = _get_clip_text_features("good image") + for img in gen_imgs: + img_tensor = CLIP_PREPROCESS(img).unsqueeze(0).to(device) + img_feat = CLIP_MODEL.encode_image(img_tensor) + img_feat /= img_feat.norm(dim=-1, keepdim=True) + iqs_accum += 
(img_feat @ txt_feat.T).item() + metrics['CLIP_IQS'] = iqs_accum / len(gen_imgs) + else: + metrics['CLIP_IQS'] = np.nan + + # --- FID --- + print(">>> 计算 FID...") + try: + fid_res = fid.calculate_metrics( + input1=ref_dir, + input2=gen_dir, + cuda=True, + fid=True, + verbose=False + ) + metrics['FID'] = fid_res['frechet_inception_distance'] + except Exception as e: + print(f"[Error] FID 计算异常: {e}") + metrics['FID'] = np.nan + + return metrics + + +def run_brisque_cleanly(img_dir: str) -> float: + """使用 subprocess 和临时目录优雅地执行外部 BRISQUE 脚本。 + + Args: + img_dir: 图像目录路径。 + + Returns: + BRISQUE 分数,若失败返回 NaN。 + """ + print(f">>> 计算 BRISQUE (External)...") + + script_path = Path(__file__).parent / 'libsvm' / 'python' / 'brisquequality.py' + if not script_path.exists(): + print(f"[Error] 找不到 BRISQUE 脚本: {script_path}") + return np.nan + + abs_img_dir = os.path.abspath(img_dir) + + with tempfile.TemporaryDirectory() as temp_dir: + try: + cmd = [ + "python", str(script_path), + abs_img_dir, + temp_dir + ] + + # 在脚本所在目录执行 + subprocess.run( + cmd, + cwd=script_path.parent, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + + # 读取临时生成的日志文件 + log_file = Path(temp_dir) / 'log.txt' + if log_file.exists(): + content = log_file.read_text(encoding='utf-8').strip() + try: + return float(content.split()[-1]) + except ValueError: + return float(content) + else: + return np.nan + + except Exception as e: + print(f"[Error] BRISQUE 执行出错: {e}") + return np.nan + + +# ----------------------------------------------------------------------------- +# 报告可视化与分析逻辑 +# ----------------------------------------------------------------------------- + +def analyze_metric_diff( + metric_name: str, + clean_val: float, + pert_val: float +) -> Tuple[str, str, str]: + """生成科学的分级差异分析文本。 + + Args: + metric_name: 指标名称。 + clean_val: 干净图得分。 + pert_val: 扰动图得分。 + + Returns: + (表头箭头符号, 差异描述文本, 状态等级) + """ + + cfg = METRIC_ANALYSIS_META.get(metric_name) + if not cfg: + return "-", "Configuration not found.", "Negligible" + + diff = pert_val - clean_val + abs_diff = abs(diff) + + # 判定好坏: + is_better = (cfg['higher_is_better'] and diff > 0) or (not cfg['higher_is_better'] and diff < 0) + is_worse = not is_better + + # 确定程度 + th = cfg['th'] + if abs_diff < th[0]: + degree = "Negligible" + elif abs_diff < th[1]: + degree = "Slight" + elif abs_diff < th[2]: + degree = "Significant" + else: + degree = "Severe" + + # 组装文案 + header_arrow = r"$\uparrow$" if cfg['higher_is_better'] else r"$\downarrow$" + + if degree == "Negligible": + analysis_text = f"Negligible change (diff < {th[0]:.4f})." + elif is_worse: + analysis_text = f"{degree} degradation." + else: + analysis_text = f"Unexpected {degree} change." + + return header_arrow, analysis_text, degree + + +def generate_visual_report( + ref_dir: str, + clean_dir: str, + pert_dir: str, + clean_metrics: Dict, + pert_metrics: Dict, + output_path: str +): + """渲染并保存专业对比分析报告 (PNG)。""" + + def get_sample(d): + if not os.path.exists(d): return None, "N/A" + files = [f for f in os.listdir(d) if f.lower().endswith(('.png','.jpg'))] + if not files: return None, "Empty" + return Image.open(os.path.join(d, files[0])).convert("RGB"), files[0] + + img_ref, name_ref = get_sample(ref_dir) + img_clean, name_clean = get_sample(clean_dir) + img_pert, name_pert = get_sample(pert_dir) + + # 布局设置 + # 增加高度以容纳文本 + fig = plt.figure(figsize=(12, 16.5), dpi=120) + gs = gridspec.GridSpec(3, 2, height_ratios=[1, 1, 1.5], hspace=0.25, wspace=0.1) + + # 1. 
图像展示区 + ax_ref = fig.add_subplot(gs[0, :]) + if img_ref: + ax_ref.imshow(img_ref) + ax_ref.set_title(f"Reference Image ($X$)\n{name_ref}", fontsize=12, fontweight='bold', pad=10) + ax_ref.axis('off') + + ax_c = fig.add_subplot(gs[1, 0]) + if img_clean: + ax_c.imshow(img_clean) + ax_c.set_title(f"Clean Output ($Y$)\n{name_clean}", fontsize=12, fontweight='bold', pad=10) + ax_c.axis('off') + + ax_p = fig.add_subplot(gs[1, 1]) + if img_pert: + ax_p.imshow(img_pert) + ax_p.set_title(f"Perturbed Output ($Y'$)\n{name_pert}", fontsize=12, fontweight='bold', pad=10) + ax_p.axis('off') + + # 2. 数据表格与分析区 + ax_data = fig.add_subplot(gs[2, :]) + ax_data.axis('off') + + metrics_list = ['FID', 'SSIM', 'PSNR', 'FDS', 'CLIP_IQS', 'BRISQUE'] + table_data = [] + analysis_lines = [] + + degradation_score = 0 + + # 遍历指标生成数据和分析 + for m in metrics_list: + c_val = clean_metrics.get(m, np.nan) + p_val = pert_metrics.get(m, np.nan) + + c_str = f"{c_val:.4f}" if not np.isnan(c_val) else "N/A" + p_str = f"{p_val:.4f}" if not np.isnan(p_val) else "N/A" + diff_str = "-" + + header_arrow = "" + + if not np.isnan(c_val) and not np.isnan(p_val): + # 获取深度分析 + header_arrow, text_desc, degree = analyze_metric_diff(m, c_val, p_val) + + # 计算差异值 + diff = p_val - c_val + # 差异值本身的符号 (Diff > 0 或 Diff < 0) + diff_arrow = r"$\nearrow$" if diff > 0 else r"$\searrow$" + if abs(diff) < 1e-4: diff_arrow = r"$\rightarrow$" + + diff_str = f"{diff:+.4f} {diff_arrow}" + + analysis_lines.append(f"• {m}: Change {diff:+.4f}. Analysis: {text_desc}") + + # 累计降级分数 + cfg = METRIC_ANALYSIS_META.get(m) + is_worse = (cfg['higher_is_better'] and diff < 0) or (not cfg['higher_is_better'] and diff > 0) + if is_worse: + degradation_score += ANALYSIS_WEIGHTS.get(degree, 0) + + # 表格第一列:名称 + 期望方向箭头 + name_with_arrow = f"{m} ({header_arrow})" if header_arrow else m + table_data.append([name_with_arrow, c_str, p_str, diff_str]) + + # 绘制表格 + table = ax_data.table( + cellText=table_data, + colLabels=["Metric (Goal)", "Clean ($Y$)", "Perturbed ($Y'$)", "Diff ($\Delta$)"], + loc='upper center', + cellLoc='center', + colWidths=[0.25, 0.25, 0.25, 0.25] + ) + table.scale(1, 2.0) + table.set_fontsize(11) + + # 美化表头 + for (row, col), cell in table.get_celld().items(): + if row == 0: + cell.set_text_props(weight='bold', color='white') + cell.set_facecolor('#404040') + elif col == 0: + cell.set_text_props(weight='bold') + cell.set_facecolor('#f5f5f5') + + # 3. 底部综合分析文本框 + if not analysis_lines: + analysis_lines.append("• All metrics are missing or invalid.") + + full_text = "Quantitative Difference Analysis:\n" + "\n".join(analysis_lines) + + # 总体结论判断 (基于 holistic degradation score) + conclusion = "\n\n>>> EXECUTIVE SUMMARY (Holistic Judgment):\n" + + if degradation_score >= 8: + conclusion += "CRITICAL DEGRADATION. Significant quality loss observed. Attack highly effective." + elif degradation_score >= 4: + conclusion += "MODERATE DEGRADATION. Observable quality drop in key metrics. Attack effective." + elif degradation_score > 0: + conclusion += "MINOR DEGRADATION. Slight quality loss detected. Attack partially effective." + else: + conclusion += "INEFFECTIVE ATTACK. No significant or unexpected statistical quality loss observed." 
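+    # degradation_score is the sum of ANALYSIS_WEIGHTS over metrics that moved in the
+    # undesired direction; the 8 / 4 / 1 cut-offs above translate it into these verdicts.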
+ + full_text += conclusion + + ax_data.text( + 0.05, + 0.30, + full_text, + ha='left', + va='top', + fontsize=12, family='monospace', wrap=True, + transform=ax_data.transAxes + ) + + fig.suptitle("Museguard: Quality Assurance Report", fontsize=18, fontweight='bold', y=0.95) + + plt.savefig(output_path, bbox_inches='tight', facecolor='white') + print(f"\n[Success] 报告已生成: {output_path}") + + +# ----------------------------------------------------------------------------- +# 主入口 +# ----------------------------------------------------------------------------- + +def main(): + parser = ArgumentParser() + parser.add_argument('--clean_output_dir', type=str, required=True) + parser.add_argument('--perturbed_output_dir', type=str, required=True) + parser.add_argument('--clean_ref_dir', type=str, required=True) + parser.add_argument('--png_output_path', type=str, required=True) + parser.add_argument('--size', type=int, default=512) + args = parser.parse_args() + + + Path(args.png_output_path).parent.mkdir(parents=True, exist_ok=True) + + print("========================================") + print(" Image Quality Evaluation Toolkit") + print("========================================") + + # 1. 计算 Clean 组 + print(f"\n[1/2] Evaluating Clean Set: {os.path.basename(args.clean_output_dir)}") + c_metrics = calculate_metrics(args.clean_ref_dir, args.clean_output_dir, args.size) + if c_metrics: + c_metrics['BRISQUE'] = run_brisque_cleanly(args.clean_output_dir) + + # 2. 计算 Perturbed 组 + print(f"\n[2/2] Evaluating Perturbed Set: {os.path.basename(args.perturbed_output_dir)}") + p_metrics = calculate_metrics(args.clean_ref_dir, args.perturbed_output_dir, args.size) + if p_metrics: + p_metrics['BRISQUE'] = run_brisque_cleanly(args.perturbed_output_dir) + + # 3. 生成报告 + if c_metrics and p_metrics: + generate_visual_report( + args.clean_ref_dir, + args.clean_output_dir, + args.perturbed_output_dir, + c_metrics, + p_metrics, + args.png_output_path + ) + else: + print("\n[Fatal] 评估数据不完整,中止报告生成。") + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/backend/app/algorithms/evaluates/libsvm/COPYRIGHT b/src/backend/app/algorithms/evaluates/libsvm/COPYRIGHT new file mode 100644 index 0000000..5fe2f22 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/libsvm/COPYRIGHT @@ -0,0 +1,31 @@ + +Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/backend/app/algorithms/evaluates/libsvm/FAQ.html b/src/backend/app/algorithms/evaluates/libsvm/FAQ.html new file mode 100644 index 0000000..42a175a --- /dev/null +++ b/src/backend/app/algorithms/evaluates/libsvm/FAQ.html @@ -0,0 +1,2166 @@ + + + + +LIBSVM FAQ + + + +

LIBSVM FAQ

+last modified : +Tue, 20 Oct 2015 13:43:40 GMT + +
  • All Questions(84)
    +

    + + +Q: Some courses which have used libsvm as a tool +
    +

    +

    +[Go Top] +


    + +Q: Some applications/tools which have used libsvm +
    +(and maybe liblinear). + +

    +[Go Top] +


    + +Q: Where can I find documents/videos of libsvm ? +
    +

    + +

    +

    +[Go Top] +


    + +Q: Where are change log and earlier versions? +
    +

    See the change log. + +

    You can download earlier versions +here. +

    +[Go Top] +


    + +Q: How to cite LIBSVM? +
    +

    +Please cite the following paper: +

    +Chih-Chung Chang and Chih-Jen Lin, LIBSVM +: a library for support vector machines. +ACM Transactions on Intelligent Systems and Technology, 2:27:1--27:27, 2011. +Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm +

    +The bibtex format is +

    +@article{CC01a,
    + author = {Chang, Chih-Chung and Lin, Chih-Jen},
    + title = {{LIBSVM}: A library for support vector machines},
    + journal = {ACM Transactions on Intelligent Systems and Technology},
    + volume = {2},
    + issue = {3},
    + year = {2011},
    + pages = {27:1--27:27},
    + note =	 {Software available at \url{http://www.csie.ntu.edu.tw/~cjlin/libsvm}}
    +}
    +
    +

    +[Go Top] +


    + +Q: I would like to use libsvm in my software. Is there any license problem? +
    +

    +We have "the modified BSD license," +so it is very easy to +use libsvm in your software. +Please check the COPYRIGHT file in detail. Basically +you need to +

      +
    1. +Clearly indicate that LIBSVM is used. +
    2. +Retain the LIBSVM COPYRIGHT file in your software. +
    +It can also be used in commercial products. +

    +[Go Top] +


    + +Q: Is there a repository of additional tools based on libsvm? +
    +

    +Yes, see libsvm +tools +

    +[Go Top] +


    + +Q: On unix machines, I got "error in loading shared libraries" or "cannot open shared object file." What happened ? +
    + +

    +This usually happens if you compile the code +on one machine and run it on another which has incompatible +libraries. +Try to recompile the program on that machine or use static linking. +

    +[Go Top] +


    + +Q: I have modified the source and would like to build the graphic interface "svm-toy" on MS windows. How should I do it ? +
    + +

    +Build it as a project by choosing "Win32 Project." +On the other hand, for "svm-train" and "svm-predict" +you want to choose "Win32 Console Project." +After libsvm 2.5, you can also use the file Makefile.win. +See details in README. + + +

    +If you are not using Makefile.win and see the following +link error +

    +LIBCMTD.lib(wwincrt0.obj) : error LNK2001: unresolved external symbol
    +_wWinMain@16
    +
    +you may have selected a wrong project type. +

    +[Go Top] +


    + +Q: I am an MS windows user but why only one (svm-toy) of those precompiled .exe actually runs ? +
    + +

    +You need to open a command window +and type svmtrain.exe to see all options. +Some examples are in README file. +

    +[Go Top] +


    + +Q: What is the difference between "." and "*" outputed during training? +
    + +

    +"." means every 1,000 iterations (or every #data +iterations is your #data is less than 1,000). +"*" means that after iterations of using +a smaller shrunk problem, +we reset to use the whole set. See the +implementation document for details. +

    +[Go Top] +


    + +Q: Why occasionally the program (including MATLAB or other interfaces) crashes and gives a segmentation fault? +
    + +

    +Very likely the program consumes more memory than the +operating system can provide. Try smaller data and see if the +program still crashes. +

    +[Go Top] +


    + +Q: How to build a dynamic library (.dll file) on MS windows? +
    +

    + +The easiest way is to use Makefile.win. +See details in README. + +Alternatively, you can use Visual C++. Here is +the example using Visual Studio 2013: +

      +
    1. Create a Win32 empty DLL project and set (in Project->$Project_Name +Properties...->Configuration) to "Release." + About how to create a new dynamic link library, please refer to +http://msdn2.microsoft.com/en-us/library/ms235636(VS.80).aspx + +
    2. Add svm.cpp, svm.h to your project. +
    3. Add __WIN32__ and _CRT_SECURE_NO_DEPRECATE to Preprocessor definitions (in +Project->$Project_Name Properties...->C/C++->Preprocessor) +
    4. Set Create/Use Precompiled Header to Not Using Precompiled Headers +(in Project->$Project_Name Properties...->C/C++->Precompiled Headers) +
    5. Set the path for the Module Definition File svm.def (in +Project->$Project_Name Properties...->Linker->Input) +
    6. Build the DLL. +
    7. Rename the dll file to libsvm.dll and move it to the correct path. +
    + + +

    +[Go Top] +


    + +Q: On some systems (e.g., Ubuntu), compiling LIBSVM gives many warning messages. Is this a problem and how to disable the warning message? +
    + +

    +If you are using a version before 3.18, probably you see +a warning message like +

    +svm.cpp:2730: warning: ignoring return value of int fscanf(FILE*, const char*, ...), declared with attribute warn_unused_result
    +
    +This is not a problem; see this page for more +details of ubuntu systems. +To disable the warning message you can replace +
    +CFLAGS = -Wall -Wconversion -O3 -fPIC
    +
    +with +
    +CFLAGS = -Wall -Wconversion -O3 -fPIC -U_FORTIFY_SOURCE
    +
    +in Makefile. +

    After version 3.18, we have a better setting so that such warning messages do not appear. +

    +[Go Top] +


    + +Q: In LIBSVM, why you don't use certain C/C++ library functions to make the code shorter? +
    + +

    +For portability, we use only features defined in ISO C89. Note that features in ISO C99 may not be available everywhere. +Even the newest gcc lacks some features in C99 (see http://gcc.gnu.org/c99status.html for details). +If the situation changes in the future, +we might consider using these newer features. +

    +[Go Top] +


    + +Q: Why sometimes not all attributes of a data appear in the training/model files ? +
    +

    +libsvm uses the so called "sparse" format where zero +values do not need to be stored. Hence a data with attributes +

    +1 0 2 0
    +
    +is represented as +
    +1:1 3:2
    +
    +

    +[Go Top] +
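    +The same conversion in a short Python sketch (illustrative only, not part
    +of LIBSVM; the helper name to_libsvm_line is made up here):
    +
    +def to_libsvm_line(label, dense):
    +    # indices are 1-based and zero values are skipped
    +    feats = " ".join(f"{i + 1}:{v}" for i, v in enumerate(dense) if v != 0)
    +    return f"{label} {feats}"
    +
    +print(to_libsvm_line(1, [1, 0, 2, 0]))   # prints "1 1:1 3:2"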


    + +Q: What if my data are non-numerical ? +
    +

    +Currently libsvm supports only numerical data. +You may have to change non-numerical data to +numerical. For example, you can use several +binary attributes to represent a categorical +attribute. +

    +[Go Top] +
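    +For example, a categorical attribute with the three values red/green/blue
    +can be turned into three binary attributes. A tiny illustrative Python
    +sketch (the category list is hypothetical):
    +
    +categories = ["red", "green", "blue"]
    +
    +def one_hot(value):
    +    # one binary attribute per category; exactly one of them is 1
    +    return [1 if value == c else 0 for c in categories]
    +
    +print(one_hot("green"))   # prints [0, 1, 0]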


    + +Q: Why do you consider sparse format ? Will the training of dense data be much slower ? +
    +

    +This is a controversial issue. The kernel +evaluation (i.e. inner product) of sparse vectors is slower, +so the total training time can be two or three times +that of using the dense format. +However, we cannot support only dense format as then we CANNOT +handle extremely sparse cases. Simplicity of the code is another +concern. Right now we decide to support +the sparse format only. +

    +[Go Top] +


    + +Q: Why sometimes the last line of my data is not read by svm-train? +
    + +

    +We assume that you have '\n' in the end of +each line. So please press enter in the end +of your last line. +

    +[Go Top] +


    + +Q: Is there a program to check if my data are in the correct format? +
    + +

    +The svm-train program in libsvm conducts only a simple check of the input data. To do a +detailed check, after libsvm 2.85, you can use the python script tools/checkdata.py. See tools/README for details. +

    +[Go Top] +


    + +Q: May I put comments in data files? +
    + +

    +We don't officially support this. But, currently LIBSVM +is able to process data in the following +format: +

    +1 1:2 2:1 # your comments
    +
    +Note that the character ":" should not appear in your +comments. + +

    +[Go Top] +


    + +Q: How to convert other data formats to LIBSVM format? +
    + +

    +It depends on your data format. A simple way is to use +libsvmwrite in the libsvm matlab/octave interface. + +Take a CSV (comma-separated values) file +in UCI machine learning repository as an example. +We download SPECTF.train. +Labels are in the first column. The following steps produce +a file in the libsvm format. +

    +matlab> SPECTF = csvread('SPECTF.train'); % read a csv file
    +matlab> labels = SPECTF(:, 1); % labels from the 1st column
    +matlab> features = SPECTF(:, 2:end); 
    +matlab> features_sparse = sparse(features); % features must be in a sparse matrix
    +matlab> libsvmwrite('SPECTFlibsvm.train', labels, features_sparse);
    +
    +The transformed data are stored in SPECTFlibsvm.train. + +

    +Alternatively, you can use convert.c +to convert CSV format to libsvm format. +

    +[Go Top] +
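    +If you prefer Python, the following sketch performs the same conversion
    +under the assumptions made above (labels in the first column,
    +comma-separated values, no header row); it is only an illustration, not an
    +official tool:
    +
    +def csv_to_libsvm(csv_path, out_path):
    +    with open(csv_path) as fin, open(out_path, "w") as fout:
    +        for line in fin:
    +            fields = line.strip().split(",")
    +            label, feats = fields[0], fields[1:]
    +            # keep only nonzero features, with 1-based indices
    +            pairs = [f"{i + 1}:{v}" for i, v in enumerate(feats) if float(v) != 0]
    +            fout.write(label + " " + " ".join(pairs) + "\n")
    +
    +csv_to_libsvm("SPECTF.train", "SPECTFlibsvm.train")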


    + +Q: The output of training C-SVM is like the following. What do they mean? +
    +
    optimization finished, #iter = 219 +
    nu = 0.431030 +
    obj = -100.877286, rho = 0.424632 +
    nSV = 132, nBSV = 107 +
    Total nSV = 132 +

    +obj is the optimal objective value of the dual SVM problem. +rho is the bias term in the decision function +sgn(w^Tx - rho). +nSV and nBSV are number of support vectors and bounded support +vectors (i.e., alpha_i = C). nu-svm is a somewhat equivalent +form of C-SVM where C is replaced by nu. nu simply shows the +corresponding parameter. More details are in + +libsvm document. +

    +[Go Top] +


    + +Q: Can you explain more about the model file? +
    + +

    +In the model file, after parameters and other informations such as labels , each line represents a support vector. +Support vectors are listed in the order of "labels" shown earlier. +(i.e., those from the first class in the "labels" list are +grouped first, and so on.) +If k is the total number of classes, +in front of a support vector in class j, there are +k-1 coefficients +y*alpha where alpha are dual solution of the +following two class problems: +
    +1 vs j, 2 vs j, ..., j-1 vs j, j vs j+1, j vs j+2, ..., j vs k +
    +and y=1 in first j-1 coefficients, y=-1 in the remaining +k-j coefficients. + +For example, if there are 4 classes, the file looks like: + +

    ++-+-+-+--------------------+
    +|1|1|1|                    |
    +|v|v|v|  SVs from class 1  |
    +|2|3|4|                    |
    ++-+-+-+--------------------+
    +|1|2|2|                    |
    +|v|v|v|  SVs from class 2  |
    +|2|3|4|                    |
    ++-+-+-+--------------------+
    +|1|2|3|                    |
    +|v|v|v|  SVs from class 3  |
    +|3|3|4|                    |
    ++-+-+-+--------------------+
    +|1|2|3|                    |
    +|v|v|v|  SVs from class 4  |
    +|4|4|4|                    |
    ++-+-+-+--------------------+
    +
    +See also + an illustration using +MATLAB/OCTAVE. +

    +[Go Top] +


    + +Q: Should I use float or double to store numbers in the cache ? +
    + +

    +We have float as the default as you can store more numbers +in the cache. +In general this is good enough but for few difficult +cases (e.g. C very very large) where solutions are huge +numbers, it might be possible that the numerical precision is not +enough using only float. +

    +[Go Top] +


    + +Q: Does libsvm have special treatments for linear SVM? +
    + +

    + +No, libsvm solves linear/nonlinear SVMs in the +same way. +Some tricks may save training/testing time if the +linear kernel is used, +so libsvm is NOT particularly efficient for linear SVM, +especially when +C is large and +the number of data is much larger +than the number of attributes. +You can either +

    + +

    Please also see our SVM guide +on the discussion of using RBF and linear +kernels. +

    +[Go Top] +


    + +Q: The number of free support vectors is large. What should I do? +
    +

    +This usually happens when the data are overfitted. +If attributes of your data are in large ranges, +try to scale them. Then the region +of appropriate parameters may be larger. +Note that there is a scale program +in libsvm. +

    +[Go Top] +


    + +Q: Should I scale training and testing data in a similar way? +
    +

    +Yes, you can do the following: +

    +> svm-scale -s scaling_parameters train_data > scaled_train_data
    +> svm-scale -r scaling_parameters test_data > scaled_test_data
    +
    +

    +[Go Top] +
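    +The same two commands can also be driven from Python; this sketch assumes
    +svm-scale has been built in the current directory and uses placeholder
    +file names:
    +
    +import subprocess
    +
    +# learn the scaling parameters from the training data and save them
    +with open("scaled_train_data", "w") as f:
    +    subprocess.run(["./svm-scale", "-s", "scaling_parameters", "train_data"],
    +                   stdout=f, check=True)
    +# restore exactly the same parameters when scaling the test data
    +with open("scaled_test_data", "w") as f:
    +    subprocess.run(["./svm-scale", "-r", "scaling_parameters", "test_data"],
    +                   stdout=f, check=True)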


    + +Q: On windows sometimes svm-scale.exe generates some non-ASCII data not good for training/prediction? +
    +

    +In general this does not happen, but we have observed in some rare +situations, the output of svm-scale.exe directed to a file (by ">") +has wrong encoding. That is, the file is not an ASCII file, so cannot be +used for training/prediction. Please let us know if this happens as at this moment +we don't clearly see how to fix the problem. +

    +[Go Top] +


    + +Q: Does it make a big difference if I scale each attribute to [0,1] instead of [-1,1]? +
    + +

    +For the linear scaling method, if the RBF kernel is +used and parameter selection is conducted, there +is no difference. Assume Mi and mi are +respectively the maximal and minimal values of the +ith attribute. Scaling to [0,1] means +

    +                x'=(x-mi)/(Mi-mi)
    +
    +For [-1,1], +
    +                x''=2(x-mi)/(Mi-mi)-1.
    +
    +In the RBF kernel, +
    +                x'-y'=(x-y)/(Mi-mi), x''-y''=2(x-y)/(Mi-mi).
    +
    +Hence, using (C,g) on the [0,1]-scaled data is the +same as (C,g/2) on the [-1,1]-scaled data. + +

    Though the performance is the same, the computational +time may be different. For data with many zero entries, +[0,1]-scaling keeps the sparsity of input data and hence +may save the time. +

    +[Go Top] +


    + +Q: The prediction rate is low. How could I improve it? +
    +

    +Try to use the model selection tool grid.py in the tools +directory to find +good parameters. To see the importance of model selection, +please +see our guide for beginners: + +A practical guide to support vector +classification + 

    +[Go Top] +


    + +Q: My data are unbalanced. Could libsvm handle such problems? +
    +

    +Yes, there is a -wi option. For example, if you use + 

    +> svm-train -s 0 -c 10 -w1 1 -w-1 5 data_file
    +
    +

    +the penalty for class "-1" is larger. +Note that this -w option is for C-SVC only. +

    +[Go Top] +
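    +The equivalent call through the bundled python interface (a sketch; it
    +assumes the libsvm python directory is importable and data_file is a
    +placeholder path):
    +
    +from svmutil import svm_read_problem, svm_train
    +
    +y, x = svm_read_problem("data_file")
    +# class -1 gets a five times larger penalty, as in the command above
    +model = svm_train(y, x, "-s 0 -c 10 -w1 1 -w-1 5")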


    + +Q: What is the difference between nu-SVC and C-SVC? +
    +

    +Basically they are the same thing but with different +parameters. The range of C is from zero to infinity +but nu is always between [0,1]. A nice property +of nu is that it is related to the ratio of +support vectors and the ratio of the training +error. +

    +[Go Top] +


    + +Q: The program keeps running (without showing any output). What should I do? +
    +

    +You may want to check your data. Each training/testing +data must be in one line. It cannot be separated. +In addition, you have to remove empty lines. +

    +[Go Top] +


    + +Q: The program keeps running (with output, i.e. many dots). What should I do? +
    +

    +In theory libsvm guarantees to converge. +Therefore, this means you are +handling ill-conditioned situations +(e.g. too large/small parameters) so numerical +difficulties occur. +

    +You may get better numerical stability by replacing +

    +typedef float Qfloat;
    +
    +in svm.cpp with +
    +typedef double Qfloat;
    +
    +That is, elements in the kernel cache are stored +in double instead of single. However, this means fewer elements +can be put in the kernel cache. +

    +[Go Top] +


    + +Q: The training time is too long. What should I do? +
    +

    +For large problems, please specify enough cache size (i.e., +-m). +Slow convergence may happen for some difficult cases (e.g. -c is large). +You can try to use a looser stopping tolerance with -e. +If that still doesn't work, you may train only a subset of the data. +You can use the program subset.py in the directory "tools" +to obtain a random subset. + +

    +If you have extremely large data and face this difficulty, please +contact us. We will be happy to discuss possible solutions. + +

    When using large -e, you may want to check if -h 0 (no shrinking) or -h 1 (shrinking) is faster. +See a related question below. + +

    +[Go Top] +


    + +Q: Does shrinking always help? +
    +

    +If the number of iterations is high, then shrinking +often helps. +However, if the number of iterations is small +(e.g., you specify a large -e), then +probably using -h 0 (no shrinking) is better. +See the +implementation document for details. +

    +[Go Top] +


    + +Q: How do I get the decision value(s)? +
    +

    +We print out decision values for regression. For classification, +we solve several binary SVMs for multi-class cases. You +can obtain values by easily calling the subroutine +svm_predict_values. Their corresponding labels +can be obtained from svm_get_labels. +Details are in +README of libsvm package. + +

    +If you are using MATLAB/OCTAVE interface, svmpredict can directly +give you decision values. Please see matlab/README for details. + +

    +We do not recommend the following. But if you would +like to get values for +TWO-class classification with labels +1 and -1 +(note: +1 and -1 but not things like 5 and 10) +in the easiest way, simply add +

    +		printf("%f\n", dec_values[0]*model->label[0]);
    +
    +after the line +
    +		svm_predict_values(model, x, dec_values);
    +
    +of the file svm.cpp. +Positive (negative) +decision values correspond to data predicted as +1 (-1). + + +

    +[Go Top] +
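    +With the bundled python interface, decision values are already returned as
    +the third output of svm_predict; a short sketch (heart_scale is the sample
    +data set shipped with LIBSVM, the parameters are placeholders):
    +
    +from svmutil import svm_read_problem, svm_train, svm_predict
    +
    +y, x = svm_read_problem("heart_scale")
    +model = svm_train(y, x, "-c 1")
    +p_labels, p_acc, p_vals = svm_predict(y, x, model)
    +# for two-class data, p_vals[i] holds the single decision value of instance i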


    + +Q: How do I get the distance between a point and the hyperplane? +
    +

    +The distance is |decision_value| / |w|. +We have |w|^2 = w^Tw = alpha^T Q alpha = 2*(dual_obj + sum alpha_i). +Thus in svm.cpp please find the place +where we calculate the dual objective value +(i.e., the subroutine Solve()) +and add a statement to print w^Tw. + +More precisely, here is what you need to do +

      +
    1. Search for "calculate objective value" in svm.cpp +
    2. +
    3. In that place, si->obj is the variable for the objective value +
    4. +
    5. Add a for loop to calculate the sum of alpha +
    6. +
    7. Calculate 2*(si->obj + sum of alpha) and print the square root of it. You now get |w|. You +need to recompile the code +
    8. +
    9. Check an earlier FAQ on printing decision values. You +need to recompile the code +
    10. +
    11. +Then print decision value divided by the |w| value obtained earlier. +
    12. +
    +

    +[Go Top] +


    + +Q: On 32-bit machines, if I use a large cache (i.e. large -m) on a linux machine, why sometimes I get "segmentation fault ?" +
    +

    + +On 32-bit machines, the maximum addressable +memory is 4GB. The Linux kernel uses 3:1 +split which means user space is 3G and +kernel space is 1G. Although there are +3G user space, the maximum dynamic allocation +memory is 2G. So, if you specify -m near 2G, +the memory will be exhausted. And svm-train +will fail when it asks more memory. +For more details, please read + +this article. +

    +The easiest solution is to switch to a + 64-bit machine. +Otherwise, there are two ways to solve this. If your +machine supports Intel's PAE (Physical Address +Extension), you can turn on the option HIGHMEM64G +in Linux kernel which uses 4G:4G split for +kernel and user space. If you don't, you can +try a software `tub' which can eliminate the 2G +boundary for dynamic allocated memory. The `tub' +is available at +http://www.bitwagon.com/tub.html. + + + +

    +[Go Top] +


    + +Q: How do I disable screen output of svm-train? +
    +

    +For command-line users, use the -q option: + 

    +> ./svm-train -q heart_scale
    +
    +

    +For library users, set the global variable +

    +extern void (*svm_print_string) (const char *);
    +
    +to specify the output format. You can disable the output by the following steps: +
      +
    1. +Declare a function to output nothing: +
      +void print_null(const char *s) {}
      +
    2. +Assign the output function of libsvm by +
      +svm_print_string = &print_null;
      +
    +Finally, a way used in earlier libsvm +is by updating svm.cpp from +
    +#if 1
    +void info(const char *fmt,...)
    +
    +to +
    +#if 0
    +void info(const char *fmt,...)
    +
    +

    +[Go Top] +


    + +Q: I would like to use my own kernel. Any example? In svm.cpp, there are two subroutines for kernel evaluations: k_function() and kernel_function(). Which one should I modify ? +
    +

    +An example is "LIBSVM for string data" in LIBSVM Tools. +

    +The reason why we have two functions is as follows. +For the RBF kernel exp(-g |xi - xj|^2), if we calculate +xi - xj first and then the norm square, there are 3n operations. +Thus we consider exp(-g (|xi|^2 - 2dot(xi,xj) +|xj|^2)) +and by calculating all |xi|^2 in the beginning, +the number of operations is reduced to 2n. +This is for the training. For prediction we cannot +do this so a regular subroutine using that 3n operations is +needed. + +The easiest way to have your own kernel is +to put the same code in these two +subroutines by replacing any kernel. +

    +[Go Top] +


    + +Q: What method does libsvm use for multi-class SVM ? Why don't you use the "1-against-the rest" method? +
    +

    +It is one-against-one. We chose it after doing the following +comparison: +C.-W. Hsu and C.-J. Lin. + +A comparison of methods +for multi-class support vector machines +, +IEEE Transactions on Neural Networks, 13(2002), 415-425. + +

    +"1-against-the rest" is a good method whose performance +is comparable to "1-against-1." We do the latter +simply because its training time is shorter. +

    +[Go Top] +


    + +Q: I would like to solve L2-loss SVM (i.e., error term is quadratic). How should I modify the code ? +
    +

    +It is extremely easy. Taking c-svc for example, to solve +

    +min_w w^Tw/2 + C \sum max(0, 1- (y_i w^Tx_i+b))^2, +

    +only two +places of svm.cpp have to be changed. +First, modify the following line of +solve_c_svc from +

    +	s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
    +		alpha, Cp, Cn, param->eps, si, param->shrinking);
    +
    +to +
    +	s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
    +		alpha, INF, INF, param->eps, si, param->shrinking);
    +
    +Second, in the class of SVC_Q, declare C as +a private variable: +
    +	double C;
    +
    +In the constructor replace +
    +	for(int i=0;i<prob.l;i++)
    +		QD[i]= (Qfloat)(this->*kernel_function)(i,i);
    +
    +with +
    +        this->C = param.C;
    +	for(int i=0;i<prob.l;i++)
    +		QD[i]= (Qfloat)(this->*kernel_function)(i,i)+0.5/C;
    +
    +Then in the subroutine get_Q, after the for loop, add +
    +        if(i >= start && i < len) 
    +		data[i] += 0.5/C;
    +
    + +

    +For one-class svm, the modification is exactly the same. For SVR, you don't need an if statement like the above. Instead, you only need a simple assignment: +

    +	data[real_i] += 0.5/C;
    +
    + + +

    +For large linear L2-loss SVM, please use +LIBLINEAR. +

    +[Go Top] +


    + +Q: In one-class SVM, parameter nu should be an upper bound of the training error rate. Why sometimes I get a training error rate bigger than nu? +
    + +

    +At optimum, some training instances should satisfy +w^Tx - rho = 0. However, numerically they may be slightly +smaller than zero. +They are then wrongly counted +as training errors. You can use a smaller stopping tolerance +(by the -e option) to make this problem less serious. + 

    +This issue does not occur for nu-SVC for +two-class classification. +We have that +

      +
    1. nu is an upper bound on the ratio of training points +on the wrong side of the hyperplane, and +
    2. therefore, nu is also an upper bound on the training error rate. +
    +Numerical issues occur in calculating the first case +because some training points satisfying y(w^Tx + b) - rho = 0 +become negative. +However, we have no numerical problems for the second case because +we compare y(w^Tx + b) and 0 for counting training errors. +

    +[Go Top] +


    + +Q: Why the code gives NaN (not a number) results? +
    +

    +This rarely happens, but few users reported the problem. +It seems that their +computers for training libsvm have the VPN client +running. The VPN software has some bugs and causes this +problem. Please try to close or disconnect the VPN client. +

    +[Go Top] +


    + +Q: Why the sign of predicted labels and decision values are sometimes reversed? +
    +

    + +This situation may occur before version 3.17. +Nothing is wrong. Very likely you have two labels +1/-1 and the first instance in your data +has -1. We give the following explanation. + +

    +Internally class labels are ordered by their first occurrence in the training set. For a k-class data, internally labels +are 0, ..., k-1, and each two-class SVM considers pair +(i, j) with i < j. Then class i is treated as positive (+1) +and j as negative (-1). +For example, if the data set has labels +5/+10 and +10 appears +first, then internally the +5 versus +10 SVM problem +has +10 as positive (+1) and +5 as negative (-1). + +

    +By this setting, if you have labels +1 and -1, +it's possible that internally they correspond to -1 and +1, +respectively. Some new users have been confused about +this, so after version 3.17, if the data set has only +two labels +1 and -1, +internally we ensure +1 to be before -1. Then class +1 +is always treated as positive in the SVM problem. +Note that this is for two-class data only. +

    +[Go Top] +


    + +Q: I don't know class labels of test data. What should I put in the first column of the test file? +
    +

    Any value is ok. In this situation, what you will use is the output file of svm-predict, which gives predicted class labels. + + +

    +[Go Top] +


    + +Q: How can I use OpenMP to parallelize LIBSVM on a multicore/shared-memory computer? +
    + +

    It is very easy if you are using GCC 4.2 +or after. + +

    In Makefile, add -fopenmp to CFLAGS. + +

    In class SVC_Q of svm.cpp, modify the for loop +of get_Q to: +

    +#pragma omp parallel for private(j) schedule(guided)
    +			for(j=start;j<len;j++)
    +
    +

    In the subroutine svm_predict_values of svm.cpp, add one line to the for loop: +

    +#pragma omp parallel for private(i) schedule(guided) 
    +		for(i=0;i<l;i++)
    +			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
    +
    +For regression, you need to modify +class SVR_Q instead. The loop in svm_predict_values +is also different because you need +a reduction clause for the variable sum: +
    +#pragma omp parallel for private(i) reduction(+:sum) schedule(guided)
    +		for(i=0;i<model->l;i++)
    +			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
    +
    + +

    Then rebuild the package. Kernel evaluations in training/testing will be parallelized. An example of running this modification on +an 8-core machine using the data set +real-sim: + +

    8 cores: +

    +%setenv OMP_NUM_THREADS 8
    +%time svm-train -c 8 -g 0.5 -m 1000 real-sim
    +175.90sec
    +
    +1 core: +
    +%setenv OMP_NUM_THREADS 1
    +%time svm-train -c 8 -g 0.5 -m 1000 real-sim
    +588.89sec
    +
    +For this data, kernel evaluations take 91% of training time. In the above example, we assume you use csh. For bash, use +
    +export OMP_NUM_THREADS=8
    +
    +instead. + +

    For Python interface, you need to add the -lgomp link option: +

    +$(CXX) -lgomp -shared -dynamiclib svm.o -o libsvm.so.$(SHVER)
    +
    + +

    For MS Windows, you need to add /openmp in CFLAGS of Makefile.win + +

    +[Go Top] +


    + +Q: How could I know which training instances are support vectors? +
    + +

    +It's very simple. Since version 3.13, you can use the function +

    +void svm_get_sv_indices(const struct svm_model *model, int *sv_indices)
    +
    +to get indices of support vectors. For example, in svm-train.c, after +
    +		model = svm_train(&prob, &param);
    +
    +you can add +
    +		int nr_sv = svm_get_nr_sv(model);
    +		int *sv_indices = Malloc(int, nr_sv);
    +		svm_get_sv_indices(model, sv_indices);
    +		for (int i=0; i<nr_sv; i++)
    +			printf("instance %d is a support vector\n", sv_indices[i]);
    +
    + +

    If you use matlab interface, you can directly check +

    +model.sv_indices
    +
    +

    +[Go Top] +
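    +If your copy of the bundled python interface also exposes the model
    +methods (an assumption; check python/README of your version), the same
    +information is available as in this sketch:
    +
    +from svmutil import svm_read_problem, svm_train
    +
    +y, x = svm_read_problem("heart_scale")
    +model = svm_train(y, x, "-c 1")
    +print(model.get_nr_sv())        # number of support vectors
    +print(model.get_sv_indices())   # 1-based indices into the training set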


    + +Q: Why sv_indices (indices of support vectors) are not stored in the saved model file? +
    + +

    +Although sv_indices is a member of the model structure +to +indicate support vectors in the training set, +we do not store its contents in the model file. +The model file is mainly used in the future for +prediction, so it is basically independent +from training data. Thus +storing sv_indices is not necessary. +Users should find support vectors right after +the training process. See the previous FAQ. +

    +[Go Top] +


    + +Q: After doing cross validation, why there is no model file outputted ? +
    +

    +Cross validation is used for selecting good parameters. +After finding them, you want to re-train the whole +data without the -v option. +

    +[Go Top] +


    + +Q: Why my cross-validation results are different from those in the Practical Guide? +
    +

    + +Due to random partitions of +the data, on different systems CV accuracy values +may be different. +

    +[Go Top] +


    + +Q: On some systems CV accuracy is the same in several runs. How could I use different data partitions? In other words, how do I set random seed in LIBSVM? +
    +

    +If you use GNU C library, +the default seed 1 is considered. Thus you always +get the same result of running svm-train -v. +To have different seeds, you can add the following code +in svm-train.c: +

    +#include <time.h>
    +
    +and in the beginning of main(), +
    +srand(time(0));
    +
    +Alternatively, if you are not using GNU C library +and would like to use a fixed seed, you can have +
    +srand(1);
    +
    + +

    +For Java, the random number generator +is initialized using the time information. +So results of two CV runs are different. +To fix the seed, after version 3.1 (released +in mid 2011), you can add +

    +svm.rand.setSeed(0);
    +
    +in the main() function of svm_train.java. + +

    +If you use CV to select parameters, it is recommended to use identical folds +under different parameters. In this case, you can consider fixing the seed. +

    +[Go Top] +


    + +Q: Why on windows sometimes grid.py fails? +
    +

    + +This problem shouldn't happen after version +2.85. If you are using earlier versions, +please download the latest one. + + +

    +[Go Top] +


    + +Q: Why grid.py/easy.py sometimes generates the following warning message? +
    +
    +Warning: empty z range [62.5:62.5], adjusting to [61.875:63.125]
    +Notice: cannot contour non grid data!
    +
    +

    Nothing is wrong and please disregard the +message. It is from gnuplot when drawing +the contour. +

    +[Go Top] +


    + +Q: How do I choose the kernel? +
    + +

    +In general we suggest you try the RBF kernel first. +A recent result by Keerthi and Lin +( +download paper here) +shows that if RBF is used with model selection, +then there is no need to consider the linear kernel. +The kernel matrix using sigmoid may not be positive definite, +and in general its accuracy is not better than RBF +(see the paper by Lin and Lin +( +download paper here)). +Polynomial kernels are ok, but if a high degree is used, +numerical difficulties tend to happen +(think of the dth power of a number smaller than 1 going to 0 +and of a number larger than 1 going to infinity). 

    +[Go Top] +


    + +Q: How does LIBSVM perform parameter selection for multi-class problems? +
    + +

    +LIBSVM implements "one-against-one" multi-class method, so there are +k(k-1)/2 binary models, where k is the number of classes. + +

    +We can consider two ways to conduct parameter selection. + +

      +
    1. +For any two classes of data, a parameter selection procedure is conducted. Finally, +each decision function has its own optimal parameters. +
    2. +The same parameters are used for all k(k-1)/2 binary classification problems. +We select parameters that achieve the highest overall performance. +
    + +Each has its own advantages. A +single parameter set may not be uniformly good for all k(k-1)/2 decision functions. +However, as the overall accuracy is the final consideration, one parameter set +for one decision function may lead to over-fitting. In the paper +

    +Chen, Lin, and Schölkopf, + +A tutorial on nu-support vector machines. + +Applied Stochastic Models in Business and Industry, 21(2005), 111-136, + +

    +they have experimentally +shown that the two methods give similar performance. +Therefore, currently the parameter selection in LIBSVM +takes the second approach by considering the same parameters for +all k(k-1)/2 models. +

    +[Go Top] +


    + +Q: How do I choose parameters for one-class SVM as training data are in only one class? +
    +

    +You have a pre-specified true positive rate in mind and then search for +parameters which achieve a similar cross-validation accuracy. 

    +[Go Top] +


    + +Q: Instead of grid.py, what if I would like to conduct parameter selection using other programmin languages? +
    +

    +For MATLAB, please see another question in FAQ. + +

    +For using shell scripts, please check the code written by Bjarte Johansen +

    +[Go Top] +


    + +Q: Why training a probability model (i.e., -b 1) takes a longer time? +
    +

    +To construct this probability model, we internally conduct a +cross validation, which is more time consuming than +a regular training. +Hence, in general you do parameter selection first without +-b 1. You only use -b 1 when good parameters have been +selected. In other words, you avoid using -b 1 and -v +together. +

    +[Go Top] +
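    +A sketch of that two-step workflow with the bundled python interface
    +(heart_scale and the parameter values are only placeholders):
    +
    +from svmutil import svm_read_problem, svm_train, svm_predict
    +
    +y, x = svm_read_problem("heart_scale")
    +# 1. parameter selection by cross validation, without -b 1
    +cv_acc = svm_train(y, x, "-v 5 -c 4 -g 0.5")
    +# 2. final training with the chosen parameters and probability outputs
    +model = svm_train(y, x, "-c 4 -g 0.5 -b 1")
    +labels, acc, prob_estimates = svm_predict(y, x, model, "-b 1")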


    + +Q: Why using the -b option does not give me better accuracy? +
    +

    +There is absolutely no reason the probability outputs guarantee +you better accuracy. The main purpose of this option is +to provide you the probability estimates, but not to boost +prediction accuracy. From our experience, +after proper parameter selections, in general with +and without -b have similar accuracy. Occasionally there +are some differences. +It is not recommended to compare the two under +just a fixed parameter +set as more differences will be observed. +

    +[Go Top] +


    + +Q: Why using svm-predict -b 0 and -b 1 gives different accuracy values? +
    +

    +Let's just consider two-class classification here. After probability information is obtained in training, +we do not have +

    +prob > = 0.5 if and only if decision value >= 0. +

    +So predictions may be different with -b 0 and 1. +

    +[Go Top] +


    + +Q: How can I save images drawn by svm-toy? +
    +

    +For Microsoft windows, first press the "print screen" key on the keyboard. +Open "Microsoft Paint" +(included in Windows) +and press "ctrl-v." Then you can clip +the part of picture which you want. +For X windows, you can +use the program "xv" or "import" to grab the picture of the svm-toy window. +

    +[Go Top] +


    + +Q: I press the "load" button to load data points but why svm-toy does not draw them ? +
    +

    +The program svm-toy assumes both attributes (i.e. x-axis and y-axis +values) are in (0,1). Hence you want to scale your +data to between a small positive number and +a number less than but very close to 1. +Moreover, class labels must be 1, 2, or 3 +(not 1.0, 2.0 or anything else). +

    +[Go Top] +


    + +Q: I would like svm-toy to handle more than three classes of data, what should I do ? +
    +

    +Taking windows/svm-toy.cpp as an example, you need to +modify it and the difference +from the original file is as the following: (for five classes of +data) +

    +30,32c30
    +< 	RGB(200,0,200),
    +< 	RGB(0,160,0),
    +< 	RGB(160,0,0)
    +---
    +> 	RGB(200,0,200)
    +39c37
    +< HBRUSH brush1, brush2, brush3, brush4, brush5;
    +---
    +> HBRUSH brush1, brush2, brush3;
    +113,114d110
    +< 	brush4 = CreateSolidBrush(colors[7]);
    +< 	brush5 = CreateSolidBrush(colors[8]);
    +155,157c151
    +< 	else if(v==3) return brush3;
    +< 	else if(v==4) return brush4;
    +< 	else return brush5;
    +---
    +> 	else return brush3;
    +325d318
    +< 	  int colornum = 5;
    +327c320
    +< 		svm_node *x_space = new svm_node[colornum * prob.l];
    +---
    +> 		svm_node *x_space = new svm_node[3 * prob.l];
    +333,338c326,331
    +< 			x_space[colornum * i].index = 1;
    +< 			x_space[colornum * i].value = q->x;
    +< 			x_space[colornum * i + 1].index = 2;
    +< 			x_space[colornum * i + 1].value = q->y;
    +< 			x_space[colornum * i + 2].index = -1;
    +< 			prob.x[i] = &x_space[colornum * i];
    +---
    +> 			x_space[3 * i].index = 1;
    +> 			x_space[3 * i].value = q->x;
    +> 			x_space[3 * i + 1].index = 2;
    +> 			x_space[3 * i + 1].value = q->y;
    +> 			x_space[3 * i + 2].index = -1;
    +> 			prob.x[i] = &x_space[3 * i];
    +397c390
    +< 				if(current_value > 5) current_value = 1;
    +---
    +> 				if(current_value > 3) current_value = 1;
    +
    +

    +[Go Top] +


    + +Q: What is the difference between Java version and C++ version of libsvm? +
    +

    +They are the same thing. We just rewrote the C++ code +in Java. +

    +[Go Top] +


    + +Q: Is the Java version significantly slower than the C++ version? +
    +

    +This depends on the VM you used. We have seen good +VMs which make the Java version quite competitive with +the C++ code (though still slower). 

    +[Go Top] +


    + +Q: While training I get the following error message: java.lang.OutOfMemoryError. What is wrong? +
    +

    +You should try to increase the maximum Java heap size. +For example, +

    +java -Xmx2048m -classpath libsvm.jar svm_train ...
    +
    +sets the maximum heap size to 2048M. +

    +[Go Top] +


    + +Q: Why you have the main source file svm.m4 and then transform it to svm.java? +
    +

    +Unlike C, Java does not have a preprocessor built-in. +However, we need some macros (see first 3 lines of svm.m4). + + +

    +[Go Top] +


    + +Q: Except the python-C++ interface provided, could I use Jython to call libsvm ? +
    +

    Yes, here are some examples: + +

    +$ export CLASSPATH=$CLASSPATH:~/libsvm-2.91/java/libsvm.jar
    +$ ./jython
    +Jython 2.1a3 on java1.3.0 (JIT: jitc)
    +Type "copyright", "credits" or "license" for more information.
    +>>> from libsvm import *
    +>>> dir()
    +['__doc__', '__name__', 'svm', 'svm_model', 'svm_node', 'svm_parameter',
    +'svm_problem']
    +>>> x1 = [svm_node(index=1,value=1)]
    +>>> x2 = [svm_node(index=1,value=-1)]
    +>>> param = svm_parameter(svm_type=0,kernel_type=2,gamma=1,cache_size=40,eps=0.001,C=1,nr_weight=0,shrinking=1)
    +>>> prob = svm_problem(l=2,y=[1,-1],x=[x1,x2])
    +>>> model = svm.svm_train(prob,param)
    +*
    +optimization finished, #iter = 1
    +nu = 1.0
    +obj = -1.018315639346838, rho = 0.0
    +nSV = 2, nBSV = 2
    +Total nSV = 2
    +>>> svm.svm_predict(model,x1)
    +1.0
    +>>> svm.svm_predict(model,x2)
    +-1.0
    +>>> svm.svm_save_model("test.model",model)
    +
    +
    + +

    +[Go Top] +


    + +Q: I compile the MATLAB interface without problem, but why errors occur while running it? +
    +

    +Your compiler version may not be supported/compatible for MATLAB. +Please check this MATLAB page first and then specify the version +number. For example, if g++ X.Y is supported, replace +

    +CXX = g++
    +
    +in the Makefile with +
    +CXX = g++-X.Y
    +
    +

    +[Go Top] +


    + +Q: On 64bit Windows I compile the MATLAB interface without problem, but why errors occur while running it? +
    +

    + + +Please make sure that you use +the -largeArrayDims option in make.m. For example, +

    +mex -largeArrayDims -O -c svm.cpp
    +
    + +Moreover, if you use Microsoft Visual Studio, +probably it is not properly installed. +See the explanation +here. +

    +[Go Top] +


    + +Q: Does the MATLAB interface provide a function to do scaling? +
    +

    +It is extremely easy to do scaling under MATLAB. +The following one-line code scale each feature to the range +of [0,1]: +

    +(data - repmat(min(data,[],1),size(data,1),1))*spdiags(1./(max(data,[],1)-min(data,[],1))',0,size(data,2),size(data,2))
    +
    +

    +[Go Top] +


    + +Q: How could I use MATLAB interface for parameter selection? +
    +

    +One can do this by a simple loop. +See the following example: +

    +bestcv = 0;
    +for log2c = -1:3,
    +  for log2g = -4:1,
    +    cmd = ['-v 5 -c ', num2str(2^log2c), ' -g ', num2str(2^log2g)];
    +    cv = svmtrain(heart_scale_label, heart_scale_inst, cmd);
    +    if (cv >= bestcv),
    +      bestcv = cv; bestc = 2^log2c; bestg = 2^log2g;
    +    end
    +    fprintf('%g %g %g (best c=%g, g=%g, rate=%g)\n', log2c, log2g, cv, bestc, bestg, bestcv);
    +  end
    +end
    +
    +You may adjust the parameter range in the above loops. +

    +[Go Top] +
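    +A Python counterpart of the loop above, using the bundled python interface
    +(heart_scale and the parameter ranges are placeholders; svm_train returns
    +the cross-validation accuracy when -v is given):
    +
    +from svmutil import svm_read_problem, svm_train
    +
    +y, x = svm_read_problem("heart_scale")
    +bestcv, bestc, bestg = 0, None, None
    +for log2c in range(-1, 4):
    +    for log2g in range(-4, 2):
    +        cv = svm_train(y, x, f"-q -v 5 -c {2 ** log2c} -g {2 ** log2g}")
    +        if cv >= bestcv:
    +            bestcv, bestc, bestg = cv, 2 ** log2c, 2 ** log2g
    +        print(f"{log2c} {log2g} {cv} (best c={bestc}, g={bestg}, rate={bestcv})")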


    + +Q: I use MATLAB parallel programming toolbox on a multi-core environment for parameter selection. Why the program is even slower? +
    +

    +Fabrizio Lacalandra of University of Pisa reported this issue. +It seems the problem is caused by the screen output. +If you disable the info function +using

    #if 0,
    then the problem +may be solved. +

    +[Go Top] +


    + +Q: How to use LIBSVM with OpenMP under MATLAB/Octave? +
    + +

    +First, you must modify svm.cpp. Check the following faq, + +How can I use OpenMP to parallelize LIBSVM on a multicore/shared-memory computer? + +

    +To build the MATLAB/Octave interface, we recommend using make.m. +You must append '-fopenmp' to CXXFLAGS and add '-lgomp' to mex options in make.m. +See details below. + +

    +For MATLAB users, the modified code is: +

    +mex CFLAGS="\$CFLAGS -std=c99" CXXFLAGS="\$CXXFLAGS -fopenmp" -largeArrayDims -I.. -lgomp svmtrain.c ../svm.cpp svm_model_matlab.c
    +mex CFLAGS="\$CFLAGS -std=c99" CXXFLAGS="\$CXXFLAGS -fopenmp" -largeArrayDims -I.. -lgomp svmpredict.c ../svm.cpp svm_model_matlab.c
    +
    + +

    +For Octave users, the modified code is: +

    +setenv('CXXFLAGS', '-fopenmp') 
    +mex -I.. -lgomp svmtrain.c ../svm.cpp svm_model_matlab.c
    +mex -I.. -lgomp svmpredict.c ../svm.cpp svm_model_matlab.c
    +
    + +

    +If make.m fails under matlab and you use Makefile to compile the codes, +you must modify two files: + +

    +You must append '-fopenmp' to CFLAGS in ../Makefile for C/C++ codes: +

    +CFLAGS = -Wall -Wconversion -O3 -fPIC -fopenmp -I$(MATLABDIR)/extern/include -I..
    +
    +and add '-lgomp' to MEX_OPTION in Makefile for the matlab/octave interface: +
    +MEX_OPTION += -lgomp
    +
    + +

    + To run the code, you must specify the number of threads. For + example, before executing matlab/octave, you run +

    +> export OMP_NUM_THREADS=8
    +> matlab
    +
    +Here we assume Bash is used. Unfortunately, we do not know yet +how to specify the number of threads within MATLAB/Octave. Our +experiments show that +
    +>> setenv('OMP_NUM_THREADS', '8');
    +
    +does not work. Please contact us if you +see how to solve this problem. On the other hand, you can +specify the number of threads in the source code (thanks +to comments from Ricardo Santiago-mozos): +
    +#pragma omp parallel  for private(i) num_threads(8)
    +
    +

    +[Go Top] +


    + +Q: How could I generate the primal variable w of linear SVM? +
    +

    +Let's start from the binary class and +assume you have two labels -1 and +1. +After obtaining the model from calling svmtrain, +do the following to have w and b: +

    +w = model.SVs' * model.sv_coef;
    +b = -model.rho;
    +
    +if model.Label(1) == -1
    +  w = -w;
    +  b = -b;
    +end
    +
    +If you do regression or one-class SVM, then the if statement is not needed. + +

    For multi-class SVM, we illustrate the setting +in the following example of running the iris +data, which have 3 classes +

      
    +> [y, x] = libsvmread('../../htdocs/libsvmtools/datasets/multiclass/iris.scale');
    +> m = svmtrain(y, x, '-t 0')
    +
    +m = 
    +
    +    Parameters: [5x1 double]
    +      nr_class: 3
    +       totalSV: 42
    +           rho: [3x1 double]
    +         Label: [3x1 double]
    +         ProbA: []
    +         ProbB: []
    +           nSV: [3x1 double]
    +       sv_coef: [42x2 double]
    +           SVs: [42x4 double]
    +
    +sv_coef is like: +
    ++-+-+--------------------+
    +|1|1|                    |
    +|v|v|  SVs from class 1  |
    +|2|3|                    |
    ++-+-+--------------------+
    +|1|2|                    |
    +|v|v|  SVs from class 2  |
    +|2|3|                    |
    ++-+-+--------------------+
    +|1|2|                    |
    +|v|v|  SVs from class 3  |
    +|3|3|                    |
    ++-+-+--------------------+
    +
    +so we need to see nSV of each classes. +
      
    +> m.nSV
    +
    +ans =
    +
    +     3
    +    21
    +    18
    +
    +Suppose the goal is to find the vector w of classes +1 vs 3. Then +y_i alpha_i of training 1 vs 3 are +
      
    +> coef = [m.sv_coef(1:3,2); m.sv_coef(25:42,1)];
    +
    +and SVs are: +
      
    +> SVs = [m.SVs(1:3,:); m.SVs(25:42,:)];
    +
    +Hence, w is +
    +> w = SVs'*coef;
    +
    +For rho, +
    +> m.rho
    +
    +ans =
    +
    +    1.1465
    +    0.3682
    +   -1.9969
    +> b = -m.rho(2);
    +
    +because rho is arranged by 1vs2 1vs3 2vs3. + + + +

    +[Go Top] +
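    +A rough Python analogue of the two-class part above (a sketch only; it
    +assumes a linear kernel, the bundled python interface, and that the model
    +methods get_sv_coef/get_SV/get_labels are available in your version):
    +
    +import numpy as np
    +from svmutil import svm_read_problem, svm_train
    +
    +y, x = svm_read_problem("heart_scale")
    +model = svm_train(y, x, "-t 0 -c 1")
    +
    +n_feat = max(max(xi) for xi in x)            # largest feature index used
    +w = np.zeros(n_feat)
    +for coef, sv in zip(model.get_sv_coef(), model.get_SV()):
    +    for idx, val in sv.items():
    +        if idx > 0:                          # skip the -1 end marker if present
    +            w[idx - 1] += coef[0] * val
    +b = -model.rho[0]
    +if model.get_labels()[0] == -1:              # keep the same sign convention
    +    w, b = -w, -b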


    + +Q: Is there an OCTAVE interface for libsvm? +
    +

    +Yes, after libsvm 2.86, the matlab interface +works on OCTAVE as well. Please use make.m by typing +

    +>> make 
    +
    +under OCTAVE. +

    +[Go Top] +


    + +Q: How to handle the name conflict between svmtrain in the libsvm matlab interface and that in MATLAB bioinformatics toolbox? +
    +

    +The easiest way is to rename the svmtrain binary +file (e.g., svmtrain.mexw32 on 32-bit windows) +to a different +name (e.g., svmtrain2.mexw32). +

    +[Go Top] +


    + +Q: On Windows I got an error message "Invalid MEX-file: Specific module not found" when running the pre-built MATLAB interface in the windows sub-directory. What should I do? +
    +

    + +The error usually happens +when there are missing runtime components +such as MSVCR100.dll on your Windows platform. +You can use tools such as +Dependency +Walker to find missing library files. + +

    +For example, if the pre-built MEX files are compiled by +Visual C++ 2010, +you must have installed +Microsoft Visual C++ Redistributable Package 2010 +(vcredist_x86.exe). You can easily find the freely +available file from Microsoft's web site. + +

    +For 64bit Windows, the situation is similar. If +the pre-built files are by +Visual C++ 2008, then you must have +Microsoft Visual C++ Redistributable Package 2008 +(vcredist_x64.exe). +

    +[Go Top] +


    + +Q: LIBSVM supports 1-vs-1 multi-class classification. If instead I would like to use 1-vs-rest, how to implement it using MATLAB interface? +
    + +

    +Please use code in the following directory. The following example shows how to +train and test the problem dna (training and testing). + +

    Load, train and predict data: +

    +[trainY trainX] = libsvmread('./dna.scale');
    +[testY testX] = libsvmread('./dna.scale.t');
    +model = ovrtrain(trainY, trainX, '-c 8 -g 4');
    +[pred ac decv] = ovrpredict(testY, testX, model);
    +fprintf('Accuracy = %g%%\n', ac * 100);
    +
    +Conduct CV on a grid of parameters +
    +bestcv = 0;
    +for log2c = -1:2:3,
    +  for log2g = -4:2:1,
    +    cmd = ['-q -c ', num2str(2^log2c), ' -g ', num2str(2^log2g)];
    +    cv = get_cv_ac(trainY, trainX, cmd, 3);
    +    if (cv >= bestcv),
    +      bestcv = cv; bestc = 2^log2c; bestg = 2^log2g;
    +    end
    +    fprintf('%g %g %g (best c=%g, g=%g, rate=%g)\n', log2c, log2g, cv, bestc, bestg, bestcv);
    +  end
    +end
    +
    +

    +[Go Top] +


    + +Q: I tried to install matlab interface on mac, but failed. What should I do? +
    + +

    +We assume that in a matlab command window you change directory to libsvm/matlab and type +

    +>> make
    +
    +We discuss the following situations. + +
      +
    1. An error message like "libsvmread.c:1:19: fatal error: +stdio.h: No such file or directory" appears. + +

      +Reason: "make" looks for a C++ compiler, but +no compiler is found. To get one, you can +

        +
      • Install XCode offered by Apple Inc. +
      • Install XCode Command Line Tools. +
      + +

      +

    2. On OS X with Xcode 4.2+, I got an error message like "llvm-gcc-4.2: +command not found." + +

      +Reason: Since Apple Inc. only ships llsvm-gcc instead of gcc-4.2, +llvm-gcc-4.2 cannot be found. + +

      +If you are using Xcode 4.2-4.6, +a related solution is offered at +http://www.mathworks.com/matlabcentral/answers/94092. + +

      +On the other hand, for Xcode 5 (including Xcode 4.2-4.6), in a Matlab command window, enter +

        +
      • cd (matlabroot) +
      • cd bin +
      • Backup your mexopts.sh first +
      • edit mexopts.sh +
      • Scroll down to "maci64" section. Change +
        +		CC='llvm-gcc-4.2'
        +		CXX='llvm-g++-4.2'
        +
        +to +
        +		CC='llvm-gcc'
        +		CXX='llvm-g++'
        +
        +
      + +Please also ensure that SDKROOT corresponds to the SDK version you are using. + +

      +

    3. Other errors: you may check http://www.mathworks.com/matlabcentral/answers/94092. + +
    +

    +[Go Top] +


    + +Q: I tried to install octave interface on windows, but failed. What should I do? +
    + +

    +This may be due to +that Octave's math.h file does not +refer to the correct location of Visual Studio's math.h. +Please see this nice page for detailed +instructions. +

    +[Go Top] +


    +

    +LIBSVM home page +

    + + diff --git a/src/backend/app/algorithms/evaluates/libsvm/Makefile b/src/backend/app/algorithms/evaluates/libsvm/Makefile new file mode 100644 index 0000000..db6ab34 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/libsvm/Makefile @@ -0,0 +1,25 @@ +CXX ?= g++ +CFLAGS = -Wall -Wconversion -O3 -fPIC +SHVER = 2 +OS = $(shell uname) + +all: svm-train svm-predict svm-scale + +lib: svm.o + if [ "$(OS)" = "Darwin" ]; then \ + SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,libsvm.so.$(SHVER)"; \ + else \ + SHARED_LIB_FLAG="-shared -Wl,-soname,libsvm.so.$(SHVER)"; \ + fi; \ + $(CXX) $${SHARED_LIB_FLAG} svm.o -o libsvm.so.$(SHVER) + +svm-predict: svm-predict.c svm.o + $(CXX) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm +svm-train: svm-train.c svm.o + $(CXX) $(CFLAGS) svm-train.c svm.o -o svm-train -lm +svm-scale: svm-scale.c + $(CXX) $(CFLAGS) svm-scale.c -o svm-scale +svm.o: svm.cpp svm.h + $(CXX) $(CFLAGS) -c svm.cpp +clean: + rm -f *~ svm.o svm-train svm-predict svm-scale libsvm.so.$(SHVER) diff --git a/src/backend/app/algorithms/evaluates/libsvm/Makefile.win b/src/backend/app/algorithms/evaluates/libsvm/Makefile.win new file mode 100644 index 0000000..b1d3570 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/libsvm/Makefile.win @@ -0,0 +1,33 @@ +#You must ensure nmake.exe, cl.exe, link.exe are in system path. +#VCVARS64.bat +#Under dosbox prompt +#nmake -f Makefile.win + +########################################## +CXX = cl.exe +CFLAGS = /nologo /O2 /EHsc /I. /D _WIN64 /D _CRT_SECURE_NO_DEPRECATE +TARGET = windows + +all: $(TARGET)\svm-train.exe $(TARGET)\svm-predict.exe $(TARGET)\svm-scale.exe $(TARGET)\svm-toy.exe lib + +$(TARGET)\svm-predict.exe: svm.h svm-predict.c svm.obj + $(CXX) $(CFLAGS) svm-predict.c svm.obj -Fe$(TARGET)\svm-predict.exe + +$(TARGET)\svm-train.exe: svm.h svm-train.c svm.obj + $(CXX) $(CFLAGS) svm-train.c svm.obj -Fe$(TARGET)\svm-train.exe + +$(TARGET)\svm-scale.exe: svm.h svm-scale.c + $(CXX) $(CFLAGS) svm-scale.c -Fe$(TARGET)\svm-scale.exe + +$(TARGET)\svm-toy.exe: svm.h svm.obj svm-toy\windows\svm-toy.cpp + $(CXX) $(CFLAGS) svm-toy\windows\svm-toy.cpp svm.obj user32.lib gdi32.lib comdlg32.lib -Fe$(TARGET)\svm-toy.exe + +svm.obj: svm.cpp svm.h + $(CXX) $(CFLAGS) -c svm.cpp + +lib: svm.cpp svm.h svm.def + $(CXX) $(CFLAGS) -LD svm.cpp -Fe$(TARGET)\libsvm -link -DEF:svm.def + +clean: + -erase /Q *.obj $(TARGET)\*.exe $(TARGET)\*.dll $(TARGET)\*.exp $(TARGET)\*.lib + diff --git a/src/backend/app/algorithms/evaluates/libsvm/README b/src/backend/app/algorithms/evaluates/libsvm/README new file mode 100644 index 0000000..5b32236 --- /dev/null +++ b/src/backend/app/algorithms/evaluates/libsvm/README @@ -0,0 +1,769 @@ +Libsvm is a simple, easy-to-use, and efficient software for SVM +classification and regression. It solves C-SVM classification, nu-SVM +classification, one-class-SVM, epsilon-SVM regression, and nu-SVM +regression. It also provides an automatic model selection tool for +C-SVM classification. This document explains the use of libsvm. + +Libsvm is available at +http://www.csie.ntu.edu.tw/~cjlin/libsvm +Please read the COPYRIGHT file before using libsvm. + +Table of Contents +================= + +- Quick Start +- Installation and Data Format +- `svm-train' Usage +- `svm-predict' Usage +- `svm-scale' Usage +- Tips on Practical Use +- Examples +- Precomputed Kernels +- Library Usage +- Java Version +- Building Windows Binaries +- Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc. 
+- MATLAB/OCTAVE Interface +- Python Interface +- Additional Information + +Quick Start +=========== + +If you are new to SVM and if the data is not large, please go to +`tools' directory and use easy.py after installation. It does +everything automatic -- from data scaling to parameter selection. + +Usage: easy.py training_file [testing_file] + +More information about parameter selection can be found in +`tools/README.' + +Installation and Data Format +============================ + +On Unix systems, type `make' to build the `svm-train' and `svm-predict' +programs. Run them without arguments to show the usages of them. + +On other systems, consult `Makefile' to build them (e.g., see +'Building Windows binaries' in this file) or use the pre-built +binaries (Windows binaries are in the directory `windows'). + +The format of training and testing data file is: + +