unzipVisio <- function(x.dir = ".", exdir = "tmp"){
# ---------------------------------------------------------------------------
# 指定したフォルダにあるvsdx形式のvisioファイルを
# 指定したフォルダへ展開する。
#
# Args:
# x.dir: vsdx file source path(class is character)
# exdir: dst unzip path(class is character)
#
# Returns:
# nothing
# ---------------------------------------------------------------------------
x <- dir(x.dir, pattern = ".vsdx")
if (file.exists(exdir)){
dir.create(exdir)
}
lapply(x, unzip, exdir = unzip.path)
}
extractVisioFile <- function(fname, exdir){
# ---------------------------------------------------------------------------
# visio fileを展開する。
#
# Args:
# x: visio filename(class is character)
# exdir: extract path (class is character)
#
# Returns:
# extract path after extract.(class is character)
#
#
# ---------------------------------------------------------------------------
tryCatch(
(rtn <- unzip(fname, exdir = exdir )),
error = function(e) print(e)
)
z <- grep(".xml.rels", rtn)
grep("pages", rtn[-z], value = TRUE)
}
getXML.documentType <- function(x){
# ---------------------------------------------------------------------------
# visio xml データのattributes(xml tab)情報をdoc typeとして返す。
#
# Args:
# x: visio data(class is character)
#
# Returns:
# visio doc type(class is character)
#
# todo:
# そもそも、atributes情報をそのまま利用すれば、この関数は不要なので
# この関数を利用しない処理を検討すべき。
#
# ---------------------------------------------------------------------------
rtn <- c()
x <- attributes(x)
if(any(x$names == "Page")){
rtn <- c(rtn, "info.pages")
}
if(any(x$names == "Connects")){
rtn <- c(rtn, "info.Connects")
}
if(any(x$names == "Shapes")){
rtn <- c(rtn, "info.Shapes")
}
if(any(x$names == "Shape")){
rtn <- c(rtn, "info.Shape")
}
if(any(x$names == "Text")){
rtn <- c(rtn, "info.Text")
}
if(any(x$names == "Cell")){
rtn <- c(rtn, "info.Cell")
}
if(any(x$names == ".attrs")){
rtn <- c(rtn, "info.Attrs")
}
if(is.null(rtn)){
print("not found document type")
print(x$names)
}
rtn
}
parseInfoPagesXML <- function(x){
# ---------------------------------------------------------------------------
# visio xml データのattributesからdoc typeを返す
#
# Args:
# x: visio data
#
# Returns:
# visio doc type(character)
#
# todo:
# そもそも、atributes情報をそのまま利用すれば、この関数は不要なので
# この関数を利用しない処理を検討すべき。
#
# ---------------------------------------------------------------------------
z <- which(names(x) != ".attrs")
n <- length(z)
if(n == 0){
print("parse error")
return(NA)
}
# debug ----------------
if(all(is.null(x[z]))){
browser()
print(paste(names(z), "data is NULL.", sep = " "))
return(NA)
}
# ----------------------
rtn <- lapply(x[z], listToDf, ".attrs")
rtn <- bind.data.frame(rtn)
rtn
}
parseInfoConnectsXML <- function(x){
# ---------------------------------------------------------------------------
# visio xml からConnects tagに含まれる情報を返す。
#
# Args:
# x: visio data(class is list)
#
# Returns:
# visio connects data (class is list)
#
# ---------------------------------------------------------------------------
z <- which(names(x) == "Connects")
n <- length(z)
if(n == 0){
print("parse error")
return(NA)
}
rtn <- lapply(x[[z]], t)
rtn <- lapply(rtn, as.data.frame, stringsAsFactors = FALSE)
#browser()
rtn <- bind.data.frame(rtn)
rtn <- getConnect(rtn)
rtn <- bind.data.frame(rtn)
rtn
}
parseInfoShapesXML <- function(x){
# ---------------------------------------------------------------------------
# visio xml かShapes tagに含まれる情報を返す。
#
# Args:
# x: visio data(class is list)
#
# Returns:
# visio shapes data (class is data.frame)
#
# ---------------------------------------------------------------------------
z <- which(names(x) == "Shapes")
n <- length(z)
if(n == 0){
print("parse error")
return(NA)
}
#docType <- getXML.documentType(x[[z]])
rtn <- lapply(x[[z]], parseVisioDocAll)
#rtn <- list(Shapes = rtn)
rtn <- lapply(rtn, listToDf.byCol, stringsAsFactors = FALSE)
rtn <- bind.data.frame(rtn)
if (any(names(rtn) == "Name")){
rtn[["Name"]] <- iconv(rtn[["Name"]], "UTF-8", "cp932")
}
rtn
}
parseInfoShapeXML <- function(x, rtnAsDF = TRUE){
# ---------------------------------------------------------------------------
# visio xml から Shape tagに含まれる情報を返す。
#
# Args:
# x: visio data(class is list)
# rtnAsDF: as data.frame Flag(class is logical)
# TRUE: return data is data.frame
# FALSE: return data is list
#
# Returns:
# visio shape data (class is data.frame or list.)
#
# ---------------------------------------------------------------------------
docType <- getXML.documentType(x)
rtn <- lapply(docType, parseVisioDoc, x)
names(rtn) <- docType
if(rtnAsDF){
rtn <- listToDf.byCol(rtn, addName = TRUE, stringsAsFactors = FALSE)
}
rtn
}
parseInfoShapeAttrsXML <- function(x){
# ---------------------------------------------------------------------------
# Shape tagに含まれる.attrs情報を返す。
#
# Args:
# x: visio data(class is list)
#
# Returns:
# visio .attris data of shape.(class is list.)
#
# ---------------------------------------------------------------------------
z <- which(names(x) == ".attrs")
n <- length(z)
if(n == 0){
print("parse error")
return(NA)
}
# debug ----------------
if(all(is.null(x[[z]]))){
#browser()
print(".attrs data is NULL.")
return(NA)
}
# ----------------------
rtn <- lapply(x[z], listToDf)
rtn <- bind.data.frame(rtn)
rtn
}
parseInfoShapeTextXML <- function(x){
# ---------------------------------------------------------------------------
# Shape tagに含まれるText情報を返す。
#
# Args:
# x: visio data(class is list)
#
# Returns:
# visio text data of shape.(class is list.)
#
# ---------------------------------------------------------------------------
#browser()
z <- which(names(x) == "Text")
n <- length(z)
if(n == 0){
print("parse error")
return(NA)
}
if(all(is.null(x[[z]]))){
#browser()
print("Text data is NULL")
return(NA)
}
rtn <- x[[z]]
rtn.names <- names(rtn)
if (is.list(rtn)){
rtn <- lapply(rtn, listToDf)
}
rtn <- bind.data.frame(rtn)
if (is.null(rtn.names)){
rtn.names <- "text"
}
names(rtn) <- rtn.names
# debug -----------------------------------------
if(any(unlist(lapply(rtn, class)) == "factor")){
browser()
}
# -----------------------------------------------
rtn
}
parseCellXML <- function(x){
# ---------------------------------------------------------------------------
# Shape tagに含まれるCell情報を返す。
#
# Args:
# x: visio data(class is list)
#
# Returns:
# visio cell data of shape.(class is list.)
#
# ---------------------------------------------------------------------------
x.name <- "unkown"
z <- (names(x) == "N")
n <- length(z)
if(any(z)){
x.name <- x[z]
}
rtn <- x[!z]
names(rtn) <- paste(x.name, names(x[!z]), sep = ".")
rtn <- as.data.frame(t(rtn), stringsAsFactors = FALSE)
rtn
}
parseInfoCellXML <- function(x){
z <- which(attributes(x)$names == "Cell")
n <- length(z)
if (n == 0){
print("parse error")
return(NA)
}
rtn <- lapply(x[z], parseCellXML)
rtn <- listToDf.byCol(rtn, stringsAsFactors = FALSE)
rtn
}
listToDf <- function(x, x.label = NULL){
# ---------------------------------------------------------------------------
# trance forme list to data.frame。
#
# Args:
# x: list data(class is list)
# x.label: names character(class is character)
#
#
# Returns:
# visio text data of shape.(class is list.)
#
# ---------------------------------------------------------------------------
rtn <- NA
if(is.null(x)){
browser()
return(rtn)
}
rtn <- as.data.frame(t(x), stringsAsFactors = FALSE)
if(!is.null(x.label)){
z <- (names(x) == x.label)
if(any(z)){
if(is.null(x[[x.label]])){
browser()
}
rtn <- as.data.frame(t(x[[x.label]]), stringsAsFactors = FALSE)
}
}
# debug -----------------------------------------
if(any(unlist(lapply(rtn, class)) == "factor")){
browser()
}
# -----------------------------------------------
rtn
}
bind.data.frame <- function(x){
# ---------------------------------------------------------------------------
# bind the data.frame in the list and return it the data.frame.
#
# Args:
# x: list data(class is list)
#
# Returns:
# data.frame (class is data.frame.)
#
# ---------------------------------------------------------------------------
rtn <- data.frame()
for (i in x){
n.rtn <- length(rtn)
n.i 0), (n.rtn != n.i))){
rtn <- merge(rtn, i, all = TRUE)
next
}
if(any(names(rtn) != names(i))){
rtn <- merge(rtn, i, all = TRUE)
next
}
rtn <- rbind(rtn, i, stringsAsFactors = FALSE)
}
rtn
}
listToDf.byCol <- function(x, addName = FALSE, ...){
# ---------------------------------------------------------------------------
# Transform the list to a data.frame by column.
#
# Args:
# x: list data(class is list)
# addName:
# TRUE: add data.frame names
#
# Returns:
# data.frame (class is data.frame.)
#
# ---------------------------------------------------------------------------
#x <- lapply(x, t)
rtn <- data.frame()
rtn.names <- c()
# for (i in x){
# if(length(rtn) <= 0){
# rtn <- data.frame(i, ...)
# next
# }
# rtn <- data.frame(rtn, i, ...)
# }
# for (i in names(x)){
# if (length(rtn) <= 0){
# rtn <- data.frame(x[[i]], ...)
# rtn.names <- paste(i, names(x[[i]]), sep = ".")
# next
# }
# rtn <- data.frame(rtn, x[[i]], ...)
# rtn.names <- c(rtn.names, paste(i, names(x[[i]]), sep = "."))
# }
n <- length(x)
for (i in 1:n){
if (length(rtn) <= 0){
rtn <- data.frame(x[[i]], ...)
rtn.names <- paste(names(x)[i], names(x[[i]]), sep = ".")
next
}
rtn <- data.frame(rtn, x[[i]], ...)
rtn.names <- c(rtn.names, paste(names(x)[i], names(x[[i]]), sep = "."))
}
if(addName){
names(rtn) <- rtn.names
}
# debug -----------------------------------------
if(any(unlist(lapply(rtn, class)) == "factor")){
browser()
}
# -----------------------------------------------
rtn
}
getParseXML.function <- function(x){
# ---------------------------------------------------------------------------
# return to the parse XML function.
#
# Args:
# x: XML tag info type(class is character)
#
# Returns:
# parse XML function (class is function.)
#
# ---------------------------------------------------------------------------
switch (x,
"info.pages" = parseInfoPagesXML,
"info.Connects" = parseInfoConnectsXML,
"info.Shapes" = parseInfoShapesXML,
"info.Shape" = parseInfoShapeXML,
"info.Cell" = parseInfoCellXML,
"info.Text" = parseInfoShapeTextXML,
"info.Attrs" = parseInfoShapeAttrsXML,
print(pasete("not found", x, sep = " : "))
)
}
getConnect <- function(x.df){
# ---------------------------------------------------------------------------
# get Connect infomation.
#
# Args:
# x.df: connect infomation XML tag(class is list)
#
# Returns:
# connect infomation data. (class is data.frame)
#
# ---------------------------------------------------------------------------
f.0 <- function(x, x.df){
x.df.names <- c("FromPart", "ToSheet", "ToCell", "ToPart")
z <- (x.df$FromSheet == x)
#z.Begin <- z & (x.df$FromCell == "BeginX")
#z.End <- z & (x.df$FromCell == "EndX")
rtn <- data.frame(FromSheet = x.df[z,]$FromSheet[1],
f.1("BeginX", x.df[z,]),
f.1("EndX", x.df[z,]),
stringsAsFactors = FALSE)
rtn
}
f.1 <- function(x, x.df){
x.df.names <- c("FromPart", "ToSheet", "ToCell", "ToPart")
z <- (x.df$FromCell == x)
#z.Begin <- z & (x.df$FromCell == "BeginX")
#z.End <- z & (x.df$FromCell == "EndX")
rtn <- lapply(rep(NA,4), data.frame)
names(rtn) <- x.df.names
rtn <- data.frame(rtn)
if (any(z)){
rtn <- x.df[z, x.df.names]
}
names(rtn) <- paste(x, x.df.names, sep = ".")
rtn
}
from.sheets <- names(table(x.df$FromSheet))
lapply(from.sheets, f.0, x.df)
}
parseVisioDoc <- function(doc.type, x){
# ---------------------------------------------------------------------------
# parse Visio document.
#
# Args:
# x: XML data(class is list)
#
# Returns:
# visio document contents.(class is list.)
#
# ---------------------------------------------------------------------------
f <- getParseXML.function(doc.type)
rtn <- f(x)
rtn
}
parseVisioDocAll <- function(x){
# ---------------------------------------------------------------------------
# visio xml データからxml contents dataを返すparseVisioDoc()のラッパー関数
#
# Args:
# x: visio data(class is list)
#
# Returns:
# visio xml contents data(class is list)
#
# ---------------------------------------------------------------------------
docType <- getXML.documentType(x)
rtn <- lapply(docType, parseVisioDoc, x)
names(rtn) <- docType
rtn
}
parseVisio <- function(src.xml.path){
# ---------------------------------------------------------------------------
# visio xml データからxml contents dataを返す.
#
# Args:
# src.xml.path: visio data(class is list)
#
# Returns:
# visio xml contents data(class is list)
#
# ---------------------------------------------------------------------------
x <- xmlToList(xmlRoot(xmlParse(src.xml.path)))
rtn <- parseVisioDocAll(x)
rtn$file.path <- src.xml.path
rtn$content.type <- "page"
if (any(attributes(rtn)$names == "info.pages")){
rtn$content.type <- "info.pages"
}
rtn
}
old.wd <- getwd()
my.wd <- "%mypath%/parseVisio"
setwd(my.wd)
#vsdx の展開
install.packages("XML")
install.packages("xml2")
library(XML)
library(xml2)
source("parseVisioFunction.R")
src.xml.path <- extractVisioFile("testFlow.vsdx", exdir = "tmp")
x <- lapply(src.xml.path, parseVisio)