zoomGroupStats: Analyze Text, Audio, and Video from 'Zoom' Meetings

Documented in flattenSelf

#' Combine transcript or chat files into conversational turns
#'
#' Different transcription algorithms mark utterances in different ways. This
#' function combines consecutive utterances (or messages) by the same person
#' into conversational turns. The user can also specify a gap between messages
#' that determines whether they should be combined or not.
#'
#' @details
#' Rows are ordered by meeting and then by time (`utteranceEndSeconds` for
#' transcripts, `messageTime` for chats). A row starts a new turn when any of
#' the following holds relative to the row before it:
#' \itemize{
#'   \item it is the first row, or it belongs to a different meeting;
#'   \item the speaker differs, or either speaker is missing (`NA`), because a
#'     missing speaker cannot be shown to be the same person;
#'   \item the gap is greater than or equal to `gapLength`, or the gap cannot
#'     be computed because a time is missing.
#' }
#' Within a turn the start is the minimum start, the end is the maximum end,
#' the text is pasted together with single spaces, and the speaker, language
#' and meeting are taken from the first row of the turn.
#'
#' @param inputData data.frame output from either processZoomTranscript or processZoomChat
#' @param inputType either 'transcript' or 'chat'
#' @param meetingId character giving the name of the meeting identifier variable
#' @param speakerId character giving the name of the speaker identifier variable
#' @param gapLength integer giving the number of seconds for marking distinct turns by the same speaker. Consecutive utterances by the same speaker of greater than or equal to this value will be treated as different conversational turns.
#'
#' @return a data.frame with one row per conversational turn. It has a
#' `turnId` column followed by the summary columns of `inputData`
#' (`utteranceId`, `utteranceStartSeconds`, `utteranceStartTime`,
#' `utteranceEndSeconds`, `utteranceEndTime`, `utteranceTimeWindow`, the
#' speaker column, `utteranceMessage`, `utteranceLanguage` and the meeting
#' column for transcripts; `messageId`, `messageSeconds`, `messageTime`, the
#' speaker column, `message`, `messageLanguage` and the meeting column for
#' chats). An input with zero rows gives an output with zero rows.
#' @export
#'
#' @importFrom rlang .data
#'
#' @examples
#' newChat = flattenSelf(inputData = 
#' sample_chat_processed[sample_chat_processed$batchMeetingId=="00000000001", ], 
#' inputType="chat", meetingId = "batchMeetingId", 
#' speakerId="userName", gapLength=120)
#' 
#' newTranscript = flattenSelf(inputData = 
#' sample_transcript_processed[sample_transcript_processed$batchMeetingId=="00000000001", ], 
#' inputType="transcript", meetingId = "batchMeetingId", 
#' speakerId="userName", gapLength=120)
#' 
flattenSelf <- function(inputData, inputType, meetingId, speakerId, gapLength) {
  # ---- Validate arguments so failures are reported up front ----
  isString <- function(x) is.character(x) && length(x) == 1L && !is.na(x)
  if (!isString(inputType) || !(inputType %in% c("transcript", "chat"))) {
    stop("inputType must be either 'transcript' or 'chat'", call. = FALSE)
  }
  if (!isString(meetingId) || !isString(speakerId)) {
    stop("meetingId and speakerId must each be a single column name",
         call. = FALSE)
  }
  if (!is.numeric(gapLength) || length(gapLength) != 1L || is.na(gapLength)) {
    stop("gapLength must be a single non-missing number of seconds",
         call. = FALSE)
  }

  # `[[` below must return plain vectors even when a tibble is supplied.
  inputData <- as.data.frame(inputData)
  isTranscript <- inputType == "transcript"

  valueCols <- if (isTranscript) {
    c("utteranceStartSeconds", "utteranceStartTime", "utteranceEndSeconds",
      "utteranceEndTime", "utteranceMessage", "utteranceLanguage")
  } else {
    c("messageSeconds", "messageTime", "message", "messageLanguage")
  }
  missingCols <- setdiff(c(valueCols, speakerId, meetingId), names(inputData))
  if (length(missingCols) > 0L) {
    stop("inputData is missing required column(s): ",
         paste(missingCols, collapse = ", "), call. = FALSE)
  }

  # ---- Order rows: by meeting first so that lagging never crosses meetings ----
  orderCol <- if (isTranscript) "utteranceEndSeconds" else "messageTime"
  rowOrder <- order(inputData[[meetingId]], inputData[[orderCol]])
  sorted <- inputData[rowOrder, , drop = FALSE]
  n <- nrow(sorted)

  speaker <- sorted[[speakerId]]
  meeting <- sorted[[meetingId]]

  # Index of the preceding row (NA for the first row). Indexing with it is a
  # type-preserving lag that also works for zero-row input.
  previous <- seq_len(n) - 1L
  previous[previous < 1L] <- NA_integer_

  # ---- Gap in seconds between each row and the one before it ----
  if (isTranscript) {
    turnGap <- sorted[["utteranceStartSeconds"]] -
      sorted[["utteranceEndSeconds"]][previous]
  } else {
    turnGap <- as.numeric(difftime(sorted[["messageTime"]],
                                   sorted[["messageTime"]][previous],
                                   units = "secs"))
  }

  # A row continues the previous turn only when every condition is known to be
  # TRUE; an NA anywhere (first row, missing speaker, missing time) starts a
  # new turn instead of propagating NA.
  continuesTurn <- (speaker == speaker[previous]) &
    (meeting == meeting[previous]) &
    (turnGap < gapLength)
  newTurn <- is.na(continuesTurn) | !continuesTurn

  turnId <- cumsum(newTurn)
  turnStart <- which(newTurn)
  # Row indices belonging to each turn, in turn order.
  turnRows <- unname(split(seq_len(n), turnId))
  # Stored as double, matching the counter type used for turn identifiers.
  turnNumber <- as.numeric(seq_along(turnStart))

  # ---- Per-turn summaries ----
  # Value of a column on the first row of each turn.
  firstOfTurn <- function(column) sorted[[column]][turnStart]
  # Apply `reducer` (min / max) within each turn. Results are written into a
  # template taken from the column itself so class and attributes (for example
  # POSIXct and its time zone) are preserved.
  reducePerTurn <- function(column, reducer) {
    values <- sorted[[column]]
    reduced <- values[turnStart]
    for (k in seq_along(turnRows)) {
      reduced[k] <- reducer(values[turnRows[[k]]])
    }
    reduced
  }
  # Concatenate the text of each turn with single spaces.
  pastePerTurn <- function(column) {
    values <- sorted[[column]]
    vapply(turnRows, function(rows) paste(values[rows], collapse = " "),
           character(1))
  }

  if (isTranscript) {
    startSeconds <- reducePerTurn("utteranceStartSeconds", min)
    endSeconds <- reducePerTurn("utteranceEndSeconds", max)
    columns <- list(
      turnId = turnNumber,
      utteranceId = turnNumber,
      utteranceStartSeconds = startSeconds,
      utteranceStartTime = reducePerTurn("utteranceStartTime", min),
      utteranceEndSeconds = endSeconds,
      utteranceEndTime = reducePerTurn("utteranceEndTime", max),
      utteranceTimeWindow = endSeconds - startSeconds
    )
    columns[[speakerId]] <- firstOfTurn(speakerId)
    columns[["utteranceMessage"]] <- pastePerTurn("utteranceMessage")
    columns[["utteranceLanguage"]] <- firstOfTurn("utteranceLanguage")
    columns[[meetingId]] <- firstOfTurn(meetingId)
  } else {
    columns <- list(
      turnId = turnNumber,
      messageId = turnNumber,
      messageSeconds = reducePerTurn("messageSeconds", min),
      messageTime = reducePerTurn("messageTime", min)
    )
    columns[[speakerId]] <- firstOfTurn(speakerId)
    columns[["message"]] <- pastePerTurn("message")
    columns[["messageLanguage"]] <- firstOfTurn("messageLanguage")
    columns[[meetingId]] <- firstOfTurn(meetingId)
  }

  data.frame(columns, stringsAsFactors = FALSE)
}

andrewpknight/zoomGroupStats documentation built on April 14, 2022, 6:16 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

andrewpknight/zoomGroupStats
Analyze Text, Audio, and Video from 'Zoom' Meetings

R/flattenSelf.R
In andrewpknight/zoomGroupStats: Analyze Text, Audio, and Video from 'Zoom' Meetings

Defines functions flattenSelf

Documented in flattenSelf

R Package Documentation

Browse R Packages

We want your feedback!

andrewpknight/zoomGroupStats Analyze Text, Audio, and Video from 'Zoom' Meetings

R/flattenSelf.R In andrewpknight/zoomGroupStats: Analyze Text, Audio, and Video from 'Zoom' Meetings

Defines functions flattenSelf

Documented in flattenSelf

R Package Documentation

Browse R Packages

We want your feedback!

andrewpknight/zoomGroupStats
Analyze Text, Audio, and Video from 'Zoom' Meetings

R/flattenSelf.R
In andrewpknight/zoomGroupStats: Analyze Text, Audio, and Video from 'Zoom' Meetings